@alexkroman1/aai 1.2.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/.turbo/turbo-build.log +14 -12
  2. package/CHANGELOG.md +20 -0
  3. package/dist/{constants-VTFoymJ-.js → constants-BL3nvg4I.js} +8 -1
  4. package/dist/host/_pipeline-test-fakes.d.ts +117 -0
  5. package/dist/host/pipeline-session-ctx.d.ts +24 -0
  6. package/dist/host/pipeline-session.d.ts +48 -0
  7. package/dist/host/providers/llm.d.ts +2 -0
  8. package/dist/host/providers/stt/assemblyai.d.ts +31 -0
  9. package/dist/host/providers/stt-barrel.d.ts +8 -0
  10. package/dist/host/providers/stt-barrel.js +92 -0
  11. package/dist/host/providers/stt.d.ts +2 -0
  12. package/dist/host/providers/tts/cartesia.d.ts +39 -0
  13. package/dist/host/providers/tts-barrel.d.ts +8 -0
  14. package/dist/host/providers/tts-barrel.js +182 -0
  15. package/dist/host/providers/tts.d.ts +2 -0
  16. package/dist/host/runtime-barrel.js +565 -81
  17. package/dist/host/runtime.d.ts +17 -0
  18. package/dist/host/s2s.d.ts +5 -0
  19. package/dist/host/session-ctx.d.ts +22 -4
  20. package/dist/host/to-vercel-tools.d.ts +45 -0
  21. package/dist/index.js +7 -2
  22. package/dist/sdk/_internal-types.d.ts +15 -1
  23. package/dist/sdk/constants.d.ts +7 -0
  24. package/dist/sdk/define.d.ts +21 -0
  25. package/dist/sdk/manifest.d.ts +22 -0
  26. package/dist/sdk/protocol.d.ts +3 -3
  27. package/dist/sdk/protocol.js +1 -1
  28. package/dist/sdk/providers.d.ts +70 -0
  29. package/dist/sdk/types.d.ts +16 -0
  30. package/exports-no-dev-deps.test.ts +39 -14
  31. package/host/_pipeline-test-fakes.ts +357 -0
  32. package/host/_test-utils.ts +1 -0
  33. package/host/integration/fixtures/README.md +49 -0
  34. package/host/integration/pipeline-reference.integration.test.ts +124 -0
  35. package/host/pipeline-session-ctx.test.ts +31 -0
  36. package/host/pipeline-session-ctx.ts +36 -0
  37. package/host/pipeline-session.test.ts +572 -0
  38. package/host/pipeline-session.ts +489 -0
  39. package/host/providers/llm.ts +3 -0
  40. package/host/providers/providers.test-d.ts +31 -0
  41. package/host/providers/stt/assemblyai.test.ts +100 -0
  42. package/host/providers/stt/assemblyai.ts +154 -0
  43. package/host/providers/stt/fixtures/assemblyai/basic-turn.json +30 -0
  44. package/host/providers/stt-barrel.ts +13 -0
  45. package/host/providers/stt.ts +3 -0
  46. package/host/providers/tts/cartesia.test.ts +210 -0
  47. package/host/providers/tts/cartesia.ts +251 -0
  48. package/host/providers/tts-barrel.ts +13 -0
  49. package/host/providers/tts.ts +3 -0
  50. package/host/runtime.test.ts +81 -1
  51. package/host/runtime.ts +61 -0
  52. package/host/s2s.test.ts +19 -0
  53. package/host/s2s.ts +10 -0
  54. package/host/session-ctx.ts +35 -8
  55. package/host/to-vercel-tools.test.ts +187 -0
  56. package/host/to-vercel-tools.ts +74 -0
  57. package/package.json +15 -1
  58. package/sdk/__snapshots__/exports.test.ts.snap +2 -0
  59. package/sdk/_internal-types.ts +16 -0
  60. package/sdk/constants.ts +8 -0
  61. package/sdk/define.test-d.ts +21 -0
  62. package/sdk/define.test.ts +33 -0
  63. package/sdk/define.ts +21 -0
  64. package/sdk/manifest.test-d.ts +14 -0
  65. package/sdk/manifest.test.ts +51 -0
  66. package/sdk/manifest.ts +39 -0
  67. package/sdk/providers.ts +90 -0
  68. package/sdk/types.ts +16 -0
  69. package/vitest.config.ts +1 -0
package/sdk/constants.ts CHANGED
@@ -34,6 +34,14 @@ export const RUN_CODE_TIMEOUT_MS = 5000;
34
34
  /** Maximum time to wait for sessions to stop during graceful shutdown. */
35
35
  export const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30_000;
36
36
 
37
+ /**
38
+ * Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
39
+ * forcing the turn to complete. Prevents a stuck TTS provider from wedging
40
+ * the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
41
+ * can still reclaim the socket cleanly.
42
+ */
43
+ export const PIPELINE_FLUSH_TIMEOUT_MS = 10_000;
44
+
37
45
  // ─── Size / length limits ────────────────────────────────────────────────
38
46
 
39
47
  /** Maximum length for tool result strings sent to clients. */
package/sdk/define.test-d.ts ADDED
@@ -0,0 +1,21 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ import { expectTypeOf, test } from "vitest";
3
+ import { agent } from "./define.ts";
4
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
5
+
6
+ test("agent() accepts stt/llm/tts optional fields", () => {
7
+ const stt = {} as SttProvider;
8
+ const llm = {} as LlmProvider;
9
+ const tts = {} as TtsProvider;
10
+ const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
11
+ expectTypeOf(def.stt).toEqualTypeOf<SttProvider | undefined>();
12
+ expectTypeOf(def.llm).toEqualTypeOf<LlmProvider | undefined>();
13
+ expectTypeOf(def.tts).toEqualTypeOf<TtsProvider | undefined>();
14
+ });
15
+
16
+ test("agent() without stt/llm/tts is still legal (s2s mode)", () => {
17
+ const def = agent({ name: "t", systemPrompt: "p" });
18
+ expectTypeOf(def.stt).toEqualTypeOf<SttProvider | undefined>();
19
+ expectTypeOf(def.llm).toEqualTypeOf<LlmProvider | undefined>();
20
+ expectTypeOf(def.tts).toEqualTypeOf<TtsProvider | undefined>();
21
+ });
package/sdk/define.test.ts CHANGED
@@ -2,6 +2,8 @@
2
2
  import { describe, expect, test } from "vitest";
3
3
  import { z } from "zod";
4
4
  import { agent, tool } from "./define.ts";
5
+ import { parseManifest } from "./manifest.ts";
6
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
5
7
 
6
8
  describe("tool()", () => {
7
9
  test("returns the definition unchanged", () => {
@@ -54,4 +56,35 @@ describe("agent()", () => {
54
56
  expect(def.tools.greet).toBe(greetTool);
55
57
  expect(def.builtinTools).toEqual(["web_search"]);
56
58
  });
59
+
60
+ test("preserves stt/llm/tts providers on the returned def", () => {
61
+ const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
62
+ const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
63
+ const llm = {} as LlmProvider;
64
+ const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
65
+ expect(def.stt).toBe(stt);
66
+ expect(def.llm).toBe(llm);
67
+ expect(def.tts).toBe(tts);
68
+ });
69
+
70
+ test("stt/llm/tts flow through parseManifest to mode 'pipeline'", () => {
71
+ const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
72
+ const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
73
+ const llm = {} as LlmProvider;
74
+ const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
75
+ const parsed = parseManifest(def);
76
+ expect(parsed.mode).toBe("pipeline");
77
+ expect(parsed.stt).toBe(stt);
78
+ expect(parsed.llm).toBe(llm);
79
+ expect(parsed.tts).toBe(tts);
80
+ });
81
+
82
+ test("agent without providers resolves to mode 's2s'", () => {
83
+ const def = agent({ name: "t", systemPrompt: "p" });
84
+ const parsed = parseManifest(def);
85
+ expect(parsed.mode).toBe("s2s");
86
+ expect(parsed.stt).toBeUndefined();
87
+ expect(parsed.llm).toBeUndefined();
88
+ expect(parsed.tts).toBeUndefined();
89
+ });
57
90
  });
package/sdk/define.ts CHANGED
@@ -4,6 +4,7 @@
4
4
  */
5
5
 
6
6
  import type { z } from "zod";
7
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
7
8
  import {
8
9
  type AgentDef,
9
10
  type BuiltinTool,
@@ -65,6 +66,11 @@ export function tool<P extends z.ZodObject<z.ZodRawShape>>(def: {
65
66
  * });
66
67
  * ```
67
68
  *
69
+ * @remarks
70
+ * Pipeline mode: pass `stt`, `llm`, and `tts` together to switch from the
71
+ * default AssemblyAI Streaming Speech-to-Speech path to a pluggable
72
+ * STT → LLM → TTS pipeline. All three must be set (or all left unset).
73
+ *
68
74
  * @public
69
75
  */
70
76
  export function agent(def: {
@@ -77,6 +83,21 @@ export function agent(def: {
77
83
  toolChoice?: ToolChoice;
78
84
  sttPrompt?: string;
79
85
  idleTimeoutMs?: number;
86
+ /**
87
+ * Pluggable STT provider. Must be set together with `llm` and `tts` to
88
+ * enable pipeline mode; leave all three unset for S2S mode.
89
+ */
90
+ stt?: SttProvider;
91
+ /**
92
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
93
+ * together with `stt` and `tts` to enable pipeline mode.
94
+ */
95
+ llm?: LlmProvider;
96
+ /**
97
+ * Pluggable TTS provider. Must be set together with `stt` and `llm` to
98
+ * enable pipeline mode.
99
+ */
100
+ tts?: TtsProvider;
80
101
  }): AgentDef {
81
102
  return {
82
103
  systemPrompt: DEFAULT_SYSTEM_PROMPT,
package/sdk/manifest.test-d.ts ADDED
@@ -0,0 +1,14 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ import { expectTypeOf, test } from "vitest";
3
+ import type { Manifest } from "./manifest.ts";
4
+
5
+ test("Manifest.stt/llm/tts are optional", () => {
6
+ expectTypeOf<Manifest["stt"]>().toBeNullable();
7
+ expectTypeOf<Manifest["llm"]>().toBeNullable();
8
+ expectTypeOf<Manifest["tts"]>().toBeNullable();
9
+ });
10
+
11
+ test("parseManifest return includes mode", () => {
12
+ type Parsed = ReturnType<typeof import("./manifest.ts").parseManifest>;
13
+ expectTypeOf<Parsed["mode"]>().toEqualTypeOf<"s2s" | "pipeline">();
14
+ });
package/sdk/manifest.test.ts CHANGED
@@ -12,11 +12,18 @@ describe("parseManifest", () => {
12
12
  name: "Simple Agent",
13
13
  systemPrompt: expect.any(String),
14
14
  greeting: expect.any(String),
15
+ sttPrompt: undefined,
15
16
  maxSteps: 5,
16
17
  toolChoice: "auto",
18
+ idleTimeoutMs: undefined,
19
+ theme: undefined,
17
20
  builtinTools: [],
18
21
  allowedHosts: [],
19
22
  tools: {},
23
+ stt: undefined,
24
+ llm: undefined,
25
+ tts: undefined,
26
+ mode: "s2s",
20
27
  });
21
28
  });
22
29
 
@@ -155,3 +162,47 @@ describe("manifest type contracts", () => {
155
162
  expectTypeOf(result.allowedHosts).toEqualTypeOf<string[]>();
156
163
  });
157
164
  });
165
+
166
+ describe("parseManifest — mode classification", () => {
167
+ const stubStt = { name: "stub-stt", open: async () => ({}) };
168
+ const stubTts = { name: "stub-tts", open: async () => ({}) };
169
+ const stubLlm = { modelId: "stub-llm" };
170
+
171
+ test("no stt/llm/tts ⇒ mode: 's2s'", () => {
172
+ const parsed = parseManifest({
173
+ name: "hello",
174
+ systemPrompt: "hi",
175
+ });
176
+ expect(parsed.mode).toBe("s2s");
177
+ });
178
+
179
+ test("all three of stt/llm/tts set ⇒ mode: 'pipeline'", () => {
180
+ const parsed = parseManifest({
181
+ name: "hello",
182
+ systemPrompt: "hi",
183
+ stt: stubStt,
184
+ llm: stubLlm,
185
+ tts: stubTts,
186
+ } as never);
187
+ expect(parsed.mode).toBe("pipeline");
188
+ });
189
+
190
+ test("only stt set ⇒ throws", () => {
191
+ expect(() =>
192
+ parseManifest({
193
+ name: "hello",
194
+ stt: stubStt,
195
+ } as never),
196
+ ).toThrow(/stt, llm, and tts must be set together/);
197
+ });
198
+
199
+ test("stt + tts without llm ⇒ throws", () => {
200
+ expect(() =>
201
+ parseManifest({
202
+ name: "hello",
203
+ stt: stubStt,
204
+ tts: stubTts,
205
+ } as never),
206
+ ).toThrow(/stt, llm, and tts must be set together/);
207
+ });
208
+ });
package/sdk/manifest.ts CHANGED
@@ -8,6 +8,7 @@
8
8
 
9
9
  import { z } from "zod";
10
10
  import { validateAllowedHostPattern } from "./allowed-hosts.ts";
11
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
11
12
  import { BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_SYSTEM_PROMPT } from "./types.ts";
12
13
 
13
14
  /**
@@ -46,6 +47,27 @@ export type Manifest = {
46
47
  tools: Record<string, ToolManifest>;
47
48
  /** Hostnames the agent is allowed to fetch. Empty = no fetch access. */
48
49
  allowedHosts: string[];
50
+ /**
51
+ * Pluggable STT provider. Must be set together with `llm` and `tts` to
52
+ * enable pipeline mode, or all three left unset for s2s mode.
53
+ */
54
+ stt?: SttProvider | undefined;
55
+ /**
56
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
57
+ * together with `stt` and `tts` to enable pipeline mode.
58
+ */
59
+ llm?: LlmProvider | undefined;
60
+ /**
61
+ * Pluggable TTS provider. Must be set together with `stt` and `llm` to
62
+ * enable pipeline mode.
63
+ */
64
+ tts?: TtsProvider | undefined;
65
+ /**
66
+ * Session mode derived from provider fields:
67
+ * - `"s2s"` (default): AssemblyAI Streaming Speech-to-Speech path (no stt/llm/tts set).
68
+ * - `"pipeline"`: pluggable STT → LLM → TTS path (stt + llm + tts all set).
69
+ */
70
+ mode: "s2s" | "pipeline";
49
71
  };
50
72
 
51
73
  const ToolManifestSchema = z.object({
@@ -92,6 +114,19 @@ const ManifestSchema = z.object({
92
114
  */
93
115
  export function parseManifest(input: unknown): Manifest {
94
116
  const parsed = ManifestSchema.parse(input);
117
+ // stt/llm/tts are runtime objects (functions/classes) that can't be
118
+ // validated by the JSON-oriented Zod schema. Pull them straight from
119
+ // the raw input and enforce all-or-nothing presence here.
120
+ const raw = (input ?? {}) as {
121
+ stt?: SttProvider;
122
+ llm?: LlmProvider;
123
+ tts?: TtsProvider;
124
+ };
125
+ const providerCount = [raw.stt, raw.llm, raw.tts].filter((x) => x != null).length;
126
+ if (providerCount !== 0 && providerCount !== 3) {
127
+ throw new Error("stt, llm, and tts must be set together");
128
+ }
129
+ const mode: "s2s" | "pipeline" = providerCount === 3 ? "pipeline" : "s2s";
95
130
  return {
96
131
  name: parsed.name,
97
132
  systemPrompt: parsed.systemPrompt ?? DEFAULT_SYSTEM_PROMPT,
@@ -104,5 +139,9 @@ export function parseManifest(input: unknown): Manifest {
104
139
  theme: parsed.theme,
105
140
  tools: parsed.tools ?? {},
106
141
  allowedHosts: parsed.allowedHosts ?? [],
142
+ stt: raw.stt,
143
+ llm: raw.llm,
144
+ tts: raw.tts,
145
+ mode,
107
146
  };
108
147
  }
package/sdk/providers.ts ADDED
@@ -0,0 +1,90 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * Pluggable provider interfaces — normalized seams over streaming STT / TTS
4
+ * SDKs, plus the LLM provider type.
5
+ *
6
+ * These are zero-runtime **type** declarations with no Node.js dependencies,
7
+ * so they live in `sdk/` alongside the `Manifest` type that references them.
8
+ * Concrete adapters (e.g. AssemblyAI STT, Cartesia TTS) live under
9
+ * `host/providers/` because they depend on Node-only SDKs.
10
+ */
11
+
12
+ import type { LanguageModel } from "ai";
13
+
14
+ /** Unsubscribe callback returned by `.on()` event subscriptions. */
15
+ export type Unsubscribe = () => void;
16
+
17
+ // -------- STT --------
18
+
19
+ export interface SttError extends Error {
20
+ readonly code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error";
21
+ }
22
+
23
+ export type SttEvents = {
24
+ /** Interim transcript; drives barge-in detection. */
25
+ partial: (text: string) => void;
26
+ /** End-of-turn final transcript; cue to run the LLM. */
27
+ final: (text: string) => void;
28
+ /** Terminal error. The session is expected to end after this fires. */
29
+ error: (err: SttError) => void;
30
+ };
31
+
32
+ export interface SttSession {
33
+ sendAudio(pcm: Int16Array): void;
34
+ on<E extends keyof SttEvents>(event: E, fn: SttEvents[E]): Unsubscribe;
35
+ close(): Promise<void>;
36
+ }
37
+
38
+ export interface SttOpenOptions {
39
+ sampleRate: number;
40
+ apiKey: string;
41
+ sttPrompt?: string | undefined;
42
+ signal: AbortSignal;
43
+ }
44
+
45
+ export interface SttProvider {
46
+ readonly name: string;
47
+ open(opts: SttOpenOptions): Promise<SttSession>;
48
+ }
49
+
50
+ // -------- TTS --------
51
+
52
+ export interface TtsError extends Error {
53
+ readonly code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error";
54
+ }
55
+
56
+ export type TtsEvents = {
57
+ /** One PCM16 audio chunk. Orchestrator forwards to the client. */
58
+ audio: (pcm: Int16Array) => void;
59
+ /** Synthesis drained after flush() or cancel(). Emitted exactly once per turn. */
60
+ done: () => void;
61
+ /** Terminal error. The session is expected to end after this fires. */
62
+ error: (err: TtsError) => void;
63
+ };
64
+
65
+ export interface TtsSession {
66
+ /** Push text deltas from the LLM. Provider may synthesize as chunks arrive. */
67
+ sendText(text: string): void;
68
+ /** Signal "no more text this turn". Emits `done` when fully synthesized. */
69
+ flush(): void;
70
+ /** Interrupt immediately (barge-in). Emits `done` synchronously. */
71
+ cancel(): void;
72
+ on<E extends keyof TtsEvents>(event: E, fn: TtsEvents[E]): Unsubscribe;
73
+ close(): Promise<void>;
74
+ }
75
+
76
+ export interface TtsOpenOptions {
77
+ sampleRate: number;
78
+ apiKey: string;
79
+ signal: AbortSignal;
80
+ }
81
+
82
+ export interface TtsProvider {
83
+ readonly name: string;
84
+ open(opts: TtsOpenOptions): Promise<TtsSession>;
85
+ }
86
+
87
+ // -------- LLM --------
88
+
89
+ /** LLM provider — Vercel AI SDK's `LanguageModel`; no wrapping. */
90
+ export type LlmProvider = LanguageModel;
package/sdk/types.ts CHANGED
@@ -5,6 +5,7 @@
5
5
 
6
6
  import { z } from "zod";
7
7
  import type { Kv } from "./kv.ts";
8
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
8
9
 
9
10
  /**
10
11
  * Identifier for a built-in server-side tool.
@@ -217,6 +218,21 @@ export type AgentDef<S = Record<string, unknown>> = {
217
218
  tools: Readonly<Record<string, ToolDef<z.ZodObject<z.ZodRawShape>, S>>>;
218
219
  state?: () => S;
219
220
  idleTimeoutMs?: number;
221
+ /**
222
+ * Pluggable STT provider. Set together with `llm` and `tts` to enable
223
+ * pipeline mode; all three unset means S2S mode.
224
+ */
225
+ stt?: SttProvider;
226
+ /**
227
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Set together
228
+ * with `stt` and `tts` for pipeline mode.
229
+ */
230
+ llm?: LlmProvider;
231
+ /**
232
+ * Pluggable TTS provider. Set together with `stt` and `llm` for
233
+ * pipeline mode.
234
+ */
235
+ tts?: TtsProvider;
220
236
  };
221
237
 
222
238
  // ─── Zod schemas ────────────────────────────────────────────────────────────
package/vitest.config.ts CHANGED
@@ -10,6 +10,7 @@ export default defineConfig({
10
10
  "**/pentest.test.ts",
11
11
  "**/run-code-sandbox.test.ts",
12
12
  "**/integration.test.ts",
13
+ "**/*.integration.test.ts",
13
14
  "node_modules",
14
15
  "dist",
15
16
  ],