@alexkroman1/aai 1.2.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/.turbo/turbo-build.log +14 -12
  2. package/CHANGELOG.md +20 -0
  3. package/dist/{constants-VTFoymJ-.js → constants-BL3nvg4I.js} +8 -1
  4. package/dist/host/_pipeline-test-fakes.d.ts +117 -0
  5. package/dist/host/pipeline-session-ctx.d.ts +24 -0
  6. package/dist/host/pipeline-session.d.ts +48 -0
  7. package/dist/host/providers/llm.d.ts +2 -0
  8. package/dist/host/providers/stt/assemblyai.d.ts +31 -0
  9. package/dist/host/providers/stt-barrel.d.ts +8 -0
  10. package/dist/host/providers/stt-barrel.js +92 -0
  11. package/dist/host/providers/stt.d.ts +2 -0
  12. package/dist/host/providers/tts/cartesia.d.ts +39 -0
  13. package/dist/host/providers/tts-barrel.d.ts +8 -0
  14. package/dist/host/providers/tts-barrel.js +182 -0
  15. package/dist/host/providers/tts.d.ts +2 -0
  16. package/dist/host/runtime-barrel.js +565 -81
  17. package/dist/host/runtime.d.ts +17 -0
  18. package/dist/host/s2s.d.ts +5 -0
  19. package/dist/host/session-ctx.d.ts +22 -4
  20. package/dist/host/to-vercel-tools.d.ts +45 -0
  21. package/dist/index.js +7 -2
  22. package/dist/sdk/_internal-types.d.ts +15 -1
  23. package/dist/sdk/constants.d.ts +7 -0
  24. package/dist/sdk/define.d.ts +21 -0
  25. package/dist/sdk/manifest.d.ts +22 -0
  26. package/dist/sdk/protocol.d.ts +3 -3
  27. package/dist/sdk/protocol.js +1 -1
  28. package/dist/sdk/providers.d.ts +70 -0
  29. package/dist/sdk/types.d.ts +16 -0
  30. package/exports-no-dev-deps.test.ts +39 -14
  31. package/host/_pipeline-test-fakes.ts +357 -0
  32. package/host/_test-utils.ts +1 -0
  33. package/host/integration/fixtures/README.md +49 -0
  34. package/host/integration/pipeline-reference.integration.test.ts +124 -0
  35. package/host/pipeline-session-ctx.test.ts +31 -0
  36. package/host/pipeline-session-ctx.ts +36 -0
  37. package/host/pipeline-session.test.ts +572 -0
  38. package/host/pipeline-session.ts +489 -0
  39. package/host/providers/llm.ts +3 -0
  40. package/host/providers/providers.test-d.ts +31 -0
  41. package/host/providers/stt/assemblyai.test.ts +100 -0
  42. package/host/providers/stt/assemblyai.ts +154 -0
  43. package/host/providers/stt/fixtures/assemblyai/basic-turn.json +30 -0
  44. package/host/providers/stt-barrel.ts +13 -0
  45. package/host/providers/stt.ts +3 -0
  46. package/host/providers/tts/cartesia.test.ts +210 -0
  47. package/host/providers/tts/cartesia.ts +251 -0
  48. package/host/providers/tts-barrel.ts +13 -0
  49. package/host/providers/tts.ts +3 -0
  50. package/host/runtime.test.ts +81 -1
  51. package/host/runtime.ts +61 -0
  52. package/host/s2s.test.ts +19 -0
  53. package/host/s2s.ts +10 -0
  54. package/host/session-ctx.ts +35 -8
  55. package/host/to-vercel-tools.test.ts +187 -0
  56. package/host/to-vercel-tools.ts +74 -0
  57. package/package.json +15 -1
  58. package/sdk/__snapshots__/exports.test.ts.snap +2 -0
  59. package/sdk/_internal-types.ts +16 -0
  60. package/sdk/constants.ts +8 -0
  61. package/sdk/define.test-d.ts +21 -0
  62. package/sdk/define.test.ts +33 -0
  63. package/sdk/define.ts +21 -0
  64. package/sdk/manifest.test-d.ts +14 -0
  65. package/sdk/manifest.test.ts +51 -0
  66. package/sdk/manifest.ts +39 -0
  67. package/sdk/providers.ts +90 -0
  68. package/sdk/types.ts +16 -0
  69. package/vitest.config.ts +1 -0
@@ -9,6 +9,7 @@ import { type ToolSchema } from "../sdk/_internal-types.ts";
9
9
  import type { Kv } from "../sdk/kv.ts";
10
10
  import type { ClientSink } from "../sdk/protocol.ts";
11
11
  import { type ReadyConfig } from "../sdk/protocol.ts";
12
+ import type { LlmProvider, SttProvider, TtsProvider } from "../sdk/providers.ts";
12
13
  import type { AgentDef } from "../sdk/types.ts";
13
14
  import type { Logger, S2SConfig } from "./runtime-config.ts";
14
15
  import type { CreateS2sWebSocket } from "./s2s.ts";
@@ -89,6 +90,22 @@ export type RuntimeOptions = {
89
90
  * their own fetch wrapper.
90
91
  */
91
92
  fetch?: typeof globalThis.fetch | undefined;
93
+ /**
94
+ * Pluggable STT provider. Must be set together with `llm` and `tts` to
95
+ * route sessions through the pipeline path; leave all three unset for
96
+ * the default AssemblyAI Streaming Speech-to-Speech (S2S) path.
97
+ */
98
+ stt?: SttProvider | undefined;
99
+ /**
100
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
101
+ * together with `stt` and `tts` to route sessions through the pipeline path.
102
+ */
103
+ llm?: LlmProvider | undefined;
104
+ /**
105
+ * Pluggable TTS provider. Must be set together with `stt` and `llm` to
106
+ * route sessions through the pipeline path.
107
+ */
108
+ tts?: TtsProvider | undefined;
92
109
  };
93
110
  /**
94
111
  * The agent runtime returned by {@link createRuntime}.
@@ -62,6 +62,11 @@ export type S2sEvents = {
62
62
  export type S2sHandle = {
63
63
  on<K extends keyof S2sEvents>(event: K, cb: S2sEvents[K]): Unsubscribe;
64
64
  sendAudio(audio: Uint8Array): void;
65
+ /**
66
+ * Send a pre-encoded audio wire frame. For perf-critical callers (load tests)
67
+ * that batch-encode up front. Skips logging; caller owns wire format.
68
+ */
69
+ sendAudioRaw(jsonFrame: string): void;
65
70
  sendToolResult(callId: string, result: string): void;
66
71
  updateSession(config: S2sSessionConfig): void;
67
72
  resumeSession(sessionId: string): void;
@@ -25,15 +25,18 @@ export type SessionDeps = {
25
25
  readonly maxHistory: number;
26
26
  };
27
27
  /**
28
- * Session context threaded through event handlers.
28
+ * Transport-agnostic session context shared by S2S and pipeline sessions.
29
+ *
30
+ * Owns reply lifecycle, conversation history (with sliding-window truncation),
31
+ * and per-turn tool-call step enforcement. Transport-specific fields (e.g.
32
+ * `s2s` for S2S, `stt`/`tts` for the pipeline) live on the extending types.
29
33
  *
30
34
  * Split into three layers:
31
35
  * - {@link SessionDeps} — immutable dependencies (set once)
32
36
  * - {@link ReplyState} via `reply` — per-reply mutable state (reset on beginReply/cancelReply)
33
- * - Remaining fields — connection, conversation, and lifecycle methods
37
+ * - Remaining fields — conversation and lifecycle methods
34
38
  */
35
- export type S2sSessionCtx = SessionDeps & {
36
- s2s: S2sHandle | null;
39
+ export type BaseSessionCtx = SessionDeps & {
37
40
  reply: ReplyState;
38
41
  turnPromise: Promise<void> | null;
39
42
  conversationMessages: Message[];
@@ -43,6 +46,21 @@ export type S2sSessionCtx = SessionDeps & {
43
46
  cancelReply(): void;
44
47
  chainTurn(p: Promise<void>): void;
45
48
  };
49
+ /**
50
+ * S2S session context — {@link BaseSessionCtx} plus the S2S WebSocket handle.
51
+ */
52
+ export type S2sSessionCtx = BaseSessionCtx & {
53
+ s2s: S2sHandle | null;
54
+ };
55
+ export declare function _buildBaseCtx(opts: {
56
+ id: string;
57
+ agent: string;
58
+ client: ClientSink;
59
+ agentConfig: AgentConfig;
60
+ executeTool: ExecuteTool;
61
+ log: Logger;
62
+ maxHistory?: number | undefined;
63
+ }): BaseSessionCtx;
46
64
  export declare function buildCtx(opts: {
47
65
  id: string;
48
66
  agent: string;
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Converts agent {@link ToolSchema}[] to Vercel AI SDK tools with `execute`
3
+ * delegation to the agent's {@link ExecuteTool} function.
4
+ *
5
+ * The pipeline orchestrator passes the output to `streamText({ tools })`.
6
+ * Each produced tool's `execute` closure calls
7
+ * `ctx.executeTool(name, args, sessionId, messages(), { signal, toolCallId })`,
8
+ * so the existing agent tool infrastructure (argument validation, KV, hooks,
9
+ * timeout) remains the single source of truth for tool behavior.
10
+ *
11
+ * Per-call `options.abortSignal` (forwarded by `streamText` when the
12
+ * outer turn is aborted, e.g. barge-in) takes precedence over the
13
+ * bag-level `ctx.signal` so individual invocations respect streamText
14
+ * aborts.
15
+ */
16
+ import { type Tool } from "ai";
17
+ import type { ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
18
+ import type { Message } from "../sdk/types.ts";
19
+ export interface ToVercelToolsContext {
20
+ /** The agent's tool-execution function (from the runtime). */
21
+ executeTool: ExecuteTool;
22
+ /** Session id threaded to {@link executeTool}. */
23
+ sessionId: string;
24
+ /**
25
+ * Returns the current conversation history at call-time. The orchestrator
26
+ * calls this per invocation; `toVercelTools` snapshots the returned array
27
+ * before forwarding to `executeTool` so concurrent mutations cannot leak
28
+ * across tool calls.
29
+ */
30
+ messages: () => readonly Message[];
31
+ /**
32
+ * Bag-level abort signal. Used as a fallback when the per-call
33
+ * `options.abortSignal` from Vercel's `ToolExecutionOptions` is absent.
34
+ */
35
+ signal?: AbortSignal;
36
+ }
37
+ /**
38
+ * Convert an array of {@link ToolSchema} to a Vercel AI SDK `ToolSet`
39
+ * (record keyed by tool name).
40
+ *
41
+ * Uses the v6 `tool()` helper with `inputSchema: jsonSchema(...)` wrapping
42
+ * the agent's JSON Schema `parameters`. Execution is delegated to
43
+ * `ctx.executeTool` so validation, KV, timeouts, and hooks keep working.
44
+ */
45
+ export declare function toVercelTools(schemas: readonly ToolSchema[], ctx: ToVercelToolsContext): Record<string, Tool>;
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { _ as WS_OPEN, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as TOOL_EXECUTION_TIMEOUT_MS, h as RUN_CODE_TIMEOUT_MS, i as DEFAULT_SESSION_START_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, n as DEFAULT_IDLE_TIMEOUT_MS, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, r as DEFAULT_MAX_HISTORY, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP, u as MAX_MESSAGE_BUFFER_SIZE } from "./constants-VTFoymJ-.js";
1
+ import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, i as DEFAULT_SESSION_START_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, n as DEFAULT_IDLE_TIMEOUT_MS, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, r as DEFAULT_MAX_HISTORY, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP, u as MAX_MESSAGE_BUFFER_SIZE, v as WS_OPEN } from "./constants-BL3nvg4I.js";
2
2
  import { i as ToolChoiceSchema, n as DEFAULT_GREETING, r as DEFAULT_SYSTEM_PROMPT, t as BuiltinToolSchema } from "./types-Cfx_4QDK.js";
3
3
  import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "./ws-upgrade-BeOQ7fXL.js";
4
4
  //#region sdk/allowed-hosts.ts
@@ -137,6 +137,11 @@ function tool(def) {
137
137
  * });
138
138
  * ```
139
139
  *
140
+ * @remarks
141
+ * Pipeline mode: pass `stt`, `llm`, and `tts` together to switch from the
142
+ * default AssemblyAI Streaming Speech-to-Speech path to a pluggable
143
+ * STT → LLM → TTS pipeline. All three must be set (or all left unset).
144
+ *
140
145
  * @public
141
146
  */
142
147
  function agent(def) {
@@ -149,4 +154,4 @@ function agent(def) {
149
154
  };
150
155
  }
151
156
  //#endregion
152
- export { AGENT_CSP, BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_IDLE_TIMEOUT_MS, DEFAULT_MAX_HISTORY, DEFAULT_SESSION_START_TIMEOUT_MS, DEFAULT_SHUTDOWN_TIMEOUT_MS, DEFAULT_STT_SAMPLE_RATE, DEFAULT_SYSTEM_PROMPT, DEFAULT_TTS_SAMPLE_RATE, FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_MESSAGE_BUFFER_SIZE, MAX_PAGE_CHARS, MAX_TOOL_RESULT_CHARS, MAX_VALUE_SIZE, MAX_WS_PAYLOAD_BYTES, RUN_CODE_TIMEOUT_MS, TOOL_EXECUTION_TIMEOUT_MS, ToolChoiceSchema, WS_OPEN, agent, errorDetail, errorMessage, matchesAllowedHost, parseWsUpgradeParams, tool, toolError, validateAllowedHostPattern };
157
+ export { AGENT_CSP, BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_IDLE_TIMEOUT_MS, DEFAULT_MAX_HISTORY, DEFAULT_SESSION_START_TIMEOUT_MS, DEFAULT_SHUTDOWN_TIMEOUT_MS, DEFAULT_STT_SAMPLE_RATE, DEFAULT_SYSTEM_PROMPT, DEFAULT_TTS_SAMPLE_RATE, FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_MESSAGE_BUFFER_SIZE, MAX_PAGE_CHARS, MAX_TOOL_RESULT_CHARS, MAX_VALUE_SIZE, MAX_WS_PAYLOAD_BYTES, PIPELINE_FLUSH_TIMEOUT_MS, RUN_CODE_TIMEOUT_MS, TOOL_EXECUTION_TIMEOUT_MS, ToolChoiceSchema, WS_OPEN, agent, errorDetail, errorMessage, matchesAllowedHost, parseWsUpgradeParams, tool, toolError, validateAllowedHostPattern };
@@ -7,13 +7,27 @@ import type { JSONSchema7 } from "json-schema";
7
7
  import { z } from "zod";
8
8
  import type { Message } from "./types.ts";
9
9
  import { type ToolDef } from "./types.ts";
10
+ /**
11
+ * Options forwarded to an {@link ExecuteTool} invocation.
12
+ *
13
+ * Primarily used by the pipeline orchestrator (streamText tool loop) to
14
+ * thread an {@link AbortSignal} into tool execution. The S2S voice path
15
+ * does not pass these options today — recipients must treat the whole
16
+ * bag as optional.
17
+ */
18
+ export interface ExecuteToolOptions {
19
+ /** Abort signal bound to the enclosing LLM turn / request. */
20
+ signal?: AbortSignal;
21
+ /** Vercel AI SDK tool-call ID for this invocation. Useful for tracing and correlation. */
22
+ toolCallId?: string;
23
+ }
10
24
  /**
11
25
  * Function signature for executing a tool by name.
12
26
  *
13
27
  * Used by session.ts to invoke tools, by direct-executor.ts and
14
28
  * harness-runtime.ts to implement the execution.
15
29
  */
16
- export type ExecuteTool = (name: string, args: Readonly<Record<string, unknown>>, sessionId?: string, messages?: readonly Message[]) => Promise<string>;
30
+ export type ExecuteTool = (name: string, args: Readonly<Record<string, unknown>>, sessionId?: string, messages?: readonly Message[], opts?: ExecuteToolOptions) => Promise<string>;
17
31
  /**
18
32
  * Zod schema for serializable agent configuration sent over the wire.
19
33
  *
@@ -20,6 +20,13 @@ export declare const FETCH_TIMEOUT_MS = 15000;
20
20
  export declare const RUN_CODE_TIMEOUT_MS = 5000;
21
21
  /** Maximum time to wait for sessions to stop during graceful shutdown. */
22
22
  export declare const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30000;
23
+ /**
24
+ * Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
25
+ * forcing the turn to complete. Prevents a stuck TTS provider from wedging
26
+ * the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
27
+ * can still reclaim the socket cleanly.
28
+ */
29
+ export declare const PIPELINE_FLUSH_TIMEOUT_MS = 10000;
23
30
  /** Maximum length for tool result strings sent to clients. */
24
31
  export declare const MAX_TOOL_RESULT_CHARS = 4000;
25
32
  /** Maximum chars for webpage text after HTML-to-text conversion. */
@@ -2,6 +2,7 @@
2
2
  * Helper functions for defining agents and tools with full type inference.
3
3
  */
4
4
  import type { z } from "zod";
5
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
5
6
  import { type AgentDef, type BuiltinTool, type ToolChoice, type ToolContext, type ToolDef } from "./types.ts";
6
7
  /**
7
8
  * Define a tool with typed parameters and execute function.
@@ -51,6 +52,11 @@ export declare function tool<P extends z.ZodObject<z.ZodRawShape>>(def: {
51
52
  * });
52
53
  * ```
53
54
  *
55
+ * @remarks
56
+ * Pipeline mode: pass `stt`, `llm`, and `tts` together to switch from the
57
+ * default AssemblyAI Streaming Speech-to-Speech path to a pluggable
58
+ * STT → LLM → TTS pipeline. All three must be set (or all left unset).
59
+ *
54
60
  * @public
55
61
  */
56
62
  export declare function agent(def: {
@@ -63,4 +69,19 @@ export declare function agent(def: {
63
69
  toolChoice?: ToolChoice;
64
70
  sttPrompt?: string;
65
71
  idleTimeoutMs?: number;
72
+ /**
73
+ * Pluggable STT provider. Must be set together with `llm` and `tts` to
74
+ * enable pipeline mode; leave all three unset for S2S mode.
75
+ */
76
+ stt?: SttProvider;
77
+ /**
78
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
79
+ * together with `stt` and `tts` to enable pipeline mode.
80
+ */
81
+ llm?: LlmProvider;
82
+ /**
83
+ * Pluggable TTS provider. Must be set together with `stt` and `llm` to
84
+ * enable pipeline mode.
85
+ */
86
+ tts?: TtsProvider;
66
87
  }): AgentDef;
@@ -4,6 +4,7 @@
4
4
  * Flows from build → host → sdk. Validated via Zod at the boundary,
5
5
  * then used as a plain typed object throughout the runtime.
6
6
  */
7
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
7
8
  /**
8
9
  * Tool definition as it appears in the serialized manifest JSON.
9
10
  *
@@ -39,6 +40,27 @@ export type Manifest = {
39
40
  tools: Record<string, ToolManifest>;
40
41
  /** Hostnames the agent is allowed to fetch. Empty = no fetch access. */
41
42
  allowedHosts: string[];
43
+ /**
44
+ * Pluggable STT provider. Must be set together with `llm` and `tts` to
45
+ * enable pipeline mode; leave all three unset for S2S mode.
46
+ */
47
+ stt?: SttProvider | undefined;
48
+ /**
49
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
50
+ * together with `stt` and `tts` to enable pipeline mode.
51
+ */
52
+ llm?: LlmProvider | undefined;
53
+ /**
54
+ * Pluggable TTS provider. Must be set together with `stt` and `llm` to
55
+ * enable pipeline mode.
56
+ */
57
+ tts?: TtsProvider | undefined;
58
+ /**
59
+ * Session mode derived from provider fields:
60
+ * - `"s2s"` (default): AssemblyAI Streaming Speech-to-Speech path (no stt/llm/tts set).
61
+ * - `"pipeline"`: pluggable STT → LLM → TTS path (stt + llm + tts all set).
62
+ */
63
+ mode: "s2s" | "pipeline";
42
64
  };
43
65
  /**
44
66
  * Parse and normalize a raw agent manifest, applying defaults for all
@@ -61,13 +61,13 @@ export type KvRequest = z.infer<typeof KvRequestSchema>;
61
61
  */
62
62
  export declare const SessionErrorCodeSchema: z.ZodEnum<{
63
63
  internal: "internal";
64
+ audio: "audio";
64
65
  tool: "tool";
65
66
  connection: "connection";
66
67
  stt: "stt";
67
68
  llm: "llm";
68
69
  tts: "tts";
69
70
  protocol: "protocol";
70
- audio: "audio";
71
71
  }>;
72
72
  /**
73
73
  * Error codes for categorizing session errors on the wire.
@@ -108,13 +108,13 @@ export declare const ClientEventSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
108
108
  type: z.ZodLiteral<"error">;
109
109
  code: z.ZodEnum<{
110
110
  internal: "internal";
111
+ audio: "audio";
111
112
  tool: "tool";
112
113
  connection: "connection";
113
114
  stt: "stt";
114
115
  llm: "llm";
115
116
  tts: "tts";
116
117
  protocol: "protocol";
117
- audio: "audio";
118
118
  }>;
119
119
  message: z.ZodString;
120
120
  }, z.core.$strip>, z.ZodObject<{
@@ -190,13 +190,13 @@ export declare const ServerMessageSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
190
190
  type: z.ZodLiteral<"error">;
191
191
  code: z.ZodEnum<{
192
192
  internal: "internal";
193
+ audio: "audio";
193
194
  tool: "tool";
194
195
  connection: "connection";
195
196
  stt: "stt";
196
197
  llm: "llm";
197
198
  tts: "tts";
198
199
  protocol: "protocol";
199
- audio: "audio";
200
200
  }>;
201
201
  message: z.ZodString;
202
202
  }, z.core.$strip>, z.ZodObject<{
@@ -1,4 +1,4 @@
1
- import { f as MAX_TOOL_RESULT_CHARS } from "../constants-VTFoymJ-.js";
1
+ import { f as MAX_TOOL_RESULT_CHARS } from "../constants-BL3nvg4I.js";
2
2
  import { z } from "zod";
3
3
  //#region sdk/protocol.ts
4
4
  /**
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Pluggable provider interfaces — normalized seams over streaming STT / TTS
3
+ * SDKs, plus the LLM provider type.
4
+ *
5
+ * These are zero-runtime **type** declarations with no Node.js dependencies,
6
+ * so they live in `sdk/` alongside the `Manifest` type that references them.
7
+ * Concrete adapters (e.g. AssemblyAI STT, Cartesia TTS) live under
8
+ * `host/providers/` because they depend on Node-only SDKs.
9
+ */
10
+ import type { LanguageModel } from "ai";
11
+ /** Unsubscribe callback returned by `.on()` event subscriptions. */
12
+ export type Unsubscribe = () => void;
13
+ export interface SttError extends Error {
14
+ readonly code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error";
15
+ }
16
+ export type SttEvents = {
17
+ /** Interim transcript; drives barge-in detection. */
18
+ partial: (text: string) => void;
19
+ /** End-of-turn final transcript; cue to run the LLM. */
20
+ final: (text: string) => void;
21
+ /** Terminal error. The session is expected to end after this fires. */
22
+ error: (err: SttError) => void;
23
+ };
24
+ export interface SttSession {
25
+ sendAudio(pcm: Int16Array): void;
26
+ on<E extends keyof SttEvents>(event: E, fn: SttEvents[E]): Unsubscribe;
27
+ close(): Promise<void>;
28
+ }
29
+ export interface SttOpenOptions {
30
+ sampleRate: number;
31
+ apiKey: string;
32
+ sttPrompt?: string | undefined;
33
+ signal: AbortSignal;
34
+ }
35
+ export interface SttProvider {
36
+ readonly name: string;
37
+ open(opts: SttOpenOptions): Promise<SttSession>;
38
+ }
39
+ export interface TtsError extends Error {
40
+ readonly code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error";
41
+ }
42
+ export type TtsEvents = {
43
+ /** One PCM16 audio chunk. Orchestrator forwards to the client. */
44
+ audio: (pcm: Int16Array) => void;
45
+ /** Synthesis drained after flush() or cancel(). Emitted exactly once per turn. */
46
+ done: () => void;
47
+ /** Terminal error. The session is expected to end after this fires. */
48
+ error: (err: TtsError) => void;
49
+ };
50
+ export interface TtsSession {
51
+ /** Push text deltas from the LLM. Provider may synthesize as chunks arrive. */
52
+ sendText(text: string): void;
53
+ /** Signal "no more text this turn". Emits `done` when fully synthesized. */
54
+ flush(): void;
55
+ /** Interrupt immediately (barge-in). Emits `done` synchronously. */
56
+ cancel(): void;
57
+ on<E extends keyof TtsEvents>(event: E, fn: TtsEvents[E]): Unsubscribe;
58
+ close(): Promise<void>;
59
+ }
60
+ export interface TtsOpenOptions {
61
+ sampleRate: number;
62
+ apiKey: string;
63
+ signal: AbortSignal;
64
+ }
65
+ export interface TtsProvider {
66
+ readonly name: string;
67
+ open(opts: TtsOpenOptions): Promise<TtsSession>;
68
+ }
69
+ /** LLM provider — Vercel AI SDK's `LanguageModel`; no wrapping. */
70
+ export type LlmProvider = LanguageModel;
@@ -3,6 +3,7 @@
3
3
  */
4
4
  import { z } from "zod";
5
5
  import type { Kv } from "./kv.ts";
6
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
6
7
  /**
7
8
  * Identifier for a built-in server-side tool.
8
9
  *
@@ -188,6 +189,21 @@ export type AgentDef<S = Record<string, unknown>> = {
188
189
  tools: Readonly<Record<string, ToolDef<z.ZodObject<z.ZodRawShape>, S>>>;
189
190
  state?: () => S;
190
191
  idleTimeoutMs?: number;
192
+ /**
193
+ * Pluggable STT provider. Set together with `llm` and `tts` to enable
194
+ * pipeline mode; all three unset means S2S mode.
195
+ */
196
+ stt?: SttProvider;
197
+ /**
198
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Set together
199
+ * with `stt` and `tts` for pipeline mode.
200
+ */
201
+ llm?: LlmProvider;
202
+ /**
203
+ * Pluggable TTS provider. Set together with `stt` and `llm` for
204
+ * pipeline mode.
205
+ */
206
+ tts?: TtsProvider;
191
207
  };
192
208
  /** @internal Zod schema for {@link BuiltinTool}. Exported for reuse in internal schemas. */
193
209
  export declare const BuiltinToolSchema: z.ZodEnum<{
@@ -1,30 +1,46 @@
1
1
  // Copyright 2025 the AAI authors. MIT license.
2
2
  /**
3
- * Regression guard: the published bundle must not import any devDependency.
3
+ * Regression guard: the published bundle must not import any devDependency
4
+ * that isn't also a (peer) dependency.
4
5
  *
5
6
  * `tsdown` is configured with `deps.neverBundle: [/^[^./]/]`, meaning every
6
7
  * bare npm specifier survives as an `import` in the built output. If a
7
- * devDependency (e.g. `vitest`) is reachable from any public export, the
8
- * production server — which only installs `dependencies` — crashes at
9
- * startup with `ERR_MODULE_NOT_FOUND`.
8
+ * pure devDependency (e.g. `vitest`) is reachable from any public export,
9
+ * the production server — which only installs `dependencies` +
10
+ * `peerDependencies` — crashes at startup with `ERR_MODULE_NOT_FOUND`.
11
+ *
12
+ * Optional peer dependencies (e.g. `ai`, `assemblyai`,
13
+ * `@cartesia/cartesia-js`) are legitimately listed in both
14
+ * `devDependencies` (so our own tests resolve them) and
15
+ * `peerDependencies` (so consumers supply their own pin). Those are
16
+ * allowed — only specifiers that are `devDependencies`-only count as
17
+ * leaks.
10
18
  *
11
19
  * This test reads the built `dist/` files for each public export and fails
12
- * if any bare import specifier is a devDependency.
20
+ * if any bare import specifier is exclusively a devDependency.
13
21
  */
14
22
 
23
+ import { execFileSync } from "node:child_process";
15
24
  import { existsSync, readFileSync } from "node:fs";
16
25
  import { dirname, resolve } from "node:path";
17
26
  import { fileURLToPath } from "node:url";
18
- import { describe, expect, test } from "vitest";
27
+ import { beforeAll, describe, expect, test } from "vitest";
19
28
 
20
29
  const PKG_DIR = dirname(fileURLToPath(import.meta.url));
21
30
  const pkg = JSON.parse(readFileSync(resolve(PKG_DIR, "package.json"), "utf-8")) as {
22
31
  exports: Record<string, { "@dev/source"?: string; import?: string }>;
23
32
  devDependencies?: Record<string, string>;
24
33
  dependencies?: Record<string, string>;
34
+ peerDependencies?: Record<string, string>;
25
35
  };
26
36
 
27
- const devDeps = new Set(Object.keys(pkg.devDependencies ?? {}));
37
+ const peerDeps = new Set(Object.keys(pkg.peerDependencies ?? {}));
38
+ // "leak" means: listed in devDependencies but NOT also a peer dep. Pure
39
+ // devDeps (like `vitest`, `tsdown`) are leaks; optional peer SDKs that
40
+ // happen to double as a devDep for our own tests are not.
41
+ const devDeps = new Set(
42
+ Object.keys(pkg.devDependencies ?? {}).filter((name) => !peerDeps.has(name)),
43
+ );
28
44
 
29
45
  // Extract bare module specifiers from an ESM source string. Covers:
30
46
  // import ... from "x" export ... from "x" import("x")
@@ -36,18 +52,25 @@ function rootSpecifier(spec: string): string {
36
52
  return spec.split("/")[0] ?? spec;
37
53
  }
38
54
 
39
- describe("built exports do not import devDependencies", () => {
55
+ describe("built exports do not import devDependency-only packages", () => {
40
56
  const entries = Object.entries(pkg.exports)
41
57
  .map(([subpath, val]) => ({ subpath, dist: val.import }))
42
58
  .filter((e): e is { subpath: string; dist: string } => typeof e.dist === "string");
43
59
 
60
+ // Self-heal so this test works from a clean checkout without a manual
61
+ // build step — otherwise `pnpm test` on a fresh worktree fails opaquely.
62
+ beforeAll(() => {
63
+ const missing = entries.some(({ dist }) => !existsSync(resolve(PKG_DIR, dist)));
64
+ if (missing) {
65
+ execFileSync("pnpm", ["--filter", "@alexkroman1/aai", "build"], {
66
+ cwd: resolve(PKG_DIR, "../.."),
67
+ stdio: "inherit",
68
+ });
69
+ }
70
+ }, 60_000);
71
+
44
72
  test.each(entries)("$subpath bundle has no devDependency import", ({ dist }) => {
45
73
  const file = resolve(PKG_DIR, dist);
46
- if (!existsSync(file)) {
47
- throw new Error(
48
- `Built artifact missing: ${file}. Run \`pnpm --filter @alexkroman1/aai build\` first.`,
49
- );
50
- }
51
74
  const src = readFileSync(file, "utf-8");
52
75
  const leaks = new Set<string>();
53
76
  for (const match of src.matchAll(IMPORT_RE)) {
@@ -57,6 +80,8 @@ describe("built exports do not import devDependencies", () => {
57
80
  const root = rootSpecifier(spec);
58
81
  if (devDeps.has(root)) leaks.add(root);
59
82
  }
60
- expect([...leaks], `devDependency imports in ${dist}: ${[...leaks].join(", ")}`).toEqual([]);
83
+ expect([...leaks], `devDependency-only imports in ${dist}: ${[...leaks].join(", ")}`).toEqual(
84
+ [],
85
+ );
61
86
  });
62
87
  });