@alexkroman1/aai 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/.turbo/turbo-build.log +18 -14
  2. package/CHANGELOG.md +2 -0
  3. package/dist/_internal-types-3p3OJZPb.js +145 -0
  4. package/dist/anthropic-BrUCPKUc.js +10 -0
  5. package/dist/assemblyai-Cxg9eobY.js +18 -0
  6. package/dist/cartesia-DwDk2tEu.js +10 -0
  7. package/dist/host/_pipeline-test-fakes.d.ts +5 -5
  8. package/dist/host/pipeline-session.d.ts +5 -5
  9. package/dist/host/providers/resolve.d.ts +34 -0
  10. package/dist/host/providers/stt/assemblyai.d.ts +9 -18
  11. package/dist/host/providers/tts/cartesia.d.ts +11 -18
  12. package/dist/host/runtime-barrel.js +345 -42
  13. package/dist/host/runtime.d.ts +13 -9
  14. package/dist/index.js +2 -91
  15. package/dist/sdk/_internal-types.d.ts +27 -1
  16. package/dist/sdk/manifest-barrel.d.ts +2 -0
  17. package/dist/sdk/manifest-barrel.js +2 -2
  18. package/dist/sdk/manifest.d.ts +13 -2
  19. package/dist/sdk/protocol.d.ts +3 -3
  20. package/dist/sdk/protocol.js +1 -1
  21. package/dist/sdk/providers/llm/anthropic.d.ts +23 -0
  22. package/dist/sdk/providers/llm-barrel.d.ts +9 -0
  23. package/dist/sdk/providers/llm-barrel.js +2 -0
  24. package/dist/sdk/providers/stt/assemblyai.d.ts +30 -0
  25. package/dist/sdk/providers/stt-barrel.d.ts +9 -0
  26. package/dist/sdk/providers/stt-barrel.js +2 -0
  27. package/dist/sdk/providers/tts/cartesia.d.ts +23 -0
  28. package/dist/sdk/providers/tts-barrel.d.ts +9 -0
  29. package/dist/sdk/providers/tts-barrel.js +2 -0
  30. package/dist/sdk/providers.d.ts +59 -11
  31. package/dist/types-KUgezM6u.js +128 -0
  32. package/host/_pipeline-test-fakes.ts +6 -6
  33. package/host/integration/pipeline-reference.integration.test.ts +4 -4
  34. package/host/pipeline-session.ts +6 -6
  35. package/host/providers/providers.test-d.ts +19 -10
  36. package/host/providers/resolve.ts +87 -0
  37. package/host/providers/stt/assemblyai.test.ts +2 -2
  38. package/host/providers/stt/assemblyai.ts +25 -47
  39. package/host/providers/tts/cartesia.test.ts +2 -2
  40. package/host/providers/tts/cartesia.ts +43 -73
  41. package/host/runtime.ts +66 -39
  42. package/package.json +13 -7
  43. package/sdk/__snapshots__/exports.test.ts.snap +2 -0
  44. package/sdk/__snapshots__/schema-shapes.test.ts.snap +4 -0
  45. package/sdk/_internal-types.ts +28 -1
  46. package/sdk/define.test.ts +12 -10
  47. package/sdk/manifest-barrel.ts +2 -0
  48. package/sdk/manifest.test.ts +6 -3
  49. package/sdk/manifest.ts +26 -18
  50. package/sdk/providers/llm/anthropic.ts +31 -0
  51. package/sdk/providers/llm-barrel.ts +12 -0
  52. package/sdk/providers/stt/assemblyai.ts +38 -0
  53. package/sdk/providers/stt-barrel.ts +12 -0
  54. package/sdk/providers/tts/cartesia.ts +31 -0
  55. package/sdk/providers/tts-barrel.ts +12 -0
  56. package/sdk/providers.ts +81 -17
  57. package/dist/_internal-types-CoDTiBd1.js +0 -61
  58. package/dist/host/providers/llm.d.ts +0 -2
  59. package/dist/host/providers/stt-barrel.d.ts +0 -8
  60. package/dist/host/providers/stt-barrel.js +0 -92
  61. package/dist/host/providers/stt.d.ts +0 -2
  62. package/dist/host/providers/tts-barrel.d.ts +0 -8
  63. package/dist/host/providers/tts-barrel.js +0 -182
  64. package/dist/host/providers/tts.d.ts +0 -2
  65. package/dist/types-Cfx_4QDK.js +0 -39
  66. package/host/providers/llm.ts +0 -3
  67. package/host/providers/stt-barrel.ts +0 -13
  68. package/host/providers/stt.ts +0 -3
  69. package/host/providers/tts-barrel.ts +0 -13
  70. package/host/providers/tts.ts +0 -3
  71. /package/dist/{constants-BL3nvg4I.js → constants-C2nirZUI.js} +0 -0
@@ -7,6 +7,13 @@
7
7
 
8
8
  import type { JSONSchema7 } from "json-schema";
9
9
  import { z } from "zod";
10
+ import { ProviderDescriptorSchema } from "./manifest.ts";
11
+ import {
12
+ assertProviderTriple,
13
+ type LlmProvider,
14
+ type SttProvider,
15
+ type TtsProvider,
16
+ } from "./providers.ts";
10
17
  import type { Message } from "./types.ts";
11
18
  import { BuiltinToolSchema, ToolChoiceSchema, type ToolDef } from "./types.ts";
12
19
 
@@ -56,6 +63,10 @@ export const AgentConfigSchema = z.object({
56
63
  toolChoice: ToolChoiceSchema.optional(),
57
64
  builtinTools: z.array(BuiltinToolSchema).readonly().optional(),
58
65
  idleTimeoutMs: z.number().nonnegative().optional(),
66
+ stt: ProviderDescriptorSchema.optional(),
67
+ llm: ProviderDescriptorSchema.optional(),
68
+ tts: ProviderDescriptorSchema.optional(),
69
+ mode: z.enum(["s2s", "pipeline"]).optional(),
59
70
  });
60
71
 
61
72
  /** Serializable agent configuration — derived from {@link AgentConfigSchema}. */
@@ -75,9 +86,18 @@ export interface AgentConfigSource {
75
86
  toolChoice?: AgentConfig["toolChoice"] | undefined;
76
87
  builtinTools?: Readonly<AgentConfig["builtinTools"]> | undefined;
77
88
  idleTimeoutMs?: number | undefined;
89
+ stt?: SttProvider | undefined;
90
+ llm?: LlmProvider | undefined;
91
+ tts?: TtsProvider | undefined;
78
92
  }
79
93
 
80
- /** Extract the serializable {@link AgentConfig} subset from a source object. */
94
+ /**
95
+ * Extract the serializable {@link AgentConfig} subset from a source object.
96
+ *
97
+ * The `stt`, `llm`, and `tts` descriptors are all-or-nothing: either all
98
+ * three are set or none is — enforced here so the server can trust the config.
99
+ * `mode` is derived from their presence.
100
+ */
81
101
  export function toAgentConfig(src: AgentConfigSource): AgentConfig {
82
102
  const config: AgentConfig = {
83
103
  name: src.name,
@@ -89,6 +109,13 @@ export function toAgentConfig(src: AgentConfigSource): AgentConfig {
89
109
  if (src.toolChoice !== undefined) config.toolChoice = src.toolChoice;
90
110
  if (src.builtinTools) config.builtinTools = [...src.builtinTools];
91
111
  if (src.idleTimeoutMs !== undefined) config.idleTimeoutMs = src.idleTimeoutMs;
112
+
113
+ config.mode = assertProviderTriple(src.stt, src.llm, src.tts);
114
+ if (config.mode === "pipeline") {
115
+ config.stt = src.stt;
116
+ config.llm = src.llm;
117
+ config.tts = src.tts;
118
+ }
92
119
  return config;
93
120
  }
94
121
 
@@ -3,7 +3,9 @@ import { describe, expect, test } from "vitest";
3
3
  import { z } from "zod";
4
4
  import { agent, tool } from "./define.ts";
5
5
  import { parseManifest } from "./manifest.ts";
6
- import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
6
+ import { anthropic } from "./providers/llm/anthropic.ts";
7
+ import { assemblyAI } from "./providers/stt/assemblyai.ts";
8
+ import { cartesia } from "./providers/tts/cartesia.ts";
7
9
 
8
10
  describe("tool()", () => {
9
11
  test("returns the definition unchanged", () => {
@@ -58,9 +60,9 @@ describe("agent()", () => {
58
60
  });
59
61
 
60
62
  test("preserves stt/llm/tts providers on the returned def", () => {
61
- const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
62
- const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
63
- const llm = {} as LlmProvider;
63
+ const stt = assemblyAI({ model: "u3pro-rt" });
64
+ const tts = cartesia({ voice: "v" });
65
+ const llm = anthropic({ model: "claude-haiku-4-5" });
64
66
  const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
65
67
  expect(def.stt).toBe(stt);
66
68
  expect(def.llm).toBe(llm);
@@ -68,15 +70,15 @@ describe("agent()", () => {
68
70
  });
69
71
 
70
72
  test("stt/llm/tts flow through parseManifest to mode 'pipeline'", () => {
71
- const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
72
- const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
73
- const llm = {} as LlmProvider;
73
+ const stt = assemblyAI({ model: "u3pro-rt" });
74
+ const tts = cartesia({ voice: "v" });
75
+ const llm = anthropic({ model: "claude-haiku-4-5" });
74
76
  const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
75
77
  const parsed = parseManifest(def);
76
78
  expect(parsed.mode).toBe("pipeline");
77
- expect(parsed.stt).toBe(stt);
78
- expect(parsed.llm).toBe(llm);
79
- expect(parsed.tts).toBe(tts);
79
+ expect(parsed.stt).toStrictEqual(stt);
80
+ expect(parsed.llm).toStrictEqual(llm);
81
+ expect(parsed.tts).toStrictEqual(tts);
80
82
  });
81
83
 
82
84
  test("agent without providers resolves to mode 's2s'", () => {
@@ -16,3 +16,5 @@ export {
16
16
  ToolSchemaSchema,
17
17
  toAgentConfig,
18
18
  } from "./_internal-types.ts";
19
+ export { ProviderDescriptorSchema } from "./manifest.ts";
20
+ export { assertProviderTriple, type SessionMode } from "./providers.ts";
@@ -4,6 +4,9 @@ import { describe, expect, expectTypeOf, test } from "vitest";
4
4
  import { type Manifest, parseManifest } from "./manifest.ts";
5
5
  import type { AgentConfig, ToolSchema } from "./manifest-barrel.ts";
6
6
  import { agentToolsToSchemas, toAgentConfig } from "./manifest-barrel.ts";
7
+ import { anthropic } from "./providers/llm/anthropic.ts";
8
+ import { assemblyAI } from "./providers/stt/assemblyai.ts";
9
+ import { cartesia } from "./providers/tts/cartesia.ts";
7
10
 
8
11
  describe("parseManifest", () => {
9
12
  test("minimal manifest requires only name", () => {
@@ -164,9 +167,9 @@ describe("manifest type contracts", () => {
164
167
  });
165
168
 
166
169
  describe("parseManifest — mode classification", () => {
167
- const stubStt = { name: "stub-stt", open: async () => ({}) };
168
- const stubTts = { name: "stub-tts", open: async () => ({}) };
169
- const stubLlm = { modelId: "stub-llm" };
170
+ const stubStt = assemblyAI({ model: "u3pro-rt" });
171
+ const stubTts = cartesia({ voice: "v" });
172
+ const stubLlm = anthropic({ model: "claude-haiku-4-5" });
170
173
 
171
174
  test("no stt/llm/tts ⇒ mode: 's2s'", () => {
172
175
  const parsed = parseManifest({
package/sdk/manifest.ts CHANGED
@@ -8,7 +8,13 @@
8
8
 
9
9
  import { z } from "zod";
10
10
  import { validateAllowedHostPattern } from "./allowed-hosts.ts";
11
- import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
11
+ import {
12
+ assertProviderTriple,
13
+ type LlmProvider,
14
+ type SessionMode,
15
+ type SttProvider,
16
+ type TtsProvider,
17
+ } from "./providers.ts";
12
18
  import { BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_SYSTEM_PROMPT } from "./types.ts";
13
19
 
14
20
  /**
@@ -67,7 +73,7 @@ export type Manifest = {
67
73
  * - `"s2s"` (default): AssemblyAI Streaming Speech-to-Speech path (no stt/llm/tts set).
68
74
  * - `"pipeline"`: pluggable STT → LLM → TTS path (stt + llm + tts all set).
69
75
  */
70
- mode: "s2s" | "pipeline";
76
+ mode: SessionMode;
71
77
  };
72
78
 
73
79
  const ToolManifestSchema = z.object({
@@ -75,6 +81,17 @@ const ToolManifestSchema = z.object({
75
81
  parameters: z.record(z.string(), z.unknown()).optional(),
76
82
  });
77
83
 
84
+ /**
85
+ * Provider descriptor — a `{ kind, options }` pair produced by factories
86
+ * like `assemblyAI(...)` / `anthropic(...)` / `cartesia(...)`. Kept
87
+ * deliberately generic at the schema layer: kind-specific validation lives
88
+ * in the host-side resolver, which knows what each adapter expects.
89
+ */
90
+ export const ProviderDescriptorSchema = z.object({
91
+ kind: z.string().min(1),
92
+ options: z.record(z.string(), z.unknown()),
93
+ });
94
+
78
95
  const ManifestSchema = z.object({
79
96
  name: z.string().min(1),
80
97
  systemPrompt: z.string().optional(),
@@ -101,6 +118,9 @@ const ManifestSchema = z.object({
101
118
  }
102
119
  }
103
120
  }),
121
+ stt: ProviderDescriptorSchema.optional(),
122
+ llm: ProviderDescriptorSchema.optional(),
123
+ tts: ProviderDescriptorSchema.optional(),
104
124
  });
105
125
 
106
126
  /**
@@ -114,19 +134,7 @@ const ManifestSchema = z.object({
114
134
  */
115
135
  export function parseManifest(input: unknown): Manifest {
116
136
  const parsed = ManifestSchema.parse(input);
117
- // stt/llm/tts are runtime objects (functions/classes) that can't be
118
- // validated by the JSON-oriented Zod schema. Pull them straight from
119
- // the raw input and enforce all-or-nothing presence here.
120
- const raw = (input ?? {}) as {
121
- stt?: SttProvider;
122
- llm?: LlmProvider;
123
- tts?: TtsProvider;
124
- };
125
- const providerCount = [raw.stt, raw.llm, raw.tts].filter((x) => x != null).length;
126
- if (providerCount !== 0 && providerCount !== 3) {
127
- throw new Error("stt, llm, and tts must be set together");
128
- }
129
- const mode: "s2s" | "pipeline" = providerCount === 3 ? "pipeline" : "s2s";
137
+ const mode = assertProviderTriple(parsed.stt, parsed.llm, parsed.tts);
130
138
  return {
131
139
  name: parsed.name,
132
140
  systemPrompt: parsed.systemPrompt ?? DEFAULT_SYSTEM_PROMPT,
@@ -139,9 +147,9 @@ export function parseManifest(input: unknown): Manifest {
139
147
  theme: parsed.theme,
140
148
  tools: parsed.tools ?? {},
141
149
  allowedHosts: parsed.allowedHosts ?? [],
142
- stt: raw.stt,
143
- llm: raw.llm,
144
- tts: raw.tts,
150
+ stt: parsed.stt,
151
+ llm: parsed.llm,
152
+ tts: parsed.tts,
145
153
  mode,
146
154
  };
147
155
  }
@@ -0,0 +1,31 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * Anthropic LLM factory — returns a pure descriptor.
4
+ *
5
+ * Users call this in place of importing from `@ai-sdk/anthropic` directly,
6
+ * so agent bundles don't drag the Anthropic SDK into the guest sandbox
7
+ * (which has no `--allow-env` permission and would crash on the SDK's
8
+ * eager `ANTHROPIC_BASE_URL` read).
9
+ *
10
+ * The host-side resolver in `host/providers/resolve.ts` builds a real
11
+ * Vercel AI SDK `LanguageModel` from this descriptor during
12
+ * `createRuntime`, using `ANTHROPIC_API_KEY` from the agent's env.
13
+ */
14
+
15
+ import type { LlmProvider } from "../../providers.ts";
16
+
17
+ export const ANTHROPIC_KIND = "anthropic" as const;
18
+
19
+ export interface AnthropicOptions {
20
+ /** Anthropic model id, e.g. `"claude-haiku-4-5"`. */
21
+ model: string;
22
+ }
23
+
24
+ export type AnthropicProvider = LlmProvider & {
25
+ readonly kind: typeof ANTHROPIC_KIND;
26
+ readonly options: AnthropicOptions;
27
+ };
28
+
29
+ export function anthropic(opts: AnthropicOptions): AnthropicProvider {
30
+ return { kind: ANTHROPIC_KIND, options: { ...opts } };
31
+ }
@@ -0,0 +1,12 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * `@alexkroman1/aai/llm` subpath barrel.
4
+ *
5
+ * Re-exports LLM provider factories. Users import from here instead of
6
+ * `@ai-sdk/anthropic` directly so the agent bundle stays free of eager
7
+ * env reads and other SDK side-effects.
8
+ */
9
+
10
+ export type { LlmProvider } from "../providers.ts";
11
+ // biome-ignore lint/performance/noReExportAll: subpath barrel
12
+ export * from "./llm/anthropic.ts";
@@ -0,0 +1,38 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * AssemblyAI Universal-Streaming STT factory — returns a pure descriptor.
4
+ *
5
+ * The descriptor flows through the bundle → server → runtime pipeline
6
+ * without importing the `assemblyai` SDK. The host-side resolver in
7
+ * `host/providers/resolve.ts` turns it into an openable {@link SttOpener}
8
+ * during `createRuntime`.
9
+ */
10
+
11
+ import type { SttProvider } from "../../providers.ts";
12
+
13
+ /** Kind tag recognised by the host-side resolver. */
14
+ export const ASSEMBLYAI_KIND = "assemblyai" as const;
15
+
16
+ export interface AssemblyAIOptions {
17
+ /**
18
+ * Streaming speech model. Defaults to `"u3pro-rt"` (Universal-3 Pro
19
+ * Real-Time). Arbitrary strings are forwarded to the SDK unchanged.
20
+ */
21
+ model?: "u3pro-rt" | string;
22
+ }
23
+
24
+ export type AssemblyAIProvider = SttProvider & {
25
+ readonly kind: typeof ASSEMBLYAI_KIND;
26
+ readonly options: AssemblyAIOptions;
27
+ };
28
+
29
+ /**
30
+ * Build an AssemblyAI STT descriptor.
31
+ *
32
+ * The API key is resolved host-side from the agent's env
33
+ * (`ASSEMBLYAI_API_KEY`); there is no factory-time key parameter, so the
34
+ * descriptor stays free of secrets and safe to serialize.
35
+ */
36
+ export function assemblyAI(opts: AssemblyAIOptions = {}): AssemblyAIProvider {
37
+ return { kind: ASSEMBLYAI_KIND, options: { ...opts } };
38
+ }
@@ -0,0 +1,12 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * `@alexkroman1/aai/stt` subpath barrel.
4
+ *
5
+ * Re-exports the descriptor factory (`assemblyAI`) and the shared STT
6
+ * contract types. Importing this barrel does not pull in the `assemblyai`
7
+ * SDK — that happens only when the host resolver is invoked.
8
+ */
9
+
10
+ export type { SttError, SttEvents, SttOpenOptions, SttProvider, SttSession } from "../providers.ts";
11
+ // biome-ignore lint/performance/noReExportAll: subpath barrel
12
+ export * from "./stt/assemblyai.ts";
@@ -0,0 +1,31 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * Cartesia TTS factory — returns a pure descriptor.
4
+ *
5
+ * See `sdk/providers/stt/assemblyai.ts` for the descriptor/opener split;
6
+ * the host-side resolver in `host/providers/resolve.ts` turns this into an
7
+ * openable {@link TtsOpener} during `createRuntime` using the
8
+ * `CARTESIA_API_KEY` from the agent's env.
9
+ */
10
+
11
+ import type { TtsProvider } from "../../providers.ts";
12
+
13
+ export const CARTESIA_KIND = "cartesia" as const;
14
+
15
+ export interface CartesiaOptions {
16
+ /** Cartesia voice ID. Required. */
17
+ voice: string;
18
+ /** Model ID. Defaults to `"sonic-2"`. */
19
+ model?: string;
20
+ /** Spoken language hint. Defaults to `"en"`. */
21
+ language?: string;
22
+ }
23
+
24
+ export type CartesiaProvider = TtsProvider & {
25
+ readonly kind: typeof CARTESIA_KIND;
26
+ readonly options: CartesiaOptions;
27
+ };
28
+
29
+ export function cartesia(opts: CartesiaOptions): CartesiaProvider {
30
+ return { kind: CARTESIA_KIND, options: { ...opts } };
31
+ }
@@ -0,0 +1,12 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * `@alexkroman1/aai/tts` subpath barrel.
4
+ *
5
+ * Re-exports the descriptor factory (`cartesia`) and the shared TTS
6
+ * contract types. Does not pull in `@cartesia/cartesia-js` — the host
7
+ * resolver handles that at session start.
8
+ */
9
+
10
+ export type { TtsError, TtsEvents, TtsOpenOptions, TtsProvider, TtsSession } from "../providers.ts";
11
+ // biome-ignore lint/performance/noReExportAll: subpath barrel
12
+ export * from "./tts/cartesia.ts";
package/sdk/providers.ts CHANGED
@@ -1,25 +1,87 @@
1
1
  // Copyright 2025 the AAI authors. MIT license.
2
2
  /**
3
- * Pluggable provider interfaces — normalized seams over streaming STT / TTS
4
- * SDKs, plus the LLM provider type.
3
+ * Pluggable provider contracts.
5
4
  *
6
- * These are zero-runtime **type** declarations with no Node.js dependencies,
7
- * so they live in `sdk/` alongside the `Manifest` type that references them.
8
- * Concrete adapters (e.g. AssemblyAI STT, Cartesia TTS) live under
9
- * `host/providers/` because they depend on Node-only SDKs.
5
+ * **Two layers, strict boundary.**
6
+ *
7
+ * - The *descriptor* layer (`SttProvider` / `LlmProvider` / `TtsProvider`) is
8
+ * pure data — `{ kind, options }` objects returned by the user-facing
9
+ * factories (`assemblyAI(...)`, `anthropic(...)`, `cartesia(...)`). They
10
+ * are JSON-serializable, contain no functions, and can cross the CLI →
11
+ * server → guest boundary without evaluating any third-party SDK.
12
+ * They live in `sdk/` alongside `Manifest` and have zero Node-only deps.
13
+ *
14
+ * - The *openable* layer (`SttOpener` / `TtsOpener` + `SttSession` /
15
+ * `TtsSession`) is host-only. The host's internal
16
+ * `host/providers/resolve.ts` registry turns descriptors into openers
17
+ * during `createRuntime`, importing the concrete SDKs (`assemblyai`,
18
+ * `@cartesia/cartesia-js`, `@ai-sdk/anthropic`) only at that point.
19
+ * Only the openable layer talks to the network; descriptors never do.
20
+ *
21
+ * This split is load-bearing for the sandboxed deployment path: the guest
22
+ * Deno sandbox can import `@alexkroman1/aai/{stt,tts,llm}` without pulling
23
+ * in any AI-SDK code, which means no env reads (`ANTHROPIC_BASE_URL`, etc.)
24
+ * at bundle load — the exact failure mode that forced this refactor.
10
25
  */
11
26
 
12
- import type { LanguageModel } from "ai";
13
-
14
27
  /** Unsubscribe callback returned by `.on()` event subscriptions. */
15
28
  export type Unsubscribe = () => void;
16
29
 
17
- // -------- STT --------
30
+ // -------- Descriptor shape (user-facing, serializable) ----------------------
31
+
32
+ /**
33
+ * Base shape for a provider descriptor. A `kind` tag + opaque `options`
34
+ * payload lets the host registry pick the right resolver and pass the
35
+ * caller's options through verbatim.
36
+ */
37
+ export interface ProviderDescriptor<Kind extends string, Options> {
38
+ readonly kind: Kind;
39
+ readonly options: Options;
40
+ }
41
+
42
+ /** Descriptor for an STT provider. Returned by factories like `assemblyAI(...)`. */
43
+ export type SttProvider = ProviderDescriptor<string, Record<string, unknown>>;
44
+
45
+ /** Descriptor for an LLM provider. Returned by factories like `anthropic(...)`. */
46
+ export type LlmProvider = ProviderDescriptor<string, Record<string, unknown>>;
47
+
48
+ /** Descriptor for a TTS provider. Returned by factories like `cartesia(...)`. */
49
+ export type TtsProvider = ProviderDescriptor<string, Record<string, unknown>>;
50
+
51
+ /**
52
+ * Session mode derived from whether the provider triple (STT, LLM, TTS) is set.
53
+ *
54
+ * `parseManifest`, `toAgentConfig`, `createRuntime`, and the server's
55
+ * `IsolateConfigSchema` all use {@link assertProviderTriple} so there's
56
+ * one source of truth for the validation.
57
+ */
58
+ export type SessionMode = "s2s" | "pipeline";
59
+
60
+ /**
61
+ * Enforce the all-or-nothing provider rule and return the derived mode.
62
+ *
63
+ * Pipeline mode requires STT, LLM, and TTS all set; S2S mode requires
64
+ * none of them. Anything in-between is a configuration error.
65
+ */
66
+ export function assertProviderTriple(stt: unknown, llm: unknown, tts: unknown): SessionMode {
67
+ const count = (stt != null ? 1 : 0) + (llm != null ? 1 : 0) + (tts != null ? 1 : 0);
68
+ if (count !== 0 && count !== 3) {
69
+ throw new Error("stt, llm, and tts must be set together");
70
+ }
71
+ return count === 3 ? "pipeline" : "s2s";
72
+ }
73
+
74
+ // -------- STT openable (host-only) ------------------------------------------
18
75
 
19
76
  export interface SttError extends Error {
20
77
  readonly code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error";
21
78
  }
22
79
 
80
+ /** Build an {@link SttError} with a typed `code`. Zero-dep helper so both sdk/ and host/ can use it. */
81
+ export function makeSttError(code: SttError["code"], message: string): SttError {
82
+ return Object.assign(new Error(message), { code }) as SttError;
83
+ }
84
+
23
85
  export type SttEvents = {
24
86
  /** Interim transcript; drives barge-in detection. */
25
87
  partial: (text: string) => void;
@@ -42,17 +104,23 @@ export interface SttOpenOptions {
42
104
  signal: AbortSignal;
43
105
  }
44
106
 
45
- export interface SttProvider {
107
+ /** Host-side openable STT provider — produced by `resolveStt(descriptor)`. */
108
+ export interface SttOpener {
46
109
  readonly name: string;
47
110
  open(opts: SttOpenOptions): Promise<SttSession>;
48
111
  }
49
112
 
50
- // -------- TTS --------
113
+ // -------- TTS openable (host-only) ------------------------------------------
51
114
 
52
115
  export interface TtsError extends Error {
53
116
  readonly code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error";
54
117
  }
55
118
 
119
+ /** Build a {@link TtsError} with a typed `code`. Mirror of {@link makeSttError}. */
120
+ export function makeTtsError(code: TtsError["code"], message: string): TtsError {
121
+ return Object.assign(new Error(message), { code }) as TtsError;
122
+ }
123
+
56
124
  export type TtsEvents = {
57
125
  /** One PCM16 audio chunk. Orchestrator forwards to the client. */
58
126
  audio: (pcm: Int16Array) => void;
@@ -79,12 +147,8 @@ export interface TtsOpenOptions {
79
147
  signal: AbortSignal;
80
148
  }
81
149
 
82
- export interface TtsProvider {
150
+ /** Host-side openable TTS provider — produced by `resolveTts(descriptor)`. */
151
+ export interface TtsOpener {
83
152
  readonly name: string;
84
153
  open(opts: TtsOpenOptions): Promise<TtsSession>;
85
154
  }
86
-
87
- // -------- LLM --------
88
-
89
- /** LLM provider — Vercel AI SDK's `LanguageModel`; no wrapping. */
90
- export type LlmProvider = LanguageModel;
@@ -1,61 +0,0 @@
1
- import { i as ToolChoiceSchema, t as BuiltinToolSchema } from "./types-Cfx_4QDK.js";
2
- import { z } from "zod";
3
- //#region sdk/_internal-types.ts
4
- /**
5
- * Zod schema for serializable agent configuration sent over the wire.
6
- *
7
- * This is the JSON-safe subset of the agent definition that can be
8
- * transmitted between the worker and the host process via structured clone.
9
- */
10
- const AgentConfigSchema = z.object({
11
- name: z.string().min(1),
12
- systemPrompt: z.string(),
13
- greeting: z.string(),
14
- sttPrompt: z.string().optional(),
15
- maxSteps: z.number().int().positive().optional(),
16
- toolChoice: ToolChoiceSchema.optional(),
17
- builtinTools: z.array(BuiltinToolSchema).readonly().optional(),
18
- idleTimeoutMs: z.number().nonnegative().optional()
19
- });
20
- /** Extract the serializable {@link AgentConfig} subset from a source object. */
21
- function toAgentConfig(src) {
22
- const config = {
23
- name: src.name,
24
- systemPrompt: src.systemPrompt,
25
- greeting: src.greeting
26
- };
27
- if (src.sttPrompt !== void 0) config.sttPrompt = src.sttPrompt;
28
- if (src.maxSteps !== void 0) config.maxSteps = src.maxSteps;
29
- if (src.toolChoice !== void 0) config.toolChoice = src.toolChoice;
30
- if (src.builtinTools) config.builtinTools = [...src.builtinTools];
31
- if (src.idleTimeoutMs !== void 0) config.idleTimeoutMs = src.idleTimeoutMs;
32
- return config;
33
- }
34
- /**
35
- * Zod schema for serialized tool definitions sent over the wire.
36
- *
37
- * `parameters` must be a valid JSON Schema object (with `type`, `properties`,
38
- * etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
39
- */
40
- const ToolSchemaSchema = z.object({
41
- name: z.string().min(1),
42
- description: z.string().min(1),
43
- parameters: z.record(z.string(), z.unknown())
44
- });
45
- /** Empty Zod object schema used as default when tools have no parameters. */
46
- const EMPTY_PARAMS = z.object({});
47
- /**
48
- * Convert agent tool definitions to JSON Schema format for wire transport.
49
- *
50
- * Transforms the Zod-based `parameters` of each tool into a plain JSON Schema
51
- * object suitable for structured clone / JSON serialization.
52
- */
53
- function agentToolsToSchemas(tools) {
54
- return Object.entries(tools).map(([name, def]) => ({
55
- name,
56
- description: def.description,
57
- parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
58
- }));
59
- }
60
- //#endregion
61
- export { toAgentConfig as a, agentToolsToSchemas as i, EMPTY_PARAMS as n, ToolSchemaSchema as r, AgentConfigSchema as t };
@@ -1,2 +0,0 @@
1
- /** LLM provider type — re-exported from sdk/ for host-side consumption. */
2
- export type * from "../../sdk/providers.ts";
@@ -1,8 +0,0 @@
1
- /**
2
- * `@alexkroman1/aai/stt` subpath barrel. Re-exports the STT provider
3
- * contract types (via `stt.ts` → `sdk/providers.ts`) alongside the
4
- * concrete AssemblyAI adapter factory. Task 9 owns wiring this file
5
- * into `package.json` exports.
6
- */
7
- export * from "./stt/assemblyai.ts";
8
- export type * from "./stt.ts";
@@ -1,92 +0,0 @@
1
- import { createNanoEvents } from "nanoevents";
2
- import { AssemblyAI } from "assemblyai";
3
- //#region host/providers/stt/assemblyai.ts
4
- /**
5
- * AssemblyAI Universal-Streaming STT adapter.
6
- *
7
- * Wraps the `assemblyai` Node SDK's {@link StreamingTranscriber} and
8
- * normalizes its event surface onto the {@link SttProvider} /
9
- * {@link SttEvents} contract consumed by the pipeline orchestrator.
10
- *
11
- * Default model: `"u3pro-rt"` (Universal-3 Pro Real-Time). The adapter
12
- * maps that to the SDK's `"u3-rt-pro"` `speechModel` value; any other
13
- * string is forwarded verbatim.
14
- */
15
- /** Translate the adapter's model alias to the SDK's `speechModel` value. */
16
- function resolveSpeechModel(model) {
17
- if (model === "u3pro-rt") return "u3-rt-pro";
18
- return model;
19
- }
20
- function makeError(message) {
21
- const err = new Error(message);
22
- err.code = "stt_stream_error";
23
- return err;
24
- }
25
- function assemblyAI(opts = {}) {
26
- return {
27
- name: "assemblyai",
28
- async open(openOpts) {
29
- const apiKey = opts.apiKey ?? openOpts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
30
- if (!apiKey) {
31
- const err = /* @__PURE__ */ new Error("AssemblyAI STT adapter: missing API key. Provide via the factory option, SttOpenOptions, or the ASSEMBLYAI_API_KEY environment variable.");
32
- err.code = "stt_auth_failed";
33
- throw err;
34
- }
35
- const client = new AssemblyAI({ apiKey });
36
- const speechModel = resolveSpeechModel(opts.model ?? "u3pro-rt");
37
- const transcriber = client.streaming.transcriber({
38
- sampleRate: openOpts.sampleRate,
39
- speechModel,
40
- ...openOpts.sttPrompt ? { prompt: openOpts.sttPrompt } : {}
41
- });
42
- const emitter = createNanoEvents();
43
- let closed = false;
44
- transcriber.on("turn", (event) => {
45
- if (closed) return;
46
- const text = event.transcript ?? "";
47
- if (event.end_of_turn) {
48
- if (text.length > 0) emitter.emit("final", text);
49
- } else if (text.length > 0) emitter.emit("partial", text);
50
- });
51
- transcriber.on("error", (err) => {
52
- if (closed) return;
53
- emitter.emit("error", makeError(err?.message ?? String(err)));
54
- });
55
- transcriber.on("close", (code) => {
56
- if (closed) return;
57
- if (code !== 1e3) emitter.emit("error", makeError(`socket closed ${code}`));
58
- });
59
- try {
60
- await transcriber.connect();
61
- } catch (cause) {
62
- const err = /* @__PURE__ */ new Error(`AssemblyAI STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
63
- err.code = "stt_connect_failed";
64
- throw err;
65
- }
66
- const close = async () => {
67
- if (closed) return;
68
- closed = true;
69
- try {
70
- await transcriber.close();
71
- } catch {}
72
- };
73
- if (openOpts.signal.aborted) close();
74
- else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
75
- return {
76
- sendAudio(pcm) {
77
- if (closed) return;
78
- const copy = new Uint8Array(pcm.byteLength);
79
- copy.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
80
- transcriber.sendAudio(copy.buffer);
81
- },
82
- on(event, fn) {
83
- return emitter.on(event, fn);
84
- },
85
- close,
86
- _transcriber: transcriber
87
- };
88
- }
89
- };
90
- }
91
- //#endregion
92
- export { assemblyAI };