@alexkroman1/aai 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +18 -14
- package/CHANGELOG.md +2 -0
- package/dist/_internal-types-3p3OJZPb.js +145 -0
- package/dist/anthropic-BrUCPKUc.js +10 -0
- package/dist/assemblyai-Cxg9eobY.js +18 -0
- package/dist/cartesia-DwDk2tEu.js +10 -0
- package/dist/host/_pipeline-test-fakes.d.ts +5 -5
- package/dist/host/pipeline-session.d.ts +5 -5
- package/dist/host/providers/resolve.d.ts +34 -0
- package/dist/host/providers/stt/assemblyai.d.ts +9 -18
- package/dist/host/providers/tts/cartesia.d.ts +11 -18
- package/dist/host/runtime-barrel.js +345 -42
- package/dist/host/runtime.d.ts +13 -9
- package/dist/index.js +2 -91
- package/dist/sdk/_internal-types.d.ts +27 -1
- package/dist/sdk/manifest-barrel.d.ts +2 -0
- package/dist/sdk/manifest-barrel.js +2 -2
- package/dist/sdk/manifest.d.ts +13 -2
- package/dist/sdk/protocol.d.ts +3 -3
- package/dist/sdk/protocol.js +1 -1
- package/dist/sdk/providers/llm/anthropic.d.ts +23 -0
- package/dist/sdk/providers/llm-barrel.d.ts +9 -0
- package/dist/sdk/providers/llm-barrel.js +2 -0
- package/dist/sdk/providers/stt/assemblyai.d.ts +30 -0
- package/dist/sdk/providers/stt-barrel.d.ts +9 -0
- package/dist/sdk/providers/stt-barrel.js +2 -0
- package/dist/sdk/providers/tts/cartesia.d.ts +23 -0
- package/dist/sdk/providers/tts-barrel.d.ts +9 -0
- package/dist/sdk/providers/tts-barrel.js +2 -0
- package/dist/sdk/providers.d.ts +59 -11
- package/dist/types-KUgezM6u.js +128 -0
- package/host/_pipeline-test-fakes.ts +6 -6
- package/host/integration/pipeline-reference.integration.test.ts +4 -4
- package/host/pipeline-session.ts +6 -6
- package/host/providers/providers.test-d.ts +19 -10
- package/host/providers/resolve.ts +87 -0
- package/host/providers/stt/assemblyai.test.ts +2 -2
- package/host/providers/stt/assemblyai.ts +25 -47
- package/host/providers/tts/cartesia.test.ts +2 -2
- package/host/providers/tts/cartesia.ts +43 -73
- package/host/runtime.ts +66 -39
- package/package.json +13 -7
- package/sdk/__snapshots__/exports.test.ts.snap +2 -0
- package/sdk/__snapshots__/schema-shapes.test.ts.snap +4 -0
- package/sdk/_internal-types.ts +28 -1
- package/sdk/define.test.ts +12 -10
- package/sdk/manifest-barrel.ts +2 -0
- package/sdk/manifest.test.ts +6 -3
- package/sdk/manifest.ts +26 -18
- package/sdk/providers/llm/anthropic.ts +31 -0
- package/sdk/providers/llm-barrel.ts +12 -0
- package/sdk/providers/stt/assemblyai.ts +38 -0
- package/sdk/providers/stt-barrel.ts +12 -0
- package/sdk/providers/tts/cartesia.ts +31 -0
- package/sdk/providers/tts-barrel.ts +12 -0
- package/sdk/providers.ts +81 -17
- package/dist/_internal-types-CoDTiBd1.js +0 -61
- package/dist/host/providers/llm.d.ts +0 -2
- package/dist/host/providers/stt-barrel.d.ts +0 -8
- package/dist/host/providers/stt-barrel.js +0 -92
- package/dist/host/providers/stt.d.ts +0 -2
- package/dist/host/providers/tts-barrel.d.ts +0 -8
- package/dist/host/providers/tts-barrel.js +0 -182
- package/dist/host/providers/tts.d.ts +0 -2
- package/dist/types-Cfx_4QDK.js +0 -39
- package/host/providers/llm.ts +0 -3
- package/host/providers/stt-barrel.ts +0 -13
- package/host/providers/stt.ts +0 -3
- package/host/providers/tts-barrel.ts +0 -13
- package/host/providers/tts.ts +0 -3
- /package/dist/{constants-BL3nvg4I.js → constants-C2nirZUI.js} +0 -0
package/sdk/_internal-types.ts
CHANGED
|
@@ -7,6 +7,13 @@
|
|
|
7
7
|
|
|
8
8
|
import type { JSONSchema7 } from "json-schema";
|
|
9
9
|
import { z } from "zod";
|
|
10
|
+
import { ProviderDescriptorSchema } from "./manifest.ts";
|
|
11
|
+
import {
|
|
12
|
+
assertProviderTriple,
|
|
13
|
+
type LlmProvider,
|
|
14
|
+
type SttProvider,
|
|
15
|
+
type TtsProvider,
|
|
16
|
+
} from "./providers.ts";
|
|
10
17
|
import type { Message } from "./types.ts";
|
|
11
18
|
import { BuiltinToolSchema, ToolChoiceSchema, type ToolDef } from "./types.ts";
|
|
12
19
|
|
|
@@ -56,6 +63,10 @@ export const AgentConfigSchema = z.object({
|
|
|
56
63
|
toolChoice: ToolChoiceSchema.optional(),
|
|
57
64
|
builtinTools: z.array(BuiltinToolSchema).readonly().optional(),
|
|
58
65
|
idleTimeoutMs: z.number().nonnegative().optional(),
|
|
66
|
+
stt: ProviderDescriptorSchema.optional(),
|
|
67
|
+
llm: ProviderDescriptorSchema.optional(),
|
|
68
|
+
tts: ProviderDescriptorSchema.optional(),
|
|
69
|
+
mode: z.enum(["s2s", "pipeline"]).optional(),
|
|
59
70
|
});
|
|
60
71
|
|
|
61
72
|
/** Serializable agent configuration — derived from {@link AgentConfigSchema}. */
|
|
@@ -75,9 +86,18 @@ export interface AgentConfigSource {
|
|
|
75
86
|
toolChoice?: AgentConfig["toolChoice"] | undefined;
|
|
76
87
|
builtinTools?: Readonly<AgentConfig["builtinTools"]> | undefined;
|
|
77
88
|
idleTimeoutMs?: number | undefined;
|
|
89
|
+
stt?: SttProvider | undefined;
|
|
90
|
+
llm?: LlmProvider | undefined;
|
|
91
|
+
tts?: TtsProvider | undefined;
|
|
78
92
|
}
|
|
79
93
|
|
|
80
|
-
/**
|
|
94
|
+
/**
|
|
95
|
+
* Extract the serializable {@link AgentConfig} subset from a source object.
|
|
96
|
+
*
|
|
97
|
+
* When `stt`, `llm`, and `tts` descriptors are present they are all three
|
|
98
|
+
* required (or none) — enforced here so the server can trust the config.
|
|
99
|
+
* `mode` is derived from their presence.
|
|
100
|
+
*/
|
|
81
101
|
export function toAgentConfig(src: AgentConfigSource): AgentConfig {
|
|
82
102
|
const config: AgentConfig = {
|
|
83
103
|
name: src.name,
|
|
@@ -89,6 +109,13 @@ export function toAgentConfig(src: AgentConfigSource): AgentConfig {
|
|
|
89
109
|
if (src.toolChoice !== undefined) config.toolChoice = src.toolChoice;
|
|
90
110
|
if (src.builtinTools) config.builtinTools = [...src.builtinTools];
|
|
91
111
|
if (src.idleTimeoutMs !== undefined) config.idleTimeoutMs = src.idleTimeoutMs;
|
|
112
|
+
|
|
113
|
+
config.mode = assertProviderTriple(src.stt, src.llm, src.tts);
|
|
114
|
+
if (config.mode === "pipeline") {
|
|
115
|
+
config.stt = src.stt;
|
|
116
|
+
config.llm = src.llm;
|
|
117
|
+
config.tts = src.tts;
|
|
118
|
+
}
|
|
92
119
|
return config;
|
|
93
120
|
}
|
|
94
121
|
|
package/sdk/define.test.ts
CHANGED
|
@@ -3,7 +3,9 @@ import { describe, expect, test } from "vitest";
|
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { agent, tool } from "./define.ts";
|
|
5
5
|
import { parseManifest } from "./manifest.ts";
|
|
6
|
-
import
|
|
6
|
+
import { anthropic } from "./providers/llm/anthropic.ts";
|
|
7
|
+
import { assemblyAI } from "./providers/stt/assemblyai.ts";
|
|
8
|
+
import { cartesia } from "./providers/tts/cartesia.ts";
|
|
7
9
|
|
|
8
10
|
describe("tool()", () => {
|
|
9
11
|
test("returns the definition unchanged", () => {
|
|
@@ -58,9 +60,9 @@ describe("agent()", () => {
|
|
|
58
60
|
});
|
|
59
61
|
|
|
60
62
|
test("preserves stt/llm/tts providers on the returned def", () => {
|
|
61
|
-
const stt = {
|
|
62
|
-
const tts = {
|
|
63
|
-
const llm = {
|
|
63
|
+
const stt = assemblyAI({ model: "u3pro-rt" });
|
|
64
|
+
const tts = cartesia({ voice: "v" });
|
|
65
|
+
const llm = anthropic({ model: "claude-haiku-4-5" });
|
|
64
66
|
const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
|
|
65
67
|
expect(def.stt).toBe(stt);
|
|
66
68
|
expect(def.llm).toBe(llm);
|
|
@@ -68,15 +70,15 @@ describe("agent()", () => {
|
|
|
68
70
|
});
|
|
69
71
|
|
|
70
72
|
test("stt/llm/tts flow through parseManifest to mode 'pipeline'", () => {
|
|
71
|
-
const stt = {
|
|
72
|
-
const tts = {
|
|
73
|
-
const llm = {
|
|
73
|
+
const stt = assemblyAI({ model: "u3pro-rt" });
|
|
74
|
+
const tts = cartesia({ voice: "v" });
|
|
75
|
+
const llm = anthropic({ model: "claude-haiku-4-5" });
|
|
74
76
|
const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
|
|
75
77
|
const parsed = parseManifest(def);
|
|
76
78
|
expect(parsed.mode).toBe("pipeline");
|
|
77
|
-
expect(parsed.stt).
|
|
78
|
-
expect(parsed.llm).
|
|
79
|
-
expect(parsed.tts).
|
|
79
|
+
expect(parsed.stt).toStrictEqual(stt);
|
|
80
|
+
expect(parsed.llm).toStrictEqual(llm);
|
|
81
|
+
expect(parsed.tts).toStrictEqual(tts);
|
|
80
82
|
});
|
|
81
83
|
|
|
82
84
|
test("agent without providers resolves to mode 's2s'", () => {
|
package/sdk/manifest-barrel.ts
CHANGED
package/sdk/manifest.test.ts
CHANGED
|
@@ -4,6 +4,9 @@ import { describe, expect, expectTypeOf, test } from "vitest";
|
|
|
4
4
|
import { type Manifest, parseManifest } from "./manifest.ts";
|
|
5
5
|
import type { AgentConfig, ToolSchema } from "./manifest-barrel.ts";
|
|
6
6
|
import { agentToolsToSchemas, toAgentConfig } from "./manifest-barrel.ts";
|
|
7
|
+
import { anthropic } from "./providers/llm/anthropic.ts";
|
|
8
|
+
import { assemblyAI } from "./providers/stt/assemblyai.ts";
|
|
9
|
+
import { cartesia } from "./providers/tts/cartesia.ts";
|
|
7
10
|
|
|
8
11
|
describe("parseManifest", () => {
|
|
9
12
|
test("minimal manifest requires only name", () => {
|
|
@@ -164,9 +167,9 @@ describe("manifest type contracts", () => {
|
|
|
164
167
|
});
|
|
165
168
|
|
|
166
169
|
describe("parseManifest — mode classification", () => {
|
|
167
|
-
const stubStt = {
|
|
168
|
-
const stubTts = {
|
|
169
|
-
const stubLlm = {
|
|
170
|
+
const stubStt = assemblyAI({ model: "u3pro-rt" });
|
|
171
|
+
const stubTts = cartesia({ voice: "v" });
|
|
172
|
+
const stubLlm = anthropic({ model: "claude-haiku-4-5" });
|
|
170
173
|
|
|
171
174
|
test("no stt/llm/tts ⇒ mode: 's2s'", () => {
|
|
172
175
|
const parsed = parseManifest({
|
package/sdk/manifest.ts
CHANGED
|
@@ -8,7 +8,13 @@
|
|
|
8
8
|
|
|
9
9
|
import { z } from "zod";
|
|
10
10
|
import { validateAllowedHostPattern } from "./allowed-hosts.ts";
|
|
11
|
-
import
|
|
11
|
+
import {
|
|
12
|
+
assertProviderTriple,
|
|
13
|
+
type LlmProvider,
|
|
14
|
+
type SessionMode,
|
|
15
|
+
type SttProvider,
|
|
16
|
+
type TtsProvider,
|
|
17
|
+
} from "./providers.ts";
|
|
12
18
|
import { BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_SYSTEM_PROMPT } from "./types.ts";
|
|
13
19
|
|
|
14
20
|
/**
|
|
@@ -67,7 +73,7 @@ export type Manifest = {
|
|
|
67
73
|
* - `"s2s"` (default): AssemblyAI Streaming Speech-to-Speech path (no stt/llm/tts set).
|
|
68
74
|
* - `"pipeline"`: pluggable STT → LLM → TTS path (stt + llm + tts all set).
|
|
69
75
|
*/
|
|
70
|
-
mode:
|
|
76
|
+
mode: SessionMode;
|
|
71
77
|
};
|
|
72
78
|
|
|
73
79
|
const ToolManifestSchema = z.object({
|
|
@@ -75,6 +81,17 @@ const ToolManifestSchema = z.object({
|
|
|
75
81
|
parameters: z.record(z.string(), z.unknown()).optional(),
|
|
76
82
|
});
|
|
77
83
|
|
|
84
|
+
/**
|
|
85
|
+
* Provider descriptor — a `{ kind, options }` pair produced by factories
|
|
86
|
+
* like `assemblyAI(...)` / `anthropic(...)` / `cartesia(...)`. Kept
|
|
87
|
+
* deliberately generic at the schema layer: kind-specific validation lives
|
|
88
|
+
* in the host-side resolver, which knows what each adapter expects.
|
|
89
|
+
*/
|
|
90
|
+
export const ProviderDescriptorSchema = z.object({
|
|
91
|
+
kind: z.string().min(1),
|
|
92
|
+
options: z.record(z.string(), z.unknown()),
|
|
93
|
+
});
|
|
94
|
+
|
|
78
95
|
const ManifestSchema = z.object({
|
|
79
96
|
name: z.string().min(1),
|
|
80
97
|
systemPrompt: z.string().optional(),
|
|
@@ -101,6 +118,9 @@ const ManifestSchema = z.object({
|
|
|
101
118
|
}
|
|
102
119
|
}
|
|
103
120
|
}),
|
|
121
|
+
stt: ProviderDescriptorSchema.optional(),
|
|
122
|
+
llm: ProviderDescriptorSchema.optional(),
|
|
123
|
+
tts: ProviderDescriptorSchema.optional(),
|
|
104
124
|
});
|
|
105
125
|
|
|
106
126
|
/**
|
|
@@ -114,19 +134,7 @@ const ManifestSchema = z.object({
|
|
|
114
134
|
*/
|
|
115
135
|
export function parseManifest(input: unknown): Manifest {
|
|
116
136
|
const parsed = ManifestSchema.parse(input);
|
|
117
|
-
|
|
118
|
-
// validated by the JSON-oriented Zod schema. Pull them straight from
|
|
119
|
-
// the raw input and enforce all-or-nothing presence here.
|
|
120
|
-
const raw = (input ?? {}) as {
|
|
121
|
-
stt?: SttProvider;
|
|
122
|
-
llm?: LlmProvider;
|
|
123
|
-
tts?: TtsProvider;
|
|
124
|
-
};
|
|
125
|
-
const providerCount = [raw.stt, raw.llm, raw.tts].filter((x) => x != null).length;
|
|
126
|
-
if (providerCount !== 0 && providerCount !== 3) {
|
|
127
|
-
throw new Error("stt, llm, and tts must be set together");
|
|
128
|
-
}
|
|
129
|
-
const mode: "s2s" | "pipeline" = providerCount === 3 ? "pipeline" : "s2s";
|
|
137
|
+
const mode = assertProviderTriple(parsed.stt, parsed.llm, parsed.tts);
|
|
130
138
|
return {
|
|
131
139
|
name: parsed.name,
|
|
132
140
|
systemPrompt: parsed.systemPrompt ?? DEFAULT_SYSTEM_PROMPT,
|
|
@@ -139,9 +147,9 @@ export function parseManifest(input: unknown): Manifest {
|
|
|
139
147
|
theme: parsed.theme,
|
|
140
148
|
tools: parsed.tools ?? {},
|
|
141
149
|
allowedHosts: parsed.allowedHosts ?? [],
|
|
142
|
-
stt:
|
|
143
|
-
llm:
|
|
144
|
-
tts:
|
|
150
|
+
stt: parsed.stt,
|
|
151
|
+
llm: parsed.llm,
|
|
152
|
+
tts: parsed.tts,
|
|
145
153
|
mode,
|
|
146
154
|
};
|
|
147
155
|
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Anthropic LLM factory — returns a pure descriptor.
|
|
4
|
+
*
|
|
5
|
+
* Users call this in place of importing from `@ai-sdk/anthropic` directly,
|
|
6
|
+
* so agent bundles don't drag the Anthropic SDK into the guest sandbox
|
|
7
|
+
* (which has no `--allow-env` permission and would crash on the SDK's
|
|
8
|
+
* eager `ANTHROPIC_BASE_URL` read).
|
|
9
|
+
*
|
|
10
|
+
* The host-side resolver in `host/providers/resolve.ts` builds a real
|
|
11
|
+
* Vercel AI SDK `LanguageModel` from this descriptor during
|
|
12
|
+
* `createRuntime`, using `ANTHROPIC_API_KEY` from the agent's env.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { LlmProvider } from "../../providers.ts";
|
|
16
|
+
|
|
17
|
+
export const ANTHROPIC_KIND = "anthropic" as const;
|
|
18
|
+
|
|
19
|
+
export interface AnthropicOptions {
|
|
20
|
+
/** Anthropic model id, e.g. `"claude-haiku-4-5"`. */
|
|
21
|
+
model: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export type AnthropicProvider = LlmProvider & {
|
|
25
|
+
readonly kind: typeof ANTHROPIC_KIND;
|
|
26
|
+
readonly options: AnthropicOptions;
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
export function anthropic(opts: AnthropicOptions): AnthropicProvider {
|
|
30
|
+
return { kind: ANTHROPIC_KIND, options: { ...opts } };
|
|
31
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* `@alexkroman1/aai/llm` subpath barrel.
|
|
4
|
+
*
|
|
5
|
+
* Re-exports LLM provider factories. Users import from here instead of
|
|
6
|
+
* `@ai-sdk/anthropic` directly so the agent bundle stays free of eager
|
|
7
|
+
* env reads and other SDK side-effects.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export type { LlmProvider } from "../providers.ts";
|
|
11
|
+
// biome-ignore lint/performance/noReExportAll: subpath barrel
|
|
12
|
+
export * from "./llm/anthropic.ts";
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* AssemblyAI Universal-Streaming STT factory — returns a pure descriptor.
|
|
4
|
+
*
|
|
5
|
+
* The descriptor flows through the bundle → server → runtime pipeline
|
|
6
|
+
* without importing the `assemblyai` SDK. The host-side resolver in
|
|
7
|
+
* `host/providers/resolve.ts` turns it into an openable {@link SttOpener}
|
|
8
|
+
* during `createRuntime`.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { SttProvider } from "../../providers.ts";
|
|
12
|
+
|
|
13
|
+
/** Kind tag recognised by the host-side resolver. */
|
|
14
|
+
export const ASSEMBLYAI_KIND = "assemblyai" as const;
|
|
15
|
+
|
|
16
|
+
export interface AssemblyAIOptions {
|
|
17
|
+
/**
|
|
18
|
+
* Streaming speech model. Defaults to `"u3pro-rt"` (Universal-3 Pro
|
|
19
|
+
* Real-Time). Arbitrary strings are forwarded to the SDK unchanged.
|
|
20
|
+
*/
|
|
21
|
+
model?: "u3pro-rt" | string;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export type AssemblyAIProvider = SttProvider & {
|
|
25
|
+
readonly kind: typeof ASSEMBLYAI_KIND;
|
|
26
|
+
readonly options: AssemblyAIOptions;
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Build an AssemblyAI STT descriptor.
|
|
31
|
+
*
|
|
32
|
+
* The API key is resolved host-side from the agent's env
|
|
33
|
+
* (`ASSEMBLYAI_API_KEY`); there is no factory-time key parameter, so the
|
|
34
|
+
* descriptor stays free of secrets and safe to serialize.
|
|
35
|
+
*/
|
|
36
|
+
export function assemblyAI(opts: AssemblyAIOptions = {}): AssemblyAIProvider {
|
|
37
|
+
return { kind: ASSEMBLYAI_KIND, options: { ...opts } };
|
|
38
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* `@alexkroman1/aai/stt` subpath barrel.
|
|
4
|
+
*
|
|
5
|
+
* Re-exports the descriptor factory (`assemblyAI`) and the shared STT
|
|
6
|
+
* contract types. Importing this barrel does not pull in the `assemblyai`
|
|
7
|
+
* SDK — that happens only when the host resolver is invoked.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export type { SttError, SttEvents, SttOpenOptions, SttProvider, SttSession } from "../providers.ts";
|
|
11
|
+
// biome-ignore lint/performance/noReExportAll: subpath barrel
|
|
12
|
+
export * from "./stt/assemblyai.ts";
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Cartesia TTS factory — returns a pure descriptor.
|
|
4
|
+
*
|
|
5
|
+
* See `sdk/providers/stt/assemblyai.ts` for the descriptor/opener split;
|
|
6
|
+
* the host-side resolver in `host/providers/resolve.ts` turns this into an
|
|
7
|
+
* openable {@link TtsOpener} during `createRuntime` using the
|
|
8
|
+
* `CARTESIA_API_KEY` from the agent's env.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { TtsProvider } from "../../providers.ts";
|
|
12
|
+
|
|
13
|
+
export const CARTESIA_KIND = "cartesia" as const;
|
|
14
|
+
|
|
15
|
+
export interface CartesiaOptions {
|
|
16
|
+
/** Cartesia voice ID. Required. */
|
|
17
|
+
voice: string;
|
|
18
|
+
/** Model ID. Defaults to `"sonic-2"`. */
|
|
19
|
+
model?: string;
|
|
20
|
+
/** Spoken language hint. Defaults to `"en"`. */
|
|
21
|
+
language?: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export type CartesiaProvider = TtsProvider & {
|
|
25
|
+
readonly kind: typeof CARTESIA_KIND;
|
|
26
|
+
readonly options: CartesiaOptions;
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
export function cartesia(opts: CartesiaOptions): CartesiaProvider {
|
|
30
|
+
return { kind: CARTESIA_KIND, options: { ...opts } };
|
|
31
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* `@alexkroman1/aai/tts` subpath barrel.
|
|
4
|
+
*
|
|
5
|
+
* Re-exports the descriptor factory (`cartesia`) and the shared TTS
|
|
6
|
+
* contract types. Does not pull in `@cartesia/cartesia-js` — the host
|
|
7
|
+
* resolver handles that at session start.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
export type { TtsError, TtsEvents, TtsOpenOptions, TtsProvider, TtsSession } from "../providers.ts";
|
|
11
|
+
// biome-ignore lint/performance/noReExportAll: subpath barrel
|
|
12
|
+
export * from "./tts/cartesia.ts";
|
package/sdk/providers.ts
CHANGED
|
@@ -1,25 +1,87 @@
|
|
|
1
1
|
// Copyright 2025 the AAI authors. MIT license.
|
|
2
2
|
/**
|
|
3
|
-
* Pluggable provider
|
|
4
|
-
* SDKs, plus the LLM provider type.
|
|
3
|
+
* Pluggable provider contracts.
|
|
5
4
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* `
|
|
5
|
+
* **Two layers, strict boundary.**
|
|
6
|
+
*
|
|
7
|
+
* - The *descriptor* layer (`SttProvider` / `LlmProvider` / `TtsProvider`) is
|
|
8
|
+
* pure data — `{ kind, options }` objects returned by the user-facing
|
|
9
|
+
* factories (`assemblyAI(...)`, `anthropic(...)`, `cartesia(...)`). They
|
|
10
|
+
* are JSON-serializable, contain no functions, and can cross the CLI →
|
|
11
|
+
* server → guest boundary without evaluating any third-party SDK.
|
|
12
|
+
* They live in `sdk/` alongside `Manifest` and have zero Node-only deps.
|
|
13
|
+
*
|
|
14
|
+
* - The *openable* layer (`SttOpener` / `TtsOpener` + `SttSession` /
|
|
15
|
+
* `TtsSession`) is host-only. The host's internal
|
|
16
|
+
* `host/providers/resolve.ts` registry turns descriptors into openers
|
|
17
|
+
* during `createRuntime`, importing the concrete SDKs (`assemblyai`,
|
|
18
|
+
* `@cartesia/cartesia-js`, `@ai-sdk/anthropic`) only at that point.
|
|
19
|
+
* Only the openable layer talks to the network; descriptors never do.
|
|
20
|
+
*
|
|
21
|
+
* This split is load-bearing for the sandboxed deployment path: the guest
|
|
22
|
+
* Deno sandbox can import `@alexkroman1/aai/{stt,tts,llm}` without pulling
|
|
23
|
+
* in any AI-SDK code, which means no env reads (`ANTHROPIC_BASE_URL`, etc.)
|
|
24
|
+
* at bundle load — the exact failure mode that forced this refactor.
|
|
10
25
|
*/
|
|
11
26
|
|
|
12
|
-
import type { LanguageModel } from "ai";
|
|
13
|
-
|
|
14
27
|
/** Unsubscribe callback returned by `.on()` event subscriptions. */
|
|
15
28
|
export type Unsubscribe = () => void;
|
|
16
29
|
|
|
17
|
-
// --------
|
|
30
|
+
// -------- Descriptor shape (user-facing, serializable) ----------------------
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Base shape for a provider descriptor. A `kind` tag + opaque `options`
|
|
34
|
+
* payload lets the host registry pick the right resolver and pass the
|
|
35
|
+
* caller's options through verbatim.
|
|
36
|
+
*/
|
|
37
|
+
export interface ProviderDescriptor<Kind extends string, Options> {
|
|
38
|
+
readonly kind: Kind;
|
|
39
|
+
readonly options: Options;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** Descriptor for an STT provider. Returned by factories like `assemblyAI(...)`. */
|
|
43
|
+
export type SttProvider = ProviderDescriptor<string, Record<string, unknown>>;
|
|
44
|
+
|
|
45
|
+
/** Descriptor for an LLM provider. Returned by factories like `anthropic(...)`. */
|
|
46
|
+
export type LlmProvider = ProviderDescriptor<string, Record<string, unknown>>;
|
|
47
|
+
|
|
48
|
+
/** Descriptor for a TTS provider. Returned by factories like `cartesia(...)`. */
|
|
49
|
+
export type TtsProvider = ProviderDescriptor<string, Record<string, unknown>>;
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Session mode derived from which provider triple is set.
|
|
53
|
+
*
|
|
54
|
+
* `parseManifest`, `toAgentConfig`, `createRuntime`, and the server's
|
|
55
|
+
* `IsolateConfigSchema` all use {@link assertProviderTriple} so there's
|
|
56
|
+
* one source of truth for the validation.
|
|
57
|
+
*/
|
|
58
|
+
export type SessionMode = "s2s" | "pipeline";
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Enforce the all-or-nothing provider rule and return the derived mode.
|
|
62
|
+
*
|
|
63
|
+
* Pipeline mode requires STT, LLM, and TTS all set; S2S mode requires
|
|
64
|
+
* none of them. Anything in-between is a configuration error.
|
|
65
|
+
*/
|
|
66
|
+
export function assertProviderTriple(stt: unknown, llm: unknown, tts: unknown): SessionMode {
|
|
67
|
+
const count = (stt != null ? 1 : 0) + (llm != null ? 1 : 0) + (tts != null ? 1 : 0);
|
|
68
|
+
if (count !== 0 && count !== 3) {
|
|
69
|
+
throw new Error("stt, llm, and tts must be set together");
|
|
70
|
+
}
|
|
71
|
+
return count === 3 ? "pipeline" : "s2s";
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// -------- STT openable (host-only) ------------------------------------------
|
|
18
75
|
|
|
19
76
|
export interface SttError extends Error {
|
|
20
77
|
readonly code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error";
|
|
21
78
|
}
|
|
22
79
|
|
|
80
|
+
/** Build an {@link SttError} with a typed `code`. Zero-dep helper so both sdk/ and host/ can use it. */
|
|
81
|
+
export function makeSttError(code: SttError["code"], message: string): SttError {
|
|
82
|
+
return Object.assign(new Error(message), { code }) as SttError;
|
|
83
|
+
}
|
|
84
|
+
|
|
23
85
|
export type SttEvents = {
|
|
24
86
|
/** Interim transcript; drives barge-in detection. */
|
|
25
87
|
partial: (text: string) => void;
|
|
@@ -42,17 +104,23 @@ export interface SttOpenOptions {
|
|
|
42
104
|
signal: AbortSignal;
|
|
43
105
|
}
|
|
44
106
|
|
|
45
|
-
|
|
107
|
+
/** Host-side openable STT provider — produced by `resolveStt(descriptor)`. */
|
|
108
|
+
export interface SttOpener {
|
|
46
109
|
readonly name: string;
|
|
47
110
|
open(opts: SttOpenOptions): Promise<SttSession>;
|
|
48
111
|
}
|
|
49
112
|
|
|
50
|
-
// -------- TTS
|
|
113
|
+
// -------- TTS openable (host-only) ------------------------------------------
|
|
51
114
|
|
|
52
115
|
export interface TtsError extends Error {
|
|
53
116
|
readonly code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error";
|
|
54
117
|
}
|
|
55
118
|
|
|
119
|
+
/** Build a {@link TtsError} with a typed `code`. Mirror of {@link makeSttError}. */
|
|
120
|
+
export function makeTtsError(code: TtsError["code"], message: string): TtsError {
|
|
121
|
+
return Object.assign(new Error(message), { code }) as TtsError;
|
|
122
|
+
}
|
|
123
|
+
|
|
56
124
|
export type TtsEvents = {
|
|
57
125
|
/** One PCM16 audio chunk. Orchestrator forwards to the client. */
|
|
58
126
|
audio: (pcm: Int16Array) => void;
|
|
@@ -79,12 +147,8 @@ export interface TtsOpenOptions {
|
|
|
79
147
|
signal: AbortSignal;
|
|
80
148
|
}
|
|
81
149
|
|
|
82
|
-
|
|
150
|
+
/** Host-side openable TTS provider — produced by `resolveTts(descriptor)`. */
|
|
151
|
+
export interface TtsOpener {
|
|
83
152
|
readonly name: string;
|
|
84
153
|
open(opts: TtsOpenOptions): Promise<TtsSession>;
|
|
85
154
|
}
|
|
86
|
-
|
|
87
|
-
// -------- LLM --------
|
|
88
|
-
|
|
89
|
-
/** LLM provider — Vercel AI SDK's `LanguageModel`; no wrapping. */
|
|
90
|
-
export type LlmProvider = LanguageModel;
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import { i as ToolChoiceSchema, t as BuiltinToolSchema } from "./types-Cfx_4QDK.js";
|
|
2
|
-
import { z } from "zod";
|
|
3
|
-
//#region sdk/_internal-types.ts
|
|
4
|
-
/**
|
|
5
|
-
* Zod schema for serializable agent configuration sent over the wire.
|
|
6
|
-
*
|
|
7
|
-
* This is the JSON-safe subset of the agent definition that can be
|
|
8
|
-
* transmitted between the worker and the host process via structured clone.
|
|
9
|
-
*/
|
|
10
|
-
const AgentConfigSchema = z.object({
|
|
11
|
-
name: z.string().min(1),
|
|
12
|
-
systemPrompt: z.string(),
|
|
13
|
-
greeting: z.string(),
|
|
14
|
-
sttPrompt: z.string().optional(),
|
|
15
|
-
maxSteps: z.number().int().positive().optional(),
|
|
16
|
-
toolChoice: ToolChoiceSchema.optional(),
|
|
17
|
-
builtinTools: z.array(BuiltinToolSchema).readonly().optional(),
|
|
18
|
-
idleTimeoutMs: z.number().nonnegative().optional()
|
|
19
|
-
});
|
|
20
|
-
/** Extract the serializable {@link AgentConfig} subset from a source object. */
|
|
21
|
-
function toAgentConfig(src) {
|
|
22
|
-
const config = {
|
|
23
|
-
name: src.name,
|
|
24
|
-
systemPrompt: src.systemPrompt,
|
|
25
|
-
greeting: src.greeting
|
|
26
|
-
};
|
|
27
|
-
if (src.sttPrompt !== void 0) config.sttPrompt = src.sttPrompt;
|
|
28
|
-
if (src.maxSteps !== void 0) config.maxSteps = src.maxSteps;
|
|
29
|
-
if (src.toolChoice !== void 0) config.toolChoice = src.toolChoice;
|
|
30
|
-
if (src.builtinTools) config.builtinTools = [...src.builtinTools];
|
|
31
|
-
if (src.idleTimeoutMs !== void 0) config.idleTimeoutMs = src.idleTimeoutMs;
|
|
32
|
-
return config;
|
|
33
|
-
}
|
|
34
|
-
/**
|
|
35
|
-
* Zod schema for serialized tool definitions sent over the wire.
|
|
36
|
-
*
|
|
37
|
-
* `parameters` must be a valid JSON Schema object (with `type`, `properties`,
|
|
38
|
-
* etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
|
|
39
|
-
*/
|
|
40
|
-
const ToolSchemaSchema = z.object({
|
|
41
|
-
name: z.string().min(1),
|
|
42
|
-
description: z.string().min(1),
|
|
43
|
-
parameters: z.record(z.string(), z.unknown())
|
|
44
|
-
});
|
|
45
|
-
/** Empty Zod object schema used as default when tools have no parameters. */
|
|
46
|
-
const EMPTY_PARAMS = z.object({});
|
|
47
|
-
/**
|
|
48
|
-
* Convert agent tool definitions to JSON Schema format for wire transport.
|
|
49
|
-
*
|
|
50
|
-
* Transforms the Zod-based `parameters` of each tool into a plain JSON Schema
|
|
51
|
-
* object suitable for structured clone / JSON serialization.
|
|
52
|
-
*/
|
|
53
|
-
function agentToolsToSchemas(tools) {
|
|
54
|
-
return Object.entries(tools).map(([name, def]) => ({
|
|
55
|
-
name,
|
|
56
|
-
description: def.description,
|
|
57
|
-
parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
|
|
58
|
-
}));
|
|
59
|
-
}
|
|
60
|
-
//#endregion
|
|
61
|
-
export { toAgentConfig as a, agentToolsToSchemas as i, EMPTY_PARAMS as n, ToolSchemaSchema as r, AgentConfigSchema as t };
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* `@alexkroman1/aai/stt` subpath barrel. Re-exports the STT provider
|
|
3
|
-
* contract types (via `stt.ts` → `sdk/providers.ts`) alongside the
|
|
4
|
-
* concrete AssemblyAI adapter factory. Task 9 owns wiring this file
|
|
5
|
-
* into `package.json` exports.
|
|
6
|
-
*/
|
|
7
|
-
export * from "./stt/assemblyai.ts";
|
|
8
|
-
export type * from "./stt.ts";
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import { createNanoEvents } from "nanoevents";
|
|
2
|
-
import { AssemblyAI } from "assemblyai";
|
|
3
|
-
//#region host/providers/stt/assemblyai.ts
|
|
4
|
-
/**
|
|
5
|
-
* AssemblyAI Universal-Streaming STT adapter.
|
|
6
|
-
*
|
|
7
|
-
* Wraps the `assemblyai` Node SDK's {@link StreamingTranscriber} and
|
|
8
|
-
* normalizes its event surface onto the {@link SttProvider} /
|
|
9
|
-
* {@link SttEvents} contract consumed by the pipeline orchestrator.
|
|
10
|
-
*
|
|
11
|
-
* Default model: `"u3pro-rt"` (Universal-3 Pro Real-Time). The adapter
|
|
12
|
-
* maps that to the SDK's `"u3-rt-pro"` `speechModel` value; any other
|
|
13
|
-
* string is forwarded verbatim.
|
|
14
|
-
*/
|
|
15
|
-
/** Translate the adapter's model alias to the SDK's `speechModel` value. */
|
|
16
|
-
function resolveSpeechModel(model) {
|
|
17
|
-
if (model === "u3pro-rt") return "u3-rt-pro";
|
|
18
|
-
return model;
|
|
19
|
-
}
|
|
20
|
-
function makeError(message) {
|
|
21
|
-
const err = new Error(message);
|
|
22
|
-
err.code = "stt_stream_error";
|
|
23
|
-
return err;
|
|
24
|
-
}
|
|
25
|
-
function assemblyAI(opts = {}) {
|
|
26
|
-
return {
|
|
27
|
-
name: "assemblyai",
|
|
28
|
-
async open(openOpts) {
|
|
29
|
-
const apiKey = opts.apiKey ?? openOpts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
|
|
30
|
-
if (!apiKey) {
|
|
31
|
-
const err = /* @__PURE__ */ new Error("AssemblyAI STT adapter: missing API key. Provide via the factory option, SttOpenOptions, or the ASSEMBLYAI_API_KEY environment variable.");
|
|
32
|
-
err.code = "stt_auth_failed";
|
|
33
|
-
throw err;
|
|
34
|
-
}
|
|
35
|
-
const client = new AssemblyAI({ apiKey });
|
|
36
|
-
const speechModel = resolveSpeechModel(opts.model ?? "u3pro-rt");
|
|
37
|
-
const transcriber = client.streaming.transcriber({
|
|
38
|
-
sampleRate: openOpts.sampleRate,
|
|
39
|
-
speechModel,
|
|
40
|
-
...openOpts.sttPrompt ? { prompt: openOpts.sttPrompt } : {}
|
|
41
|
-
});
|
|
42
|
-
const emitter = createNanoEvents();
|
|
43
|
-
let closed = false;
|
|
44
|
-
transcriber.on("turn", (event) => {
|
|
45
|
-
if (closed) return;
|
|
46
|
-
const text = event.transcript ?? "";
|
|
47
|
-
if (event.end_of_turn) {
|
|
48
|
-
if (text.length > 0) emitter.emit("final", text);
|
|
49
|
-
} else if (text.length > 0) emitter.emit("partial", text);
|
|
50
|
-
});
|
|
51
|
-
transcriber.on("error", (err) => {
|
|
52
|
-
if (closed) return;
|
|
53
|
-
emitter.emit("error", makeError(err?.message ?? String(err)));
|
|
54
|
-
});
|
|
55
|
-
transcriber.on("close", (code) => {
|
|
56
|
-
if (closed) return;
|
|
57
|
-
if (code !== 1e3) emitter.emit("error", makeError(`socket closed ${code}`));
|
|
58
|
-
});
|
|
59
|
-
try {
|
|
60
|
-
await transcriber.connect();
|
|
61
|
-
} catch (cause) {
|
|
62
|
-
const err = /* @__PURE__ */ new Error(`AssemblyAI STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
|
|
63
|
-
err.code = "stt_connect_failed";
|
|
64
|
-
throw err;
|
|
65
|
-
}
|
|
66
|
-
const close = async () => {
|
|
67
|
-
if (closed) return;
|
|
68
|
-
closed = true;
|
|
69
|
-
try {
|
|
70
|
-
await transcriber.close();
|
|
71
|
-
} catch {}
|
|
72
|
-
};
|
|
73
|
-
if (openOpts.signal.aborted) close();
|
|
74
|
-
else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
|
|
75
|
-
return {
|
|
76
|
-
sendAudio(pcm) {
|
|
77
|
-
if (closed) return;
|
|
78
|
-
const copy = new Uint8Array(pcm.byteLength);
|
|
79
|
-
copy.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
|
|
80
|
-
transcriber.sendAudio(copy.buffer);
|
|
81
|
-
},
|
|
82
|
-
on(event, fn) {
|
|
83
|
-
return emitter.on(event, fn);
|
|
84
|
-
},
|
|
85
|
-
close,
|
|
86
|
-
_transcriber: transcriber
|
|
87
|
-
};
|
|
88
|
-
}
|
|
89
|
-
};
|
|
90
|
-
}
|
|
91
|
-
//#endregion
|
|
92
|
-
export { assemblyAI };
|