@alexkroman1/aai 1.4.5 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/CHANGELOG.md +19 -0
- package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
- package/dist/assemblyai-C969QGi4.js +35 -0
- package/dist/cartesia-BfQPOQ7Y.js +37 -0
- package/dist/host/_pipeline-test-fakes.d.ts +3 -1
- package/dist/host/providers/stt/deepgram.d.ts +28 -0
- package/dist/host/providers/tts/cartesia.d.ts +1 -1
- package/dist/host/providers/tts/rime.d.ts +44 -0
- package/dist/host/runtime-barrel.d.ts +4 -2
- package/dist/host/runtime-barrel.js +1434 -1209
- package/dist/host/runtime.d.ts +2 -2
- package/dist/host/s2s.d.ts +16 -16
- package/dist/host/session-core.d.ts +37 -0
- package/dist/host/transports/pipeline-transport.d.ts +48 -0
- package/dist/host/transports/s2s-transport.d.ts +19 -0
- package/dist/host/transports/types.d.ts +45 -0
- package/dist/host/ws-handler.d.ts +14 -10
- package/dist/sdk/_internal-types.d.ts +2 -0
- package/dist/sdk/manifest-barrel.js +1 -1
- package/dist/sdk/protocol.d.ts +6 -5
- package/dist/sdk/providers/llm-barrel.js +1 -1
- package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
- package/dist/sdk/providers/stt-barrel.d.ts +1 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
- package/dist/sdk/providers/tts/rime.d.ts +42 -0
- package/dist/sdk/providers/tts-barrel.d.ts +1 -0
- package/dist/sdk/providers/tts-barrel.js +2 -2
- package/host/_pipeline-test-fakes.ts +6 -3
- package/host/_test-utils.ts +209 -128
- package/host/builtin-tools.ts +1 -0
- package/host/cleanup.test.ts +25 -298
- package/host/integration/pipeline-reference.integration.test.ts +30 -35
- package/host/providers/resolve.ts +10 -2
- package/host/providers/stt/deepgram.test.ts +229 -0
- package/host/providers/stt/deepgram.ts +172 -0
- package/host/providers/tts/cartesia.ts +7 -3
- package/host/providers/tts/rime.test.ts +251 -0
- package/host/providers/tts/rime.ts +322 -0
- package/host/runtime-barrel.ts +4 -2
- package/host/runtime.test.ts +16 -47
- package/host/runtime.ts +131 -23
- package/host/s2s.test.ts +122 -131
- package/host/s2s.ts +44 -52
- package/host/session-core.test.ts +257 -0
- package/host/session-core.ts +262 -0
- package/host/to-vercel-tools.test.ts +9 -1
- package/host/transports/pipeline-transport.test.ts +653 -0
- package/host/transports/pipeline-transport.ts +532 -0
- package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
- package/host/transports/s2s-transport.test.ts +56 -0
- package/host/transports/s2s-transport.ts +116 -0
- package/host/transports/types.test.ts +22 -0
- package/host/transports/types.ts +51 -0
- package/host/ws-handler.test.ts +324 -242
- package/host/ws-handler.ts +56 -59
- package/package.json +2 -1
- package/sdk/__snapshots__/exports.test.ts.snap +3 -3
- package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
- package/sdk/_internal-types.ts +3 -0
- package/sdk/protocol-compat.test.ts +8 -0
- package/sdk/protocol.ts +6 -5
- package/sdk/providers/stt/deepgram.ts +43 -0
- package/sdk/providers/stt-barrel.ts +2 -0
- package/sdk/providers/tts/cartesia.ts +15 -5
- package/sdk/providers/tts/rime.ts +52 -0
- package/sdk/providers/tts-barrel.ts +2 -0
- package/sdk/schema-alignment.test.ts +18 -6
- package/dist/assemblyai-Cxg9eobY.js +0 -18
- package/dist/cartesia-DwDk2tEu.js +0 -10
- package/dist/host/pipeline-session-ctx.d.ts +0 -24
- package/dist/host/pipeline-session.d.ts +0 -52
- package/dist/host/session-ctx.d.ts +0 -73
- package/dist/host/session.d.ts +0 -62
- package/host/pipeline-session-ctx.test.ts +0 -31
- package/host/pipeline-session-ctx.ts +0 -36
- package/host/pipeline-session.test.ts +0 -672
- package/host/pipeline-session.ts +0 -533
- package/host/s2s-fixtures.test.ts +0 -237
- package/host/session-ctx.test.ts +0 -387
- package/host/session-ctx.ts +0 -134
- package/host/session-fixture-replay.test.ts +0 -128
- package/host/session.test.ts +0 -634
- package/host/session.ts +0 -412
- package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @alexkroman1/aai@1.4.5 build /home/runner/work/agent/agent/packages/aai
|
|
2
|
+
> @alexkroman1/aai@1.5.1 build /home/runner/work/agent/agent/packages/aai
|
|
3
3
|
> tsdown && tsc -p tsconfig.build.json
|
|
4
4
|
|
|
5
5
|
[34mℹ[39m [34mtsdown v0.21.7[39m powered by [38;2;255;126;23mrolldown v1.0.0-rc.12[39m
|
|
@@ -8,19 +8,19 @@
|
|
|
8
8
|
[34mℹ[39m target: [34mnode22[39m
|
|
9
9
|
[34mℹ[39m tsconfig: [34mtsconfig.json[39m
|
|
10
10
|
[34mℹ[39m Build start
|
|
11
|
-
[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m [
|
|
11
|
+
[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m [2m87.68 kB[22m [2m│ gzip: 25.35 kB[22m
|
|
12
12
|
[34mℹ[39m [2mdist/[22m[1msdk/protocol.js[22m [2m 4.75 kB[22m [2m│ gzip: 1.76 kB[22m
|
|
13
13
|
[34mℹ[39m [2mdist/[22m[1mindex.js[22m [2m 2.88 kB[22m [2m│ gzip: 1.24 kB[22m
|
|
14
14
|
[34mℹ[39m [2mdist/[22m[1msdk/manifest-barrel.js[22m [2m 0.36 kB[22m [2m│ gzip: 0.20 kB[22m
|
|
15
|
-
[34mℹ[39m [2mdist/[22m[1msdk/providers/
|
|
15
|
+
[34mℹ[39m [2mdist/[22m[1msdk/providers/tts-barrel.js[22m [2m 0.26 kB[22m [2m│ gzip: 0.16 kB[22m
|
|
16
|
+
[34mℹ[39m [2mdist/[22m[1msdk/providers/stt-barrel.js[22m [2m 0.19 kB[22m [2m│ gzip: 0.14 kB[22m
|
|
16
17
|
[34mℹ[39m [2mdist/[22m[1msdk/providers/llm-barrel.js[22m [2m 0.12 kB[22m [2m│ gzip: 0.11 kB[22m
|
|
17
|
-
[34mℹ[39m [2mdist/[
|
|
18
|
+
[34mℹ[39m [2mdist/[22m_internal-types-DFL07G3f.js [2m 5.66 kB[22m [2m│ gzip: 2.10 kB[22m
|
|
18
19
|
[34mℹ[39m [2mdist/[22mtypes-KUgezM6u.js [2m 5.64 kB[22m [2m│ gzip: 2.39 kB[22m
|
|
19
|
-
[34mℹ[39m [2mdist/[22m_internal-types-3p3OJZPb.js [2m 5.61 kB[22m [2m│ gzip: 2.07 kB[22m
|
|
20
20
|
[34mℹ[39m [2mdist/[22mconstants-C2nirZUI.js [2m 3.10 kB[22m [2m│ gzip: 1.38 kB[22m
|
|
21
21
|
[34mℹ[39m [2mdist/[22mws-upgrade-BeOQ7fXL.js [2m 1.14 kB[22m [2m│ gzip: 0.54 kB[22m
|
|
22
|
-
[34mℹ[39m [2mdist/[
|
|
23
|
-
[34mℹ[39m [2mdist/[
|
|
24
|
-
[34mℹ[39m [2mdist/[
|
|
25
|
-
[34mℹ[39m 14 files, total:
|
|
26
|
-
[32m✔[39m Build complete in [
|
|
22
|
+
[34mℹ[39m [2mdist/[22mcartesia-BfQPOQ7Y.js [2m 1.08 kB[22m [2m│ gzip: 0.50 kB[22m
|
|
23
|
+
[34mℹ[39m [2mdist/[22massemblyai-C969QGi4.js [2m 1.03 kB[22m [2m│ gzip: 0.42 kB[22m
|
|
24
|
+
[34mℹ[39m [2mdist/[22manthropic-CcLZygAr.js [2m 0.23 kB[22m [2m│ gzip: 0.18 kB[22m
|
|
25
|
+
[34mℹ[39m 14 files, total: 114.11 kB
|
|
26
|
+
[32m✔[39m Build complete in [32m45ms[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,24 @@
|
|
|
1
1
|
# @alexkroman1/aai
|
|
2
2
|
|
|
3
|
+
## 1.5.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- fbb3816: Add type: "function" to tool schemas in S2S session.update payload — AssemblyAI's S2S API rejects tool objects without it.
|
|
8
|
+
|
|
9
|
+
## 1.5.0
|
|
10
|
+
|
|
11
|
+
### Minor Changes
|
|
12
|
+
|
|
13
|
+
- 58c5c75: Consolidate session.ts + pipeline-session.ts into a unified SessionCore with two transport strategies (S2S, pipeline). Switch connectS2s to typed callbacks (removing the nanoevents-backed S2sHandle emitter) and flatten client→server→provider dispatch from four layers to two. Wire format is JSON text events + raw PCM16 binary audio frames — the existing public protocol is unchanged. Adds Deepgram as a pipeline-mode STT option and Rime as a pipeline-mode TTS option.
|
|
14
|
+
- 868b85e: Plumb agent maxSteps and toolChoice config into pipeline mode streamText
|
|
15
|
+
- 58c5c75: Add Deepgram as a pipeline-mode STT provider option
|
|
16
|
+
- 58c5c75: Add Rime as a pipeline-mode TTS provider option
|
|
17
|
+
|
|
18
|
+
### Patch Changes
|
|
19
|
+
|
|
20
|
+
- a361363: Fix the Rime TTS provider: use the correct WebSocket host (users-ws.rime.ai), use the JSON message protocol on /ws2, and allow a longer first-audio timeout so the greeting plays. Add a default voice for cartesia() and rime() so they can be called with no args.
|
|
21
|
+
|
|
3
22
|
## 1.4.5
|
|
4
23
|
|
|
5
24
|
### Patch Changes
|
|
@@ -122,6 +122,7 @@ function toAgentConfig(src) {
|
|
|
122
122
|
* etc.) — the Vercel AI SDK wraps it via `jsonSchema()`.
|
|
123
123
|
*/
|
|
124
124
|
const ToolSchemaSchema = z.object({
|
|
125
|
+
type: z.literal("function"),
|
|
125
126
|
name: z.string().min(1),
|
|
126
127
|
description: z.string().min(1),
|
|
127
128
|
parameters: z.record(z.string(), z.unknown())
|
|
@@ -136,6 +137,7 @@ const EMPTY_PARAMS = z.object({});
|
|
|
136
137
|
*/
|
|
137
138
|
function agentToolsToSchemas(tools) {
|
|
138
139
|
return Object.entries(tools).map(([name, def]) => ({
|
|
140
|
+
type: "function",
|
|
139
141
|
name,
|
|
140
142
|
description: def.description,
|
|
141
143
|
parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
//#region sdk/providers/stt/deepgram.ts
|
|
2
|
+
/** Kind tag recognised by the host-side resolver. */
|
|
3
|
+
const DEEPGRAM_KIND = "deepgram";
|
|
4
|
+
/**
|
|
5
|
+
* Build a Deepgram STT descriptor.
|
|
6
|
+
*
|
|
7
|
+
* The API key is resolved host-side from the agent's env
|
|
8
|
+
* (`DEEPGRAM_API_KEY`); there is no factory-time key parameter, so the
|
|
9
|
+
* descriptor stays free of secrets and safe to serialize.
|
|
10
|
+
*/
|
|
11
|
+
function deepgram(opts = {}) {
|
|
12
|
+
return {
|
|
13
|
+
kind: DEEPGRAM_KIND,
|
|
14
|
+
options: { ...opts }
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
//#endregion
|
|
18
|
+
//#region sdk/providers/stt/assemblyai.ts
|
|
19
|
+
/** Kind tag recognised by the host-side resolver. */
|
|
20
|
+
const ASSEMBLYAI_KIND = "assemblyai";
|
|
21
|
+
/**
|
|
22
|
+
* Build an AssemblyAI STT descriptor.
|
|
23
|
+
*
|
|
24
|
+
* The API key is resolved host-side from the agent's env
|
|
25
|
+
* (`ASSEMBLYAI_API_KEY`); there is no factory-time key parameter, so the
|
|
26
|
+
* descriptor stays free of secrets and safe to serialize.
|
|
27
|
+
*/
|
|
28
|
+
function assemblyAI(opts = {}) {
|
|
29
|
+
return {
|
|
30
|
+
kind: ASSEMBLYAI_KIND,
|
|
31
|
+
options: { ...opts }
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
//#endregion
|
|
35
|
+
export { deepgram as i, assemblyAI as n, DEEPGRAM_KIND as r, ASSEMBLYAI_KIND as t };
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
//#region sdk/providers/tts/rime.ts
|
|
2
|
+
const RIME_KIND = "rime";
|
|
3
|
+
/**
|
|
4
|
+
* Default Rime speaker used when callers invoke `rime()` with no `voice`.
|
|
5
|
+
* `cove` is a `mistv2` speaker, matching the default model below — so a
|
|
6
|
+
* bare `rime()` works out of the box for new agents.
|
|
7
|
+
*/
|
|
8
|
+
const RIME_DEFAULT_VOICE = "cove";
|
|
9
|
+
function rime(opts = {}) {
|
|
10
|
+
return {
|
|
11
|
+
kind: RIME_KIND,
|
|
12
|
+
options: {
|
|
13
|
+
...opts,
|
|
14
|
+
voice: opts.voice ?? "cove"
|
|
15
|
+
}
|
|
16
|
+
};
|
|
17
|
+
}
|
|
18
|
+
//#endregion
|
|
19
|
+
//#region sdk/providers/tts/cartesia.ts
|
|
20
|
+
const CARTESIA_KIND = "cartesia";
|
|
21
|
+
/**
|
|
22
|
+
* Default voice used when callers invoke `cartesia()` with no `voice`. This
|
|
23
|
+
* is the same voice the example templates ship with, so a bare `cartesia()`
|
|
24
|
+
* works out of the box for new agents.
|
|
25
|
+
*/
|
|
26
|
+
const CARTESIA_DEFAULT_VOICE = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
27
|
+
function cartesia(opts = {}) {
|
|
28
|
+
return {
|
|
29
|
+
kind: CARTESIA_KIND,
|
|
30
|
+
options: {
|
|
31
|
+
...opts,
|
|
32
|
+
voice: opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
//#endregion
|
|
37
|
+
export { RIME_KIND as a, RIME_DEFAULT_VOICE as i, CARTESIA_KIND as n, rime as o, cartesia as r, CARTESIA_DEFAULT_VOICE as t };
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deepgram Nova streaming STT opener (host-only).
|
|
3
|
+
*
|
|
4
|
+
* The user-facing descriptor factory (`deepgram(...)`) lives in
|
|
5
|
+
* `sdk/providers/stt/deepgram.ts`. This module is the host-side
|
|
6
|
+
* counterpart: it takes the descriptor options + an API key and
|
|
7
|
+
* returns an {@link SttOpener} that the pipeline session drives.
|
|
8
|
+
*
|
|
9
|
+
* Default model: `"nova-3"`. Any string is forwarded verbatim to the SDK.
|
|
10
|
+
*
|
|
11
|
+
* This adapter targets the Deepgram SDK v5 (`@deepgram/sdk@^5`). The v5
|
|
12
|
+
* streaming API is:
|
|
13
|
+
* `client.listen.v1.connect(args)` → `Promise<V1Socket>`
|
|
14
|
+
* followed by:
|
|
15
|
+
* `socket.connect()` + `socket.waitForOpen()` to establish the connection.
|
|
16
|
+
*/
|
|
17
|
+
import { DeepgramClient } from "@deepgram/sdk";
|
|
18
|
+
import type { DeepgramOptions } from "../../../sdk/providers/stt/deepgram.ts";
|
|
19
|
+
import { type SttOpener, type SttSession } from "../../../sdk/providers.ts";
|
|
20
|
+
type V1Socket = Awaited<ReturnType<InstanceType<typeof DeepgramClient>["listen"]["v1"]["connect"]>>;
|
|
21
|
+
/** Internal: SttSession with a test-only handle to the raw SDK socket. */
|
|
22
|
+
export interface DeepgramSession extends SttSession {
|
|
23
|
+
/** @internal Test-only: exposes the underlying SDK socket for fixture replay. */
|
|
24
|
+
readonly _connection: V1Socket;
|
|
25
|
+
}
|
|
26
|
+
/** Build an {@link SttOpener} from resolved Deepgram descriptor options. */
|
|
27
|
+
export declare function openDeepgram(opts?: DeepgramOptions): SttOpener;
|
|
28
|
+
export {};
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
* conversion.
|
|
20
20
|
*/
|
|
21
21
|
import type { TTSWS } from "@cartesia/cartesia-js/resources/tts";
|
|
22
|
-
import type
|
|
22
|
+
import { type CartesiaOptions } from "../../../sdk/providers/tts/cartesia.ts";
|
|
23
23
|
import { type TtsOpener, type TtsSession } from "../../../sdk/providers.ts";
|
|
24
24
|
/** Internal: TtsSession with a test-only handle to the raw SDK socket. */
|
|
25
25
|
export interface CartesiaSession extends TtsSession {
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rime TTS opener (host-only).
|
|
3
|
+
*
|
|
4
|
+
* The user-facing descriptor factory (`rime(...)`) lives in
|
|
5
|
+
* `sdk/providers/tts/rime.ts`. This module is the host-side
|
|
6
|
+
* counterpart: it takes the descriptor options + an API key and
|
|
7
|
+
* returns a {@link TtsOpener} that the pipeline session drives.
|
|
8
|
+
*
|
|
9
|
+
* **Protocol.** Connects to Rime's `ws2` JSON WebSocket endpoint
|
|
10
|
+
* (`wss://users-ws.rime.ai/ws2`). Client-to-server messages are JSON:
|
|
11
|
+
* - `{ "text": "..." }` — append text to the synthesis buffer
|
|
12
|
+
* - `{ "operation": "clear" }` — drop buffered text (barge-in)
|
|
13
|
+
* - `{ "operation": "eos" }` — drain buffer, close connection (NOT used
|
|
14
|
+
* during a session: it would tear down the WS, forcing reconnect per
|
|
15
|
+
* turn). We force end-of-turn synthesis with a trailing `"."` instead.
|
|
16
|
+
* The server responds with JSON frames:
|
|
17
|
+
* - `{ type: "chunk", data: <base64 PCM16 LE>, contextId: string | null }`
|
|
18
|
+
* - `{ type: "timestamps", ... }` (ignored)
|
|
19
|
+
* - `{ type: "error", message: string }` (surfaced as `tts_stream_error`)
|
|
20
|
+
*
|
|
21
|
+
* **Single long-lived connection per session.** Rime buffers text until it
|
|
22
|
+
* sees terminal punctuation (`.`, `?`, `!`), so we use one WebSocket per
|
|
23
|
+
* `open()` call and reuse it across turns. `clear` resets the buffer
|
|
24
|
+
* between cancellations.
|
|
25
|
+
*
|
|
26
|
+
* **Done detection.** After `flush()` sends a trailing `"."` to force the
|
|
27
|
+
* server to synthesize any half-buffered text, we arm a quiescence timer
|
|
28
|
+
* that fires 500 ms after the last received audio chunk. When it fires,
|
|
29
|
+
* `done` is emitted.
|
|
30
|
+
*
|
|
31
|
+
* **Audio format.** The URL requests `audioFormat=pcm` at the negotiated
|
|
32
|
+
* `sampleRate`, which returns raw PCM16 little-endian. We decode the base64
|
|
33
|
+
* payload and construct a zero-copy `Int16Array` view over the decoded bytes.
|
|
34
|
+
*/
|
|
35
|
+
import WebSocket from "ws";
|
|
36
|
+
import { type RimeOptions } from "../../../sdk/providers/tts/rime.ts";
|
|
37
|
+
import { type TtsOpener, type TtsSession } from "../../../sdk/providers.ts";
|
|
38
|
+
/** Internal: TtsSession with a test-only handle to the raw WebSocket. */
|
|
39
|
+
export interface RimeSession extends TtsSession {
|
|
40
|
+
/** @internal Test-only: exposes the underlying raw WebSocket. */
|
|
41
|
+
readonly _ws: WebSocket;
|
|
42
|
+
}
|
|
43
|
+
/** Build a {@link TtsOpener} from resolved Rime descriptor options. */
|
|
44
|
+
export declare function openRime(opts: RimeOptions): TtsOpener;
|
|
@@ -7,8 +7,10 @@ export * from "./builtin-tools.ts";
|
|
|
7
7
|
export * from "./runtime.ts";
|
|
8
8
|
export * from "./runtime-config.ts";
|
|
9
9
|
export * from "./server.ts";
|
|
10
|
-
export * from "./session.ts";
|
|
11
|
-
export * from "./session-ctx.ts";
|
|
10
|
+
export * from "./session-core.ts";
|
|
12
11
|
export * from "./tool-executor.ts";
|
|
12
|
+
export * from "./transports/pipeline-transport.ts";
|
|
13
|
+
export * from "./transports/s2s-transport.ts";
|
|
14
|
+
export * from "./transports/types.ts";
|
|
13
15
|
export * from "./unstorage-kv.ts";
|
|
14
16
|
export * from "./ws-handler.ts";
|