@alexkroman1/aai 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/.turbo/turbo-build.log +9 -9
  2. package/CHANGELOG.md +13 -0
  3. package/dist/assemblyai-C969QGi4.js +35 -0
  4. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  5. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  6. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  7. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  8. package/dist/host/providers/tts/rime.d.ts +44 -0
  9. package/dist/host/runtime-barrel.d.ts +4 -2
  10. package/dist/host/runtime-barrel.js +1432 -1208
  11. package/dist/host/runtime.d.ts +2 -2
  12. package/dist/host/s2s.d.ts +16 -16
  13. package/dist/host/session-core.d.ts +37 -0
  14. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  15. package/dist/host/transports/s2s-transport.d.ts +19 -0
  16. package/dist/host/transports/types.d.ts +45 -0
  17. package/dist/host/ws-handler.d.ts +14 -10
  18. package/dist/sdk/protocol.d.ts +6 -5
  19. package/dist/sdk/providers/llm-barrel.js +1 -1
  20. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  21. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  22. package/dist/sdk/providers/stt-barrel.js +2 -2
  23. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  24. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  25. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  26. package/dist/sdk/providers/tts-barrel.js +2 -2
  27. package/host/_pipeline-test-fakes.ts +6 -3
  28. package/host/_test-utils.ts +209 -128
  29. package/host/cleanup.test.ts +25 -298
  30. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  31. package/host/providers/resolve.ts +10 -2
  32. package/host/providers/stt/deepgram.test.ts +229 -0
  33. package/host/providers/stt/deepgram.ts +172 -0
  34. package/host/providers/tts/cartesia.ts +7 -3
  35. package/host/providers/tts/rime.test.ts +251 -0
  36. package/host/providers/tts/rime.ts +322 -0
  37. package/host/runtime-barrel.ts +4 -2
  38. package/host/runtime.test.ts +13 -46
  39. package/host/runtime.ts +131 -23
  40. package/host/s2s.test.ts +122 -131
  41. package/host/s2s.ts +44 -52
  42. package/host/session-core.test.ts +257 -0
  43. package/host/session-core.ts +262 -0
  44. package/host/transports/pipeline-transport.test.ts +651 -0
  45. package/host/transports/pipeline-transport.ts +532 -0
  46. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  47. package/host/transports/s2s-transport.test.ts +56 -0
  48. package/host/transports/s2s-transport.ts +116 -0
  49. package/host/transports/types.test.ts +22 -0
  50. package/host/transports/types.ts +51 -0
  51. package/host/ws-handler.test.ts +324 -242
  52. package/host/ws-handler.ts +56 -59
  53. package/package.json +2 -1
  54. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  55. package/sdk/protocol-compat.test.ts +8 -0
  56. package/sdk/protocol.ts +6 -5
  57. package/sdk/providers/stt/deepgram.ts +43 -0
  58. package/sdk/providers/stt-barrel.ts +2 -0
  59. package/sdk/providers/tts/cartesia.ts +15 -5
  60. package/sdk/providers/tts/rime.ts +52 -0
  61. package/sdk/providers/tts-barrel.ts +2 -0
  62. package/dist/assemblyai-Cxg9eobY.js +0 -18
  63. package/dist/cartesia-DwDk2tEu.js +0 -10
  64. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  65. package/dist/host/pipeline-session.d.ts +0 -52
  66. package/dist/host/session-ctx.d.ts +0 -73
  67. package/dist/host/session.d.ts +0 -62
  68. package/host/pipeline-session-ctx.test.ts +0 -31
  69. package/host/pipeline-session-ctx.ts +0 -36
  70. package/host/pipeline-session.test.ts +0 -672
  71. package/host/pipeline-session.ts +0 -533
  72. package/host/s2s-fixtures.test.ts +0 -237
  73. package/host/session-ctx.test.ts +0 -387
  74. package/host/session-ctx.ts +0 -134
  75. package/host/session-fixture-replay.test.ts +0 -128
  76. package/host/session.test.ts +0 -634
  77. package/host/session.ts +0 -412
  78. package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
@@ -1,5 +1,5 @@
1
1
 
2
- > @alexkroman1/aai@1.4.5 build /home/runner/work/agent/agent/packages/aai
2
+ > @alexkroman1/aai@1.5.0 build /home/runner/work/agent/agent/packages/aai
3
3
  > tsdown && tsc -p tsconfig.build.json
4
4
 
5
5
  ℹ tsdown v0.21.7 powered by rolldown v1.0.0-rc.12
@@ -8,19 +8,19 @@
8
8
  ℹ target: node22
9
9
  ℹ tsconfig: tsconfig.json
10
10
  ℹ Build start
11
- ℹ dist/host/runtime-barrel.js 77.48 kB │ gzip: 23.08 kB
11
+ ℹ dist/host/runtime-barrel.js 87.66 kB │ gzip: 25.35 kB
12
12
  ℹ dist/sdk/protocol.js  4.75 kB │ gzip: 1.76 kB
13
13
  ℹ dist/index.js  2.88 kB │ gzip: 1.24 kB
14
14
  ℹ dist/sdk/manifest-barrel.js  0.36 kB │ gzip: 0.20 kB
15
- ℹ dist/sdk/providers/stt-barrel.js  0.13 kB │ gzip: 0.11 kB
15
+ ℹ dist/sdk/providers/tts-barrel.js  0.26 kB │ gzip: 0.16 kB
16
+ ℹ dist/sdk/providers/stt-barrel.js  0.19 kB │ gzip: 0.14 kB
16
17
  ℹ dist/sdk/providers/llm-barrel.js  0.12 kB │ gzip: 0.11 kB
17
- ℹ dist/sdk/providers/tts-barrel.js  0.12 kB │ gzip: 0.10 kB
18
18
  ℹ dist/types-KUgezM6u.js  5.64 kB │ gzip: 2.39 kB
19
19
  ℹ dist/_internal-types-3p3OJZPb.js  5.61 kB │ gzip: 2.07 kB
20
20
  ℹ dist/constants-C2nirZUI.js  3.10 kB │ gzip: 1.38 kB
21
21
  ℹ dist/ws-upgrade-BeOQ7fXL.js  1.14 kB │ gzip: 0.54 kB
22
- ℹ dist/assemblyai-Cxg9eobY.js  0.53 kB │ gzip: 0.35 kB
23
- ℹ dist/anthropic-BrUCPKUc.js  0.23 kB │ gzip: 0.18 kB
24
- ℹ dist/cartesia-DwDk2tEu.js  0.22 kB │ gzip: 0.17 kB
25
- ℹ 14 files, total: 102.30 kB
26
- ✔ Build complete in 49ms
22
+ ℹ dist/cartesia-BfQPOQ7Y.js  1.08 kB │ gzip: 0.50 kB
23
+ ℹ dist/assemblyai-C969QGi4.js  1.03 kB │ gzip: 0.42 kB
24
+ ℹ dist/anthropic-CcLZygAr.js  0.23 kB │ gzip: 0.18 kB
25
+ ℹ 14 files, total: 114.04 kB
26
+ ✔ Build complete in 45ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # @alexkroman1/aai
2
2
 
3
+ ## 1.5.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 58c5c75: Consolidate session.ts + pipeline-session.ts into a unified SessionCore with two transport strategies (S2S, pipeline). Switch connectS2s to typed callbacks (removing the nanoevents-backed S2sHandle emitter) and flatten client→server→provider dispatch from four layers to two. Wire format is JSON text events + raw PCM16 binary audio frames — the existing public protocol is unchanged. Adds Deepgram as a pipeline-mode STT option and Rime as a pipeline-mode TTS option.
8
+ - 868b85e: Plumb agent maxSteps and toolChoice config into pipeline mode streamText
9
+ - 58c5c75: Add Deepgram as a pipeline-mode STT provider option
10
+ - 58c5c75: feat(aai): add Rime as a pipeline-mode TTS provider option
11
+
12
+ ### Patch Changes
13
+
14
+ - a361363: Fix Rime TTS provider: correct WebSocket host (users-ws.rime.ai), JSON message protocol on /ws2, longer first-audio timeout so the greeting plays. Default voice for cartesia() and rime() so they can be called with no args.
15
+
3
16
  ## 1.4.5
4
17
 
5
18
  ### Patch Changes
@@ -0,0 +1,35 @@
1
+ //#region sdk/providers/stt/deepgram.ts
2
+ /** Kind tag recognised by the host-side resolver. */
3
+ const DEEPGRAM_KIND = "deepgram";
4
+ /**
5
+ * Build a Deepgram STT descriptor.
6
+ *
7
+ * The API key is resolved host-side from the agent's env
8
+ * (`DEEPGRAM_API_KEY`); there is no factory-time key parameter, so the
9
+ * descriptor stays free of secrets and safe to serialize.
10
+ */
11
+ function deepgram(opts = {}) {
12
+ return {
13
+ kind: DEEPGRAM_KIND,
14
+ options: { ...opts }
15
+ };
16
+ }
17
+ //#endregion
18
+ //#region sdk/providers/stt/assemblyai.ts
19
+ /** Kind tag recognised by the host-side resolver. */
20
+ const ASSEMBLYAI_KIND = "assemblyai";
21
+ /**
22
+ * Build an AssemblyAI STT descriptor.
23
+ *
24
+ * The API key is resolved host-side from the agent's env
25
+ * (`ASSEMBLYAI_API_KEY`); there is no factory-time key parameter, so the
26
+ * descriptor stays free of secrets and safe to serialize.
27
+ */
28
+ function assemblyAI(opts = {}) {
29
+ return {
30
+ kind: ASSEMBLYAI_KIND,
31
+ options: { ...opts }
32
+ };
33
+ }
34
+ //#endregion
35
+ export { deepgram as i, assemblyAI as n, DEEPGRAM_KIND as r, ASSEMBLYAI_KIND as t };
@@ -0,0 +1,37 @@
1
+ //#region sdk/providers/tts/rime.ts
2
+ const RIME_KIND = "rime";
3
+ /**
4
+ * Default Rime speaker used when callers invoke `rime()` with no `voice`.
5
+ * `cove` is a `mistv2` speaker, matching the default model below — so a
6
+ * bare `rime()` works out of the box for new agents.
7
+ */
8
+ const RIME_DEFAULT_VOICE = "cove";
9
+ function rime(opts = {}) {
10
+ return {
11
+ kind: RIME_KIND,
12
+ options: {
13
+ ...opts,
14
+ voice: opts.voice ?? "cove"
15
+ }
16
+ };
17
+ }
18
+ //#endregion
19
+ //#region sdk/providers/tts/cartesia.ts
20
+ const CARTESIA_KIND = "cartesia";
21
+ /**
22
+ * Default voice used when callers invoke `cartesia()` with no `voice`. This
23
+ * is the same voice the example templates ship with, so a bare `cartesia()`
24
+ * works out of the box for new agents.
25
+ */
26
+ const CARTESIA_DEFAULT_VOICE = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
27
+ function cartesia(opts = {}) {
28
+ return {
29
+ kind: CARTESIA_KIND,
30
+ options: {
31
+ ...opts,
32
+ voice: opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
33
+ }
34
+ };
35
+ }
36
+ //#endregion
37
+ export { RIME_KIND as a, RIME_DEFAULT_VOICE as i, CARTESIA_KIND as n, rime as o, cartesia as r, CARTESIA_DEFAULT_VOICE as t };
@@ -114,4 +114,6 @@ export declare function createFakeLanguageModel(options: {
114
114
  } | {
115
115
  steps: ScriptedPart[][];
116
116
  delayMs?: number;
117
- }): LanguageModel;
117
+ }): LanguageModel & {
118
+ readonly calls: readonly Record<string, unknown>[];
119
+ };
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Deepgram Nova streaming STT opener (host-only).
3
+ *
4
+ * The user-facing descriptor factory (`deepgram(...)`) lives in
5
+ * `sdk/providers/stt/deepgram.ts`. This module is the host-side
6
+ * counterpart: it takes the descriptor options + an API key and
7
+ * returns an {@link SttOpener} that the pipeline session drives.
8
+ *
9
+ * Default model: `"nova-3"`. Any string is forwarded verbatim to the SDK.
10
+ *
11
+ * This adapter targets the Deepgram SDK v5 (`@deepgram/sdk@^5`). The v5
12
+ * streaming API is:
13
+ * `client.listen.v1.connect(args)` → `Promise<V1Socket>`
14
+ * followed by:
15
+ * `socket.connect()` + `socket.waitForOpen()` to establish the connection.
16
+ */
17
+ import { DeepgramClient } from "@deepgram/sdk";
18
+ import type { DeepgramOptions } from "../../../sdk/providers/stt/deepgram.ts";
19
+ import { type SttOpener, type SttSession } from "../../../sdk/providers.ts";
20
+ type V1Socket = Awaited<ReturnType<InstanceType<typeof DeepgramClient>["listen"]["v1"]["connect"]>>;
21
+ /** Internal: SttSession with a test-only handle to the raw SDK socket. */
22
+ export interface DeepgramSession extends SttSession {
23
+ /** @internal Test-only: exposes the underlying SDK socket for fixture replay. */
24
+ readonly _connection: V1Socket;
25
+ }
26
+ /** Build an {@link SttOpener} from resolved Deepgram descriptor options. */
27
+ export declare function openDeepgram(opts?: DeepgramOptions): SttOpener;
28
+ export {};
@@ -19,7 +19,7 @@
19
19
  * conversion.
20
20
  */
21
21
  import type { TTSWS } from "@cartesia/cartesia-js/resources/tts";
22
- import type { CartesiaOptions } from "../../../sdk/providers/tts/cartesia.ts";
22
+ import { type CartesiaOptions } from "../../../sdk/providers/tts/cartesia.ts";
23
23
  import { type TtsOpener, type TtsSession } from "../../../sdk/providers.ts";
24
24
  /** Internal: TtsSession with a test-only handle to the raw SDK socket. */
25
25
  export interface CartesiaSession extends TtsSession {
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Rime TTS opener (host-only).
3
+ *
4
+ * The user-facing descriptor factory (`rime(...)`) lives in
5
+ * `sdk/providers/tts/rime.ts`. This module is the host-side
6
+ * counterpart: it takes the descriptor options + an API key and
7
+ * returns a {@link TtsOpener} that the pipeline session drives.
8
+ *
9
+ * **Protocol.** Connects to Rime's `ws2` JSON WebSocket endpoint
10
+ * (`wss://users-ws.rime.ai/ws2`). Client-to-server messages are JSON:
11
+ * - `{ "text": "..." }` — append text to the synthesis buffer
12
+ * - `{ "operation": "clear" }` — drop buffered text (barge-in)
13
+ * - `{ "operation": "eos" }` — drain buffer, close connection (NOT used
14
+ * during a session: it would tear down the WS, forcing reconnect per
15
+ * turn). We force end-of-turn synthesis with a trailing `"."` instead.
16
+ * The server responds with JSON frames:
17
+ * - `{ type: "chunk", data: <base64 PCM16 LE>, contextId: string | null }`
18
+ * - `{ type: "timestamps", ... }` (ignored)
19
+ * - `{ type: "error", message: string }` (surfaced as `tts_stream_error`)
20
+ *
21
+ * **Single long-lived connection per session.** Rime buffers text until it
22
+ * sees terminal punctuation (`.`, `?`, `!`), so we use one WebSocket per
23
+ * `open()` call and reuse it across turns. `clear` resets the buffer
24
+ * between cancellations.
25
+ *
26
+ * **Done detection.** After `flush()` sends a trailing `"."` to force the
27
+ * server to synthesize any half-buffered text, we arm a quiescence timer
28
+ * that fires 500 ms after the last received audio chunk. When it fires,
29
+ * `done` is emitted.
30
+ *
31
+ * **Audio format.** The URL requests `audioFormat=pcm` at the negotiated
32
+ * `sampleRate`, which returns raw PCM16 little-endian. We decode the base64
33
+ * payload and construct a zero-copy `Int16Array` view over the decoded bytes.
34
+ */
35
+ import WebSocket from "ws";
36
+ import { type RimeOptions } from "../../../sdk/providers/tts/rime.ts";
37
+ import { type TtsOpener, type TtsSession } from "../../../sdk/providers.ts";
38
+ /** Internal: TtsSession with a test-only handle to the raw WebSocket. */
39
+ export interface RimeSession extends TtsSession {
40
+ /** @internal Test-only: exposes the underlying raw WebSocket. */
41
+ readonly _ws: WebSocket;
42
+ }
43
+ /** Build a {@link TtsOpener} from resolved Rime descriptor options. */
44
+ export declare function openRime(opts: RimeOptions): TtsOpener;
@@ -7,8 +7,10 @@ export * from "./builtin-tools.ts";
7
7
  export * from "./runtime.ts";
8
8
  export * from "./runtime-config.ts";
9
9
  export * from "./server.ts";
10
- export * from "./session.ts";
11
- export * from "./session-ctx.ts";
10
+ export * from "./session-core.ts";
12
11
  export * from "./tool-executor.ts";
12
+ export * from "./transports/pipeline-transport.ts";
13
+ export * from "./transports/s2s-transport.ts";
14
+ export * from "./transports/types.ts";
13
15
  export * from "./unstorage-kv.ts";
14
16
  export * from "./ws-handler.ts";