@alexkroman1/aai 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/.turbo/turbo-build.log +9 -9
  2. package/CHANGELOG.md +13 -0
  3. package/dist/assemblyai-C969QGi4.js +35 -0
  4. package/dist/cartesia-BfQPOQ7Y.js +37 -0
  5. package/dist/host/_pipeline-test-fakes.d.ts +3 -1
  6. package/dist/host/providers/stt/deepgram.d.ts +28 -0
  7. package/dist/host/providers/tts/cartesia.d.ts +1 -1
  8. package/dist/host/providers/tts/rime.d.ts +44 -0
  9. package/dist/host/runtime-barrel.d.ts +4 -2
  10. package/dist/host/runtime-barrel.js +1432 -1208
  11. package/dist/host/runtime.d.ts +2 -2
  12. package/dist/host/s2s.d.ts +16 -16
  13. package/dist/host/session-core.d.ts +37 -0
  14. package/dist/host/transports/pipeline-transport.d.ts +48 -0
  15. package/dist/host/transports/s2s-transport.d.ts +19 -0
  16. package/dist/host/transports/types.d.ts +45 -0
  17. package/dist/host/ws-handler.d.ts +14 -10
  18. package/dist/sdk/protocol.d.ts +6 -5
  19. package/dist/sdk/providers/llm-barrel.js +1 -1
  20. package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
  21. package/dist/sdk/providers/stt-barrel.d.ts +1 -0
  22. package/dist/sdk/providers/stt-barrel.js +2 -2
  23. package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
  24. package/dist/sdk/providers/tts/rime.d.ts +42 -0
  25. package/dist/sdk/providers/tts-barrel.d.ts +1 -0
  26. package/dist/sdk/providers/tts-barrel.js +2 -2
  27. package/host/_pipeline-test-fakes.ts +6 -3
  28. package/host/_test-utils.ts +209 -128
  29. package/host/cleanup.test.ts +25 -298
  30. package/host/integration/pipeline-reference.integration.test.ts +30 -35
  31. package/host/providers/resolve.ts +10 -2
  32. package/host/providers/stt/deepgram.test.ts +229 -0
  33. package/host/providers/stt/deepgram.ts +172 -0
  34. package/host/providers/tts/cartesia.ts +7 -3
  35. package/host/providers/tts/rime.test.ts +251 -0
  36. package/host/providers/tts/rime.ts +322 -0
  37. package/host/runtime-barrel.ts +4 -2
  38. package/host/runtime.test.ts +13 -46
  39. package/host/runtime.ts +131 -23
  40. package/host/s2s.test.ts +122 -131
  41. package/host/s2s.ts +44 -52
  42. package/host/session-core.test.ts +257 -0
  43. package/host/session-core.ts +262 -0
  44. package/host/transports/pipeline-transport.test.ts +651 -0
  45. package/host/transports/pipeline-transport.ts +532 -0
  46. package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
  47. package/host/transports/s2s-transport.test.ts +56 -0
  48. package/host/transports/s2s-transport.ts +116 -0
  49. package/host/transports/types.test.ts +22 -0
  50. package/host/transports/types.ts +51 -0
  51. package/host/ws-handler.test.ts +324 -242
  52. package/host/ws-handler.ts +56 -59
  53. package/package.json +2 -1
  54. package/sdk/__snapshots__/exports.test.ts.snap +3 -3
  55. package/sdk/protocol-compat.test.ts +8 -0
  56. package/sdk/protocol.ts +6 -5
  57. package/sdk/providers/stt/deepgram.ts +43 -0
  58. package/sdk/providers/stt-barrel.ts +2 -0
  59. package/sdk/providers/tts/cartesia.ts +15 -5
  60. package/sdk/providers/tts/rime.ts +52 -0
  61. package/sdk/providers/tts-barrel.ts +2 -0
  62. package/dist/assemblyai-Cxg9eobY.js +0 -18
  63. package/dist/cartesia-DwDk2tEu.js +0 -10
  64. package/dist/host/pipeline-session-ctx.d.ts +0 -24
  65. package/dist/host/pipeline-session.d.ts +0 -52
  66. package/dist/host/session-ctx.d.ts +0 -73
  67. package/dist/host/session.d.ts +0 -62
  68. package/host/pipeline-session-ctx.test.ts +0 -31
  69. package/host/pipeline-session-ctx.ts +0 -36
  70. package/host/pipeline-session.test.ts +0 -672
  71. package/host/pipeline-session.ts +0 -533
  72. package/host/s2s-fixtures.test.ts +0 -237
  73. package/host/session-ctx.test.ts +0 -387
  74. package/host/session-ctx.ts +0 -134
  75. package/host/session-fixture-replay.test.ts +0 -128
  76. package/host/session.test.ts +0 -634
  77. package/host/session.ts +0 -412
  78. /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
@@ -1,134 +0,0 @@
1
- // Copyright 2025 the AAI authors. MIT license.
2
- /** Session context builder — extracted from session.ts. */
3
-
4
- import type { AgentConfig, ExecuteTool } from "../sdk/_internal-types.ts";
5
- import { DEFAULT_MAX_HISTORY } from "../sdk/constants.ts";
6
- import type { ClientSink } from "../sdk/protocol.ts";
7
- import type { Message } from "../sdk/types.ts";
8
- import { toolError } from "../sdk/utils.ts";
9
- import type { Logger } from "./runtime-config.ts";
10
- import type { S2sHandle } from "./s2s.ts";
11
-
12
- type PendingTool = { callId: string; result: string };
13
-
14
- /** Per-reply mutable state — reset on beginReply/cancelReply. */
15
- export type ReplyState = {
16
- pendingTools: PendingTool[];
17
- toolCallCount: number;
18
- currentReplyId: string | null;
19
- };
20
-
21
- /** Immutable dependencies injected at session creation. */
22
- export type SessionDeps = {
23
- readonly id: string;
24
- readonly agent: string;
25
- readonly client: ClientSink;
26
- readonly agentConfig: AgentConfig;
27
- readonly executeTool: ExecuteTool;
28
- readonly log: Logger;
29
- readonly maxHistory: number;
30
- };
31
-
32
- /**
33
- * Transport-agnostic session context shared by S2S and pipeline sessions.
34
- *
35
- * Owns reply lifecycle, conversation history (with sliding-window truncation),
36
- * and per-turn tool-call step enforcement. Transport-specific fields (e.g.
37
- * `s2s` for S2S, `stt`/`tts` for the pipeline) live on the extending types.
38
- *
39
- * Split into three layers:
40
- * - {@link SessionDeps} — immutable dependencies (set once)
41
- * - {@link ReplyState} via `reply` — per-reply mutable state (reset on beginReply/cancelReply)
42
- * - Remaining fields — conversation and lifecycle methods
43
- */
44
- export type BaseSessionCtx = SessionDeps & {
45
- reply: ReplyState;
46
- turnPromise: Promise<void> | null;
47
- conversationMessages: Message[];
48
-
49
- consumeToolCallStep(name: string, replyId: string | null): string | null;
50
- pushMessages(...msgs: Message[]): void;
51
- beginReply(replyId: string): void;
52
- cancelReply(): void;
53
- chainTurn(p: Promise<void>): void;
54
- };
55
-
56
- /**
57
- * S2S session context — {@link BaseSessionCtx} plus the S2S WebSocket handle.
58
- */
59
- export type S2sSessionCtx = BaseSessionCtx & {
60
- s2s: S2sHandle | null;
61
- };
62
-
63
- export function _buildBaseCtx(opts: {
64
- id: string;
65
- agent: string;
66
- client: ClientSink;
67
- agentConfig: AgentConfig;
68
- executeTool: ExecuteTool;
69
- log: Logger;
70
- maxHistory?: number | undefined;
71
- }): BaseSessionCtx {
72
- const { agentConfig, log } = opts;
73
- const maxHistory = opts.maxHistory ?? DEFAULT_MAX_HISTORY;
74
- const ctx: BaseSessionCtx = {
75
- ...opts,
76
- reply: { pendingTools: [], toolCallCount: 0, currentReplyId: null },
77
- turnPromise: null,
78
- conversationMessages: [],
79
- maxHistory,
80
- consumeToolCallStep(_name, replyId) {
81
- // Guard 1: reject tool calls from interrupted/stale replies
82
- if (replyId === null || replyId !== ctx.reply.currentReplyId) {
83
- return toolError("Reply was interrupted. Discarding stale tool call.");
84
- }
85
- // Guard 2: enforce maxSteps (default 5, set in manifest.ts) to prevent
86
- // runaway tool-call loops within a single LLM reply
87
- const maxSteps = agentConfig.maxSteps;
88
- ctx.reply.toolCallCount++;
89
- if (maxSteps !== undefined && ctx.reply.toolCallCount > maxSteps) {
90
- log.info("maxSteps exceeded, refusing tool call", {
91
- toolCallCount: ctx.reply.toolCallCount,
92
- maxSteps,
93
- });
94
- return toolError("Maximum tool steps reached. Please respond to the user now.");
95
- }
96
- return null;
97
- },
98
- pushMessages(...msgs: Message[]) {
99
- ctx.conversationMessages.push(...msgs);
100
- if (maxHistory > 0 && ctx.conversationMessages.length > maxHistory) {
101
- ctx.conversationMessages.splice(0, ctx.conversationMessages.length - maxHistory);
102
- }
103
- },
104
- beginReply(replyId: string) {
105
- ctx.reply = { pendingTools: [], toolCallCount: 0, currentReplyId: replyId };
106
- ctx.turnPromise = null;
107
- },
108
- cancelReply() {
109
- ctx.reply = { pendingTools: [], toolCallCount: 0, currentReplyId: null };
110
- },
111
- chainTurn(p: Promise<void>) {
112
- ctx.turnPromise = (ctx.turnPromise ?? Promise.resolve()).then(() => p);
113
- },
114
- };
115
- return ctx;
116
- }
117
-
118
- export function buildCtx(opts: {
119
- id: string;
120
- agent: string;
121
- client: ClientSink;
122
- agentConfig: AgentConfig;
123
- executeTool: ExecuteTool;
124
- log: Logger;
125
- maxHistory?: number | undefined;
126
- }): S2sSessionCtx {
127
- // Mutate the base ctx in place rather than spreading into a new object —
128
- // the helper methods close over the base ctx reference, so spreading would
129
- // leave them writing to an orphan object (e.g. `beginReply` would mutate
130
- // the base `reply`, not the spread copy's `reply`).
131
- const base = _buildBaseCtx(opts) as S2sSessionCtx;
132
- base.s2s = null;
133
- return base;
134
- }
@@ -1,128 +0,0 @@
1
- import { afterEach, describe, expect, test, vi } from "vitest";
2
- import {
3
- flush,
4
- loadFixture,
5
- makeClient,
6
- makeMockHandle,
7
- makeSessionOpts,
8
- replayFixtureMessages,
9
- } from "./_test-utils.ts";
10
- import { _internals, createS2sSession, type S2sSessionOptions } from "./session.ts";
11
-
12
- // ─── Session-level fixture replay tests ─────────────────────────────────────
13
- //
14
- // These replay real AssemblyAI S2S messages (recorded with Kokoro TTS audio)
15
- // through the full session orchestration layer — the same setupListeners /
16
- // handleToolCall / handleReplyDone code path that production uses.
17
-
18
- describe("fixture replay through session", () => {
19
- let connectSpy: ReturnType<typeof vi.spyOn>;
20
-
21
- function setupReplay(overrides?: Partial<S2sSessionOptions>) {
22
- const mockHandle = makeMockHandle();
23
- connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
24
- const client = makeClient();
25
- const opts = makeSessionOpts({ client, ...overrides });
26
- const session = createS2sSession(opts);
27
- return { session, client, mockHandle, opts };
28
- }
29
-
30
- afterEach(() => {
31
- connectSpy?.mockRestore();
32
- });
33
-
34
- test("greeting session: client receives speech events and chat messages", async () => {
35
- const { session, client, mockHandle } = setupReplay();
36
- await session.start();
37
-
38
- const messages = loadFixture("greeting-session-sequence.json");
39
- replayFixtureMessages(mockHandle, messages);
40
-
41
- // Client should have received speech_started/stopped for the greeting
42
- const types = client.events.map((e) => (e as { type: string }).type);
43
- expect(types).toContain("agent_transcript"); // final agent transcript
44
- expect(types).toContain("reply_done"); // reply completed
45
- });
46
-
47
- test("simple question: user transcript builds conversation history", async () => {
48
- const { session, client, mockHandle } = setupReplay();
49
- await session.start();
50
-
51
- const messages = loadFixture("simple-question-sequence.json");
52
- replayFixtureMessages(mockHandle, messages);
53
- await flush();
54
-
55
- // Client should see both greeting and answer as agent_transcript events
56
- const chatEvents = client.events.filter(
57
- (e) => (e as { type: string }).type === "agent_transcript",
58
- );
59
- expect(chatEvents.length).toBe(2); // greeting + answer
60
- });
61
-
62
- test("tool call: session executes tool, buffers result, sends after replyDone", async () => {
63
- const executeTool = vi.fn(async () =>
64
- JSON.stringify({ city: "San Francisco", temperature: "72°F", condition: "sunny" }),
65
- );
66
-
67
- const { session, client, mockHandle } = setupReplay({ executeTool });
68
- await session.start();
69
-
70
- const messages = loadFixture("tool-call-sequence.json");
71
- replayFixtureMessages(mockHandle, messages);
72
-
73
- // Wait for tool execution to complete
74
- await vi.waitFor(() => expect(executeTool).toHaveBeenCalled());
75
- await session.waitForTurn();
76
-
77
- // Tool was called with the right name and args
78
- expect(executeTool).toHaveBeenCalledWith(
79
- "get_weather",
80
- expect.objectContaining({ city: "San Francisco" }),
81
- expect.any(String), // session ID
82
- expect.any(Array), // messages
83
- );
84
-
85
- // Client received tool_call and tool_call_done events
86
- expect(client.events).toContainEvent("tool_call", { toolName: "get_weather" });
87
- expect(client.events).toContainEvent("tool_call_done");
88
-
89
- // Tool result was sent back to S2S after replyDone
90
- await vi.waitFor(() => expect(mockHandle.sendToolResult).toHaveBeenCalled());
91
- });
92
-
93
- test("tool call: conversation messages accumulate correctly", async () => {
94
- const executeTool = vi.fn(async () => JSON.stringify({ result: "ok" }));
95
- const { session, mockHandle } = setupReplay({ executeTool });
96
- await session.start();
97
-
98
- const messages = loadFixture("tool-call-sequence.json");
99
- replayFixtureMessages(mockHandle, messages);
100
- await vi.waitFor(() => expect(executeTool).toHaveBeenCalled());
101
- await session.waitForTurn();
102
-
103
- // The conversation messages passed to executeTool should include
104
- // the user's transcript (from STT recognition of Kokoro audio)
105
- const call = executeTool.mock.calls[0] as unknown as [
106
- string,
107
- unknown,
108
- string,
109
- { role: string; content: string }[],
110
- ];
111
- const userMsg = call[3]?.find((m) => m.role === "user");
112
- expect(userMsg?.content.toLowerCase()).toContain("weather");
113
- });
114
-
115
- test("user speech recognition events reach the client", async () => {
116
- const { session, client, mockHandle } = setupReplay();
117
- await session.start();
118
-
119
- const messages = loadFixture("user-speech-recognition.json");
120
- replayFixtureMessages(mockHandle, messages);
121
- await flush();
122
-
123
- const types = client.events.map((e) => (e as { type: string }).type);
124
- expect(types).toContain("speech_started");
125
- expect(types).toContain("speech_stopped");
126
- expect(types).toContain("user_transcript"); // triggers orchestration
127
- });
128
- });