kernl 0.8.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +15 -0
  3. package/dist/agent/base.d.ts +73 -0
  4. package/dist/agent/base.d.ts.map +1 -0
  5. package/dist/agent/base.js +137 -0
  6. package/dist/agent/index.d.ts +2 -0
  7. package/dist/agent/index.d.ts.map +1 -1
  8. package/dist/agent/index.js +2 -1
  9. package/dist/agent/types.d.ts +4 -0
  10. package/dist/agent/types.d.ts.map +1 -1
  11. package/dist/agent.d.ts +10 -90
  12. package/dist/agent.d.ts.map +1 -1
  13. package/dist/agent.js +5 -171
  14. package/dist/api/resources/agents/agents.d.ts +11 -7
  15. package/dist/api/resources/agents/agents.d.ts.map +1 -1
  16. package/dist/api/resources/agents/agents.js +14 -8
  17. package/dist/kernl/kernl.d.ts +2 -2
  18. package/dist/kernl/kernl.d.ts.map +1 -1
  19. package/dist/kernl/kernl.js +6 -6
  20. package/dist/kernl/types.d.ts +3 -3
  21. package/dist/kernl/types.d.ts.map +1 -1
  22. package/dist/lib/env.d.ts +2 -2
  23. package/dist/mcp/__tests__/utils.test.js +4 -2
  24. package/dist/mcp/utils.d.ts +1 -1
  25. package/dist/mcp/utils.js +1 -1
  26. package/dist/realtime/agent.d.ts +17 -0
  27. package/dist/realtime/agent.d.ts.map +1 -0
  28. package/dist/realtime/agent.js +17 -0
  29. package/dist/realtime/channel.d.ts +30 -0
  30. package/dist/realtime/channel.d.ts.map +1 -0
  31. package/dist/realtime/channel.js +1 -0
  32. package/dist/realtime/index.d.ts +5 -0
  33. package/dist/realtime/index.d.ts.map +1 -0
  34. package/dist/realtime/index.js +4 -0
  35. package/dist/realtime/session.d.ts +98 -0
  36. package/dist/realtime/session.d.ts.map +1 -0
  37. package/dist/realtime/session.js +203 -0
  38. package/dist/realtime/types.d.ts +58 -0
  39. package/dist/realtime/types.d.ts.map +1 -0
  40. package/dist/realtime/types.js +1 -0
  41. package/dist/storage/in-memory.d.ts.map +1 -1
  42. package/dist/storage/in-memory.js +5 -1
  43. package/dist/tool/__tests__/toolkit.test.js +2 -2
  44. package/dist/tool/tool.d.ts +2 -1
  45. package/dist/tool/tool.d.ts.map +1 -1
  46. package/dist/tool/toolkit.d.ts +4 -4
  47. package/dist/tool/toolkit.d.ts.map +1 -1
  48. package/dist/tool/toolkit.js +2 -1
  49. package/dist/tool/types.d.ts +4 -4
  50. package/dist/tool/types.d.ts.map +1 -1
  51. package/package.json +4 -4
  52. package/src/agent/base.ts +220 -0
  53. package/src/agent/index.ts +2 -0
  54. package/src/agent/types.ts +5 -0
  55. package/src/agent.ts +12 -231
  56. package/src/api/resources/agents/agents.ts +19 -13
  57. package/src/kernl/kernl.ts +9 -9
  58. package/src/kernl/types.ts +3 -3
  59. package/src/mcp/__tests__/utils.test.ts +4 -2
  60. package/src/mcp/utils.ts +1 -1
  61. package/src/realtime/agent.ts +24 -0
  62. package/src/realtime/channel.ts +32 -0
  63. package/src/realtime/index.ts +4 -0
  64. package/src/realtime/session.ts +259 -0
  65. package/src/realtime/types.ts +73 -0
  66. package/src/storage/in-memory.ts +9 -1
  67. package/src/tool/__tests__/toolkit.test.ts +2 -2
  68. package/src/tool/tool.ts +2 -1
  69. package/src/tool/toolkit.ts +6 -5
  70. package/src/tool/types.ts +4 -4
package/src/realtime/session.ts ADDED
@@ -0,0 +1,259 @@
1
+ import { EventEmitter } from "node:events";
2
+
3
+ import {
4
+ RealtimeModel,
5
+ RealtimeConnection,
6
+ RealtimeServerEvent,
7
+ RealtimeSessionConfig,
8
+ ToolCallEvent,
9
+ message,
10
+ } from "@kernl-sdk/protocol";
11
+
12
+ import { Context, UnknownContext } from "@/context";
13
+ import { MisconfiguredError } from "@/lib/error";
14
+
15
+ import { RealtimeAgent } from "./agent";
16
+ import type { RealtimeChannel } from "./channel";
17
+ import type { RealtimeSessionOptions } from "./types";
18
+
19
/**
 * A realtime session manages the connection to a realtime model.
 *
 * Handles the bidirectional communication between an agent and a model,
 * including audio I/O (via channels), tool execution, and event routing.
 *
 * Events emitted by the session:
 * - 'audio'      - audio output events from the assistant
 * - 'transcript' - speech transcriptions (user or assistant)
 * - 'text'       - text output from the assistant
 * - 'error'      - connection errors and `session.error` events
 * - 'status'     - connection status changes
 */
export class RealtimeSession<TContext = UnknownContext> extends EventEmitter {
  /**
   * Session ID. Null until the model reports `session.created`.
   */
  id: string | null = null;

  /**
   * The agent definition.
   */
  readonly agent: RealtimeAgent<TContext>;

  /**
   * The realtime model (session override, otherwise the agent's model).
   */
  readonly model: RealtimeModel;

  /**
   * The audio I/O channel (if any).
   */
  readonly channel: RealtimeChannel | null;

  /**
   * The session context.
   */
  readonly context: Context<TContext>;

  /**
   * The active connection. Null until connected and again after close().
   */
  private connection: RealtimeConnection | null = null;

  /**
   * Session options.
   */
  private options: RealtimeSessionOptions<TContext>;

  /**
   * Whether the channel listeners have been attached. Guards against
   * registering them again when connect() is called more than once, which
   * would forward every audio chunk multiple times.
   */
  private channelBound = false;

  /**
   * @param agent - The realtime agent that supplies instructions, tools and voice.
   * @param options - Per-session overrides (model, channel, transport, context, connect options).
   * @throws MisconfiguredError when a channel is supplied together with a
   *   transport that handles audio itself.
   */
  constructor(
    agent: RealtimeAgent<TContext>,
    options: RealtimeSessionOptions<TContext> = {},
  ) {
    super();

    if (options.transport?.handlesAudio && options.channel) {
      throw new MisconfiguredError(
        "Cannot use channel with WebRTC transport - audio is handled by transport",
      );
    }

    this.agent = agent;
    this.model = options.model ?? agent.model;
    this.channel = options.channel ?? null;
    this.context = options.context ?? new Context("kernl", {} as TContext);
    this.options = options;
  }

  /**
   * Connect to the realtime model.
   *
   * Resolves once the connection is established, listeners are wired and the
   * initial `session.update` has been sent. Rejects if any of those steps fail.
   */
  async connect(): Promise<void> {
    const sessionConfig = await this.buildSessionConfig();
    const options = {
      ...this.options.connectOptions,
      sessionConfig,
    };

    this.connection = this.options.transport
      ? await this.options.transport.connect(this.model, options)
      : await this.model.connect(options);

    this.connection.on("event", this.onEvent.bind(this));
    this.connection.on("error", (e) => this.emit("error", e));
    this.connection.on("status", (s) => this.emit("status", s));

    // Awaited so that initialization failures reject connect() instead of
    // surfacing as an unhandled promise rejection.
    await this.init();
  }

  /**
   * Initialize event listeners and send session configuration.
   */
  private async init(): Promise<void> {
    if (this.channel && !this.channelBound) {
      this.channelBound = true;
      this.channel.on("audio", (audio: string) => this.sendAudio(audio));
      this.channel.on("commit", () => this.commit());
      this.channel.on("interrupt", () => this.interrupt());
    }

    // Build the config first so the connection is looked up *after* the await;
    // if the session was closed in the meantime nothing is sent.
    const config = await this.buildSessionConfig();
    this.connection?.send({
      kind: "session.update",
      config,
    });
  }

  /**
   * Build session configuration from agent.
   *
   * @returns The agent's instructions, serialized tools and voice settings.
   */
  private async buildSessionConfig(): Promise<RealtimeSessionConfig> {
    const tools = await this.agent.tools(this.context);

    return {
      instructions: await this.agent.instructions(this.context),
      tools: tools.map((t) => t.serialize()),
      voice: this.agent.voice,
    };
  }

  /**
   * Handle incoming events from the connection.
   *
   * Maps protocol events to simplified user-facing events:
   * - 'audio' - audio output from assistant
   * - 'transcript' - speech transcriptions (user or assistant)
   * - 'text' - text output from assistant
   * - 'error' - errors
   *
   * Tool calls and `session.created` are handled internally; any other event
   * kind is ignored.
   */
  private onEvent(event: RealtimeServerEvent): void {
    switch (event.kind) {
      // Audio output → 'audio'
      case "audio.output.delta":
        this.channel?.sendAudio(event.audio);
        this.emit("audio", event);
        break;
      case "audio.output.done":
        this.emit("audio", event);
        break;

      // Speech transcriptions → 'transcript'
      case "transcript.input":
      case "transcript.output":
        this.emit("transcript", event);
        break;

      // Text output → 'text'
      case "text.output":
        this.emit("text", event);
        break;

      // Errors → 'error'
      case "session.error":
        this.emit("error", event.error);
        break;

      // Tool calls - handled internally
      case "tool.call":
        // Fire-and-forget by design (the handler is synchronous), but never
        // leave the promise floating: route unexpected failures to 'error'.
        void this.performActions(event).catch((err) => this.emit("error", err));
        break;

      // Session lifecycle - internal state
      case "session.created":
        this.id = event.session.id;
        break;
    }
  }

  /**
   * Execute tool calls from the model.
   *
   * Always answers the call with a `tool.result`: an error for unknown or
   * non-function tools, an error when the tool throws, and otherwise the
   * tool's own result/error.
   */
  private async performActions(event: ToolCallEvent): Promise<void> {
    const tool = this.agent.tool(event.toolId);
    if (!tool || tool.type !== "function") {
      this.connection?.send({
        kind: "tool.result",
        callId: event.callId,
        error: `Unknown tool: ${event.toolId}`,
      });
      return;
    }

    let result: string | undefined;
    let error: string | undefined;

    try {
      const outcome = await tool.invoke(
        this.context,
        event.arguments,
        event.callId,
      );
      result =
        outcome.state === "completed"
          ? RealtimeSession.serializeResult(outcome.result)
          : undefined;
      error = outcome.error ?? undefined;
    } catch (err) {
      // A throwing tool must still be answered, otherwise the model waits
      // forever for the result of this call.
      result = undefined;
      error = err instanceof Error ? err.message : String(err);
    }

    this.connection?.send({
      kind: "tool.result",
      callId: event.callId,
      result,
      error,
    });
  }

  /**
   * Convert a tool result into the string sent back to the model.
   *
   * Strings pass through unchanged and primitives keep their `String()` form.
   * Objects and arrays are JSON-encoded so they do not collapse into
   * "[object Object]"; values JSON cannot represent fall back to `String()`.
   */
  private static serializeResult(value: unknown): string {
    if (typeof value === "string") {
      return value;
    }
    if (typeof value === "object" && value !== null) {
      try {
        return JSON.stringify(value) ?? String(value);
      } catch {
        // e.g. circular structures or BigInt members
        return String(value);
      }
    }
    return String(value);
  }

  /**
   * Send audio to the model.
   *
   * @param audio - Audio payload appended to the model's input buffer.
   */
  sendAudio(audio: string): void {
    this.connection?.send({ kind: "audio.input.append", audio });
  }

  /**
   * Commit the audio buffer (signal end of speech).
   */
  commit(): void {
    this.connection?.send({ kind: "audio.input.commit" });
  }

  /**
   * Send a text message to the model.
   *
   * @param text - Message text, sent as a user message item.
   */
  sendMessage(text: string): void {
    this.connection?.send({
      kind: "item.create",
      item: message({ role: "user", text }),
    });
  }

  /**
   * Interrupt the current response.
   */
  interrupt(): void {
    this.connection?.send({ kind: "response.cancel" });
    this.channel?.interrupt();
  }

  /**
   * Mute audio input.
   */
  mute(): void {
    this.connection?.mute();
  }

  /**
   * Unmute audio input.
   */
  unmute(): void {
    this.connection?.unmute();
  }

  /**
   * Close the session and release resources.
   *
   * The connection reference is dropped first so that later calls
   * (sendAudio, commit, ...) become no-ops instead of writing to a closed
   * connection, and the connection is closed even if closing the channel throws.
   */
  close(): void {
    const connection = this.connection;
    this.connection = null;

    try {
      this.channel?.close();
    } finally {
      connection?.close();
    }
  }
}
package/src/realtime/types.ts ADDED
@@ -0,0 +1,73 @@
1
+ import {
2
+ RealtimeModel,
3
+ RealtimeTransport,
4
+ RealtimeConnectOptions,
5
+ } from "@kernl-sdk/protocol";
6
+
7
+ import { Context, UnknownContext } from "@/context";
8
+ import type { BaseAgentConfig } from "@/agent/base";
9
+
10
+ import type { RealtimeChannel } from "./channel";
11
+
12
/**
 * Settings accepted when defining a realtime agent.
 *
 * Extends the base agent configuration with the realtime-specific fields below.
 */
export interface RealtimeAgentConfig<TContext = UnknownContext>
  extends BaseAgentConfig<TContext> {
  /** Realtime model this agent uses. */
  model: RealtimeModel;

  /** Optional voice settings for the agent. */
  voice?: RealtimeAgentVoiceConfig;
}
27
+
28
/**
 * Voice settings for a realtime agent.
 */
export interface RealtimeAgentVoiceConfig {
  /** ID of the voice used for audio output. */
  voiceId: string;

  /** Optional playback speed multiplier. */
  speed?: number;
}
42
+
43
/**
 * Per-session options accepted when creating a realtime session.
 * Every field is optional.
 */
export interface RealtimeSessionOptions<TContext = UnknownContext> {
  /** Model to use for this session instead of the agent's default model. */
  model?: RealtimeModel;

  /**
   * Audio I/O channel (e.g., BrowserChannel, TwilioChannel).
   * Not used with WebRTC transport.
   */
  channel?: RealtimeChannel;

  /**
   * Custom transport (e.g., WebRTCTransport).
   * When omitted, model.connect() creates the default transport.
   */
  transport?: RealtimeTransport;

  /** Context for this session. */
  context?: Context<TContext>;

  /** Options forwarded to model.connect() or transport.connect(). */
  connectOptions?: RealtimeConnectOptions;
}
package/src/storage/in-memory.ts CHANGED
@@ -6,6 +6,7 @@
6
6
  * Defined here so that it can be used as default and for testing.
7
7
  */
8
8
 
9
+ import { Agent } from "@/agent";
9
10
  import { Thread } from "@/thread";
10
11
  import { Context } from "@/context";
11
12
  import { STOPPED } from "@kernl-sdk/protocol";
@@ -241,8 +242,15 @@ export class InMemoryThreadStore implements ThreadStore {
241
242
  );
242
243
  }
243
244
 
245
+ // safety: threads only exist for llm agents
246
+ if (agent.kind !== "llm") {
247
+ throw new Error(
248
+ `Thread ${data.tid} references non-llm agent ${data.agentId} (kind: ${agent.kind})`,
249
+ );
250
+ }
251
+
244
252
  return new Thread({
245
- agent,
253
+ agent: agent as Agent,
246
254
  tid: data.tid,
247
255
  context: new Context(data.namespace, data.context),
248
256
  model,
package/src/tool/__tests__/toolkit.test.ts CHANGED
@@ -167,10 +167,10 @@ describe("FunctionToolkit", () => {
167
167
  const serialized = (await toolkit.list()).map((tool: any) => tool.serialize());
168
168
  expect(serialized).toHaveLength(1);
169
169
  expect(serialized[0]).toEqual({
170
- type: "hosted-tool",
170
+ kind: "provider-defined",
171
171
  id: anotherHostedTool.id,
172
172
  name: anotherHostedTool.name,
173
- providerData: undefined,
173
+ args: {},
174
174
  });
175
175
  });
176
176
  });
package/src/tool/tool.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { z } from "zod";
2
2
  import { Context, UnknownContext } from "@/context";
3
+ import type { BaseAgent } from "@/agent/base";
3
4
 
4
5
  import { ModelBehaviorError } from "@/lib/error";
5
6
  import { logger } from "@/lib/logger";
@@ -61,7 +62,7 @@ export abstract class BaseTool<TContext = UnknownContext> {
61
62
  /**
62
63
  * Determines whether the tool should be exposed to the model for the current run.
63
64
  */
64
- abstract isEnabled(context: Context<TContext>, agent: any): Promise<boolean>;
65
+ abstract isEnabled(context: Context<TContext>, agent: BaseAgent<TContext>): Promise<boolean>;
65
66
 
66
67
  /**
67
68
  * Serialize this tool for sending to the model
package/src/tool/toolkit.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { Agent } from "@/agent";
1
+ import type { BaseAgent } from "@/agent/base";
2
2
  import type { Context, UnknownContext } from "@/context";
3
3
 
4
4
  import { MCPServer } from "@/mcp/base";
@@ -32,13 +32,13 @@ export abstract class BaseToolkit<TContext = UnknownContext> {
32
32
  /**
33
33
  * The agent this toolkit is bound to (if any)
34
34
  */
35
- protected agent?: Agent<TContext, any>;
35
+ protected agent?: BaseAgent<TContext>;
36
36
 
37
37
  /**
38
38
  * Bind this toolkit to an agent.
39
- * Called by Agent constructor.
39
+ * Called by agent constructor.
40
40
  */
41
- bind(agent: Agent<TContext, any>): void {
41
+ bind(agent: BaseAgent<TContext>): void {
42
42
  this.agent = agent;
43
43
  }
44
44
 
@@ -216,7 +216,8 @@ export class MCPToolkit<
216
216
  const mcpTools = await this.server.listTools();
217
217
 
218
218
  for (const mcpTool of mcpTools) {
219
- const tool = mcpToFunctionTool(this.server, mcpTool);
219
+ // safety: MCP tools are context-agnostic (external servers)
220
+ const tool = mcpToFunctionTool(this.server, mcpTool) as Tool<TContext>;
220
221
  this.cache.set(tool.id, tool);
221
222
  }
222
223
 
package/src/tool/types.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { z, type ZodType } from "zod";
2
2
 
3
- import { Agent } from "@/agent";
3
+ import type { BaseAgent } from "@/agent/base";
4
4
  import { Context, UnknownContext } from "@/context";
5
5
  import { MCPServer } from "@/mcp/base";
6
6
  import type { ToolCallState } from "@kernl-sdk/protocol";
@@ -79,7 +79,7 @@ export type ToolConfig<
79
79
  */
80
80
  export interface ToolkitFilterContext<TContext = UnknownContext> {
81
81
  context: Context<TContext>;
82
- agent: Agent<TContext, any>;
82
+ agent: BaseAgent<TContext>;
83
83
  toolkitId: string;
84
84
  }
85
85
 
@@ -227,12 +227,12 @@ export type ToolApprovalFunction<TParameters extends ToolInputParameters> = (
227
227
 
228
228
  export type ToolEnabledFunction<TContext = UnknownContext> = (
229
229
  context: Context<TContext>,
230
- agent: Agent<any, any>, // (TODO): why would we need to take an agent here?
230
+ agent: BaseAgent<TContext>,
231
231
  ) => Promise<boolean>;
232
232
 
233
233
  export type ToolEnabledPredicate<TContext = UnknownContext> = (args: {
234
234
  context: Context<TContext>;
235
- agent: Agent<any, any>; // (TODO): why take an agent here? other options?
235
+ agent: BaseAgent<TContext>;
236
236
  }) => boolean | Promise<boolean>;
237
237
 
238
238
  type ToolEnabledOption<Context = UnknownContext> =