mojentic 1.2.4 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/async-dispatcher.d.ts +1 -0
- package/dist/agents/async-dispatcher.d.ts.map +1 -1
- package/dist/agents/async-dispatcher.js +6 -1
- package/dist/agents/async-dispatcher.js.map +1 -1
- package/dist/agents/simple-recursive-agent.d.ts +8 -0
- package/dist/agents/simple-recursive-agent.d.ts.map +1 -1
- package/dist/agents/simple-recursive-agent.js +35 -5
- package/dist/agents/simple-recursive-agent.js.map +1 -1
- package/dist/context/shared-working-memory.js +1 -1
- package/dist/context/shared-working-memory.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/broker.d.ts +10 -4
- package/dist/llm/broker.d.ts.map +1 -1
- package/dist/llm/broker.js +78 -68
- package/dist/llm/broker.js.map +1 -1
- package/dist/llm/gateways/openai-messages-adapter.js +3 -3
- package/dist/llm/gateways/openai-messages-adapter.js.map +1 -1
- package/dist/llm/gateways/openai-model-registry.d.ts.map +1 -1
- package/dist/llm/gateways/openai-model-registry.js +34 -1
- package/dist/llm/gateways/openai-model-registry.js.map +1 -1
- package/dist/llm/models.d.ts +2 -0
- package/dist/llm/models.d.ts.map +1 -1
- package/dist/llm/tools/index.d.ts +1 -0
- package/dist/llm/tools/index.d.ts.map +1 -1
- package/dist/llm/tools/index.js +1 -0
- package/dist/llm/tools/index.js.map +1 -1
- package/dist/llm/tools/runner.d.ts +89 -0
- package/dist/llm/tools/runner.d.ts.map +1 -0
- package/dist/llm/tools/runner.js +130 -0
- package/dist/llm/tools/runner.js.map +1 -0
- package/dist/llm/tools/tool.d.ts +18 -3
- package/dist/llm/tools/tool.d.ts.map +1 -1
- package/dist/llm/tools/tool.js.map +1 -1
- package/dist/realtime/broker.d.ts +40 -0
- package/dist/realtime/broker.d.ts.map +1 -0
- package/dist/realtime/broker.js +74 -0
- package/dist/realtime/broker.js.map +1 -0
- package/dist/realtime/config.d.ts +134 -0
- package/dist/realtime/config.d.ts.map +1 -0
- package/dist/realtime/config.js +23 -0
- package/dist/realtime/config.js.map +1 -0
- package/dist/realtime/events.d.ts +146 -0
- package/dist/realtime/events.d.ts.map +1 -0
- package/dist/realtime/events.js +10 -0
- package/dist/realtime/events.js.map +1 -0
- package/dist/realtime/gateway.d.ts +48 -0
- package/dist/realtime/gateway.d.ts.map +1 -0
- package/dist/realtime/gateway.js +9 -0
- package/dist/realtime/gateway.js.map +1 -0
- package/dist/realtime/index.d.ts +14 -0
- package/dist/realtime/index.d.ts.map +1 -0
- package/dist/realtime/index.js +30 -0
- package/dist/realtime/index.js.map +1 -0
- package/dist/realtime/openai-gateway.d.ts +39 -0
- package/dist/realtime/openai-gateway.d.ts.map +1 -0
- package/dist/realtime/openai-gateway.js +154 -0
- package/dist/realtime/openai-gateway.js.map +1 -0
- package/dist/realtime/schemas.d.ts +333 -0
- package/dist/realtime/schemas.d.ts.map +1 -0
- package/dist/realtime/schemas.js +243 -0
- package/dist/realtime/schemas.js.map +1 -0
- package/dist/realtime/session.d.ts +115 -0
- package/dist/realtime/session.d.ts.map +1 -0
- package/dist/realtime/session.js +715 -0
- package/dist/realtime/session.js.map +1 -0
- package/dist/realtime/transport.d.ts +87 -0
- package/dist/realtime/transport.d.ts.map +1 -0
- package/dist/realtime/transport.js +115 -0
- package/dist/realtime/transport.js.map +1 -0
- package/dist/tracer/tracerEvents.d.ts +23 -0
- package/dist/tracer/tracerEvents.d.ts.map +1 -1
- package/dist/tracer/tracerEvents.js +40 -1
- package/dist/tracer/tracerEvents.js.map +1 -1
- package/dist/tracer/tracerSystem.d.ts +16 -0
- package/dist/tracer/tracerSystem.d.ts.map +1 -1
- package/dist/tracer/tracerSystem.js +22 -0
- package/dist/tracer/tracerSystem.js.map +1 -1
- package/package.json +27 -14
|
@@ -0,0 +1,715 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Realtime session — the stateful surface exposed by {@link RealtimeVoiceBroker}.
|
|
4
|
+
*
|
|
5
|
+
* Owns the socket lifetime, demultiplexes raw server events into a
|
|
6
|
+
* vendor-neutral {@link RealtimeEvent} stream, and drives parallel tool
|
|
7
|
+
* execution per response turn.
|
|
8
|
+
*/
|
|
9
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
+
exports.RealtimeSession = void 0;
|
|
11
|
+
exports.encodeBase64Pcm16 = encodeBase64Pcm16;
|
|
12
|
+
exports.buildSessionUpdate = buildSessionUpdate;
|
|
13
|
+
const crypto_1 = require("crypto");
|
|
14
|
+
const error_1 = require("../error");
|
|
15
|
+
const tools_1 = require("../llm/tools");
|
|
16
|
+
const config_1 = require("./config");
|
|
17
|
+
const TOOL_BATCH_SOURCE = 'RealtimeVoiceBroker';
|
|
18
|
+
/**
 * Create a single-producer async channel.
 *
 * - `push(value)` hands the value to the oldest waiting reader, or buffers it
 *   when nobody is waiting. Values pushed after `end()` are dropped.
 * - `end()` marks the channel finished and releases every waiting reader.
 * - `iter()` returns an async generator that drains buffered values first and
 *   finishes once the channel has ended and the buffer is empty.
 */
function makeChannel() {
    const buffered = [];
    const pendingReaders = [];
    let finished = false;
    const push = (value) => {
        if (finished) {
            return;
        }
        const reader = pendingReaders.shift();
        if (!reader) {
            buffered.push(value);
            return;
        }
        reader({ value, done: false });
    };
    const end = () => {
        if (finished) {
            return;
        }
        finished = true;
        // Release every reader that is parked waiting for a value.
        for (const reader of pendingReaders.splice(0)) {
            reader({ value: undefined, done: true });
        }
    };
    async function* iter() {
        for (;;) {
            if (buffered.length > 0) {
                yield buffered.shift();
                continue;
            }
            if (finished) {
                return;
            }
            // Park until push() or end() resolves this reader.
            const delivery = await new Promise((resolve) => {
                pendingReaders.push(resolve);
            });
            if (delivery.done) {
                return;
            }
            yield delivery.value;
        }
    }
    return { push, end, iter };
}
|
|
61
|
+
/**
 * Turn a base64 payload into signed 16-bit PCM samples.
 *
 * Bytes are read pairwise as little-endian `int16`; a trailing unpaired byte
 * is ignored.
 *
 * @param {string} b64 Base64-encoded PCM bytes.
 * @returns {Int16Array} The decoded samples.
 */
function decodeBase64Pcm16(b64) {
    const bytes = Buffer.from(b64, 'base64');
    const sampleCount = Math.floor(bytes.length / 2);
    return Int16Array.from({ length: sampleCount }, (_unused, idx) => bytes.readInt16LE(idx * 2));
}
|
|
74
|
+
/**
 * Base64-encode a PCM frame.
 *
 * An `Int16Array` is written out sample by sample as little-endian 16-bit
 * values; any other input is handed to `Buffer.from` and encoded as-is.
 *
 * @param {Int16Array|Uint8Array} frame PCM samples or raw bytes.
 * @returns {string} Base64 text.
 */
function encodeBase64Pcm16(frame) {
    if (!(frame instanceof Int16Array)) {
        return Buffer.from(frame).toString('base64');
    }
    const bytes = Buffer.alloc(frame.length * 2);
    frame.forEach((sample, idx) => {
        bytes.writeInt16LE(sample, idx * 2);
    });
    return bytes.toString('base64');
}
|
|
88
|
+
/**
 * Translate a vendor-neutral realtime config into a `session.update` event in
 * the OpenAI Realtime GA layout.
 *
 * - `session.type` is always `"realtime"`.
 * - `modalities` collapses to `output_modalities`: `['audio']` when audio is
 *   requested, otherwise `['text']`.
 * - Input format, turn detection and (optional) transcription go under
 *   `session.audio.input`; output format and (optional) voice go under
 *   `session.audio.output`.
 * - `maxResponseOutputTokens` becomes `max_output_tokens`.
 * - `temperature` is never copied into the payload.
 * - `providerExtras`, when present, is merged over the session last.
 *
 * Unset options fall back to `REALTIME_DEFAULTS`.
 */
function buildSessionUpdate(config) {
    const defaults = config_1.REALTIME_DEFAULTS;
    const modalities = config.modalities ?? defaults.modalities;
    const turnDetection = config.turnDetection ?? defaults.turnDetection;
    // GA only distinguishes output modalities.
    const outputModalities = modalities.includes('audio') ? ['audio'] : ['text'];
    const input = {
        format: encodeAudioFormat(config.inputAudioFormat ?? defaults.inputAudioFormat),
        turn_detection: encodeTurnDetection(turnDetection),
    };
    const transcription = config.inputAudioTranscription;
    if (transcription !== undefined) {
        // `false` explicitly disables transcription; anything else is passed through.
        input.transcription = transcription === false ? null : transcription;
    }
    const output = {
        format: encodeAudioFormat(config.outputAudioFormat ?? defaults.outputAudioFormat),
    };
    if (config.voice !== undefined) {
        output.voice = config.voice;
    }
    const session = {
        type: 'realtime',
        output_modalities: outputModalities,
        audio: { input, output },
        tool_choice: encodeToolChoice(config.toolChoice ?? defaults.toolChoice),
    };
    if (config.instructions !== undefined) {
        session.instructions = config.instructions;
    }
    if (config.maxResponseOutputTokens !== undefined) {
        session.max_output_tokens = config.maxResponseOutputTokens;
    }
    if (config.tools && config.tools.length > 0) {
        session.tools = config.tools.map((tool) => {
            const { name, description, parameters } = tool.descriptor().function;
            return { type: 'function', name, description, parameters };
        });
    }
    if (config.providerExtras) {
        Object.assign(session, config.providerExtras);
    }
    return { type: 'session.update', session };
}
|
|
147
|
+
/**
 * Map a vendor-neutral audio format name to its wire descriptor.
 *
 * @param {string} fmt One of `'pcm16'`, `'g711_ulaw'`, `'g711_alaw'`.
 * @returns {object|undefined} A fresh descriptor object, or `undefined` for
 *   any other value.
 */
function encodeAudioFormat(fmt) {
    if (fmt === 'pcm16') {
        return { type: 'audio/pcm', rate: 24000 };
    }
    if (fmt === 'g711_ulaw') {
        return { type: 'audio/pcmu' };
    }
    if (fmt === 'g711_alaw') {
        return { type: 'audio/pcma' };
    }
    return undefined;
}
|
|
157
|
+
/**
 * Convert a turn-detection setting into its wire form.
 *
 * - `'none'` becomes `null`.
 * - `'server_vad'` / `'semantic_vad'` become `{ type }` with no options.
 * - An object whose `type` is `'semantic_vad'` is emitted with its
 *   snake_cased semantic-VAD options.
 * - Anything else is treated as a server-VAD options object.
 *
 * Options left `undefined` are omitted from the result.
 */
function encodeTurnDetection(td) {
    if (td === 'none') {
        return null;
    }
    if (td === 'server_vad' || td === 'semantic_vad') {
        return { type: td };
    }
    const isSemantic = typeof td === 'object' && td.type === 'semantic_vad';
    if (isSemantic) {
        return stripUndefined({
            type: 'semantic_vad',
            eagerness: td.eagerness,
            create_response: td.createResponse,
            interrupt_response: td.interruptResponse,
        });
    }
    return stripUndefined({
        type: 'server_vad',
        threshold: td.threshold,
        prefix_padding_ms: td.prefixPaddingMs,
        silence_duration_ms: td.silenceDurationMs,
        create_response: td.createResponse,
        interrupt_response: td.interruptResponse,
        idle_timeout_ms: td.idleTimeoutMs,
    });
}
|
|
184
|
+
/**
 * Return a shallow copy of `obj` without the own enumerable properties whose
 * value is exactly `undefined`. `null` and other falsy values are kept.
 */
function stripUndefined(obj) {
    return Object.entries(obj).reduce((kept, [key, val]) => {
        if (val !== undefined) {
            // eslint-disable-next-line security/detect-object-injection -- key comes from Object.entries on a controlled literal
            kept[key] = val;
        }
        return kept;
    }, {});
}
|
|
193
|
+
/**
 * Convert a tool-choice setting into its wire form.
 *
 * @param {string|{name: string}|undefined} choice
 * @returns {string|{type: 'function', name: string}} `'auto'` when unset, the
 *   string itself when a string is given, otherwise a function selector built
 *   from `choice.name`.
 */
function encodeToolChoice(choice) {
    switch (typeof choice) {
        case 'undefined':
            return 'auto';
        case 'string':
            return choice;
        default:
            return { type: 'function', name: choice.name };
    }
}
|
|
200
|
+
/**
|
|
201
|
+
* Stateful realtime session handle.
|
|
202
|
+
*
|
|
203
|
+
* Constructed by {@link RealtimeVoiceBroker}; users don't instantiate this
|
|
204
|
+
* directly.
|
|
205
|
+
*/
|
|
206
|
+
class RealtimeSession {
|
|
207
|
+
gatewaySession;
|
|
208
|
+
config;
|
|
209
|
+
tools;
|
|
210
|
+
toolRunner;
|
|
211
|
+
tracer;
|
|
212
|
+
correlationId;
|
|
213
|
+
normalizedChannel = makeChannel();
|
|
214
|
+
rawChannel = makeChannel();
|
|
215
|
+
audioChannel = makeChannel();
|
|
216
|
+
pumpPromise;
|
|
217
|
+
pendingBatches = new Set();
|
|
218
|
+
currentTurn;
|
|
219
|
+
currentResponseId;
|
|
220
|
+
closed = false;
|
|
221
|
+
currentInstructions;
|
|
222
|
+
constructor(gatewaySession, options) {
|
|
223
|
+
this.gatewaySession = gatewaySession;
|
|
224
|
+
this.config = options.config;
|
|
225
|
+
this.tools = options.config.tools ?? [];
|
|
226
|
+
this.toolRunner = options.toolRunner ?? new tools_1.ParallelToolRunner();
|
|
227
|
+
this.tracer = options.tracer;
|
|
228
|
+
this.correlationId = options.correlationId ?? (0, crypto_1.randomUUID)();
|
|
229
|
+
this.currentInstructions = options.config.instructions;
|
|
230
|
+
this.normalizedChannel.push({
|
|
231
|
+
kind: 'session_opened',
|
|
232
|
+
sessionId: gatewaySession.sessionId,
|
|
233
|
+
});
|
|
234
|
+
this.pumpPromise = this.pump();
|
|
235
|
+
}
|
|
236
|
+
/**
|
|
237
|
+
* Initialise the session by sending the `session.update`. Called once by
|
|
238
|
+
* the broker before returning the session to the caller.
|
|
239
|
+
*/
|
|
240
|
+
async initialise() {
|
|
241
|
+
const payload = buildSessionUpdate(this.config);
|
|
242
|
+
return this.gatewaySession.sendEvent(payload);
|
|
243
|
+
}
|
|
244
|
+
/** Vendor-neutral event stream. Terminates when the session closes. */
|
|
245
|
+
events() {
|
|
246
|
+
return this.normalizedChannel.iter();
|
|
247
|
+
}
|
|
248
|
+
/** Raw server events for power users / debugging. */
|
|
249
|
+
rawEvents() {
|
|
250
|
+
return this.rawChannel.iter();
|
|
251
|
+
}
|
|
252
|
+
/** Async generator yielding PCM frames from the assistant. */
|
|
253
|
+
audioOutput() {
|
|
254
|
+
return this.audioChannel.iter();
|
|
255
|
+
}
|
|
256
|
+
/**
|
|
257
|
+
* Send a text-mode user message. Use this in text-only sessions and tests.
|
|
258
|
+
*/
|
|
259
|
+
async sendText(text) {
|
|
260
|
+
const create = await this.gatewaySession.sendEvent({
|
|
261
|
+
type: 'conversation.item.create',
|
|
262
|
+
item: {
|
|
263
|
+
type: 'message',
|
|
264
|
+
role: 'user',
|
|
265
|
+
content: [{ type: 'input_text', text }],
|
|
266
|
+
},
|
|
267
|
+
});
|
|
268
|
+
if (!(0, error_1.isOk)(create))
|
|
269
|
+
return create;
|
|
270
|
+
return this.gatewaySession.sendEvent({ type: 'response.create' });
|
|
271
|
+
}
|
|
272
|
+
/**
|
|
273
|
+
* Pipe an async iterable of PCM frames into the input audio buffer.
|
|
274
|
+
*
|
|
275
|
+
* Frames are encoded as base64 pcm16 and appended to the server VAD buffer.
|
|
276
|
+
* Returns once the iterable completes or the session closes.
|
|
277
|
+
*/
|
|
278
|
+
async sendAudio(stream) {
|
|
279
|
+
try {
|
|
280
|
+
for await (const frame of stream) {
|
|
281
|
+
if (this.closed)
|
|
282
|
+
return (0, error_1.Ok)(undefined);
|
|
283
|
+
const append = await this.gatewaySession.sendEvent({
|
|
284
|
+
type: 'input_audio_buffer.append',
|
|
285
|
+
audio: encodeBase64Pcm16(frame),
|
|
286
|
+
});
|
|
287
|
+
if (!(0, error_1.isOk)(append))
|
|
288
|
+
return append;
|
|
289
|
+
}
|
|
290
|
+
return (0, error_1.Ok)(undefined);
|
|
291
|
+
}
|
|
292
|
+
catch (err) {
|
|
293
|
+
return (0, error_1.Err)(err instanceof Error ? err : new Error(String(err)));
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
/**
|
|
297
|
+
* Manually commit the input audio buffer (only meaningful with
|
|
298
|
+
* `turnDetection: 'none'`) and request a response.
|
|
299
|
+
*/
|
|
300
|
+
async commitAudio() {
|
|
301
|
+
const commit = await this.gatewaySession.sendEvent({
|
|
302
|
+
type: 'input_audio_buffer.commit',
|
|
303
|
+
});
|
|
304
|
+
if (!(0, error_1.isOk)(commit))
|
|
305
|
+
return commit;
|
|
306
|
+
return this.gatewaySession.sendEvent({ type: 'response.create' });
|
|
307
|
+
}
|
|
308
|
+
/**
|
|
309
|
+
* Cancel the in-flight response, abort any in-flight tool execution, and
|
|
310
|
+
* fire an `interrupted` event with `reason: 'manual'`.
|
|
311
|
+
*/
|
|
312
|
+
async interrupt() {
|
|
313
|
+
return this.cancelCurrentTurn('manual');
|
|
314
|
+
}
|
|
315
|
+
/**
|
|
316
|
+
* Update the instructions used by future assistant turns. The change is
|
|
317
|
+
* sent immediately via a `session.update`.
|
|
318
|
+
*/
|
|
319
|
+
async updateInstructions(instructions) {
|
|
320
|
+
this.currentInstructions = instructions;
|
|
321
|
+
return this.gatewaySession.sendEvent({
|
|
322
|
+
type: 'session.update',
|
|
323
|
+
session: { instructions },
|
|
324
|
+
});
|
|
325
|
+
}
|
|
326
|
+
/** Close the session, dispose the socket, and end all event streams. */
|
|
327
|
+
async close() {
|
|
328
|
+
if (this.closed)
|
|
329
|
+
return;
|
|
330
|
+
this.closed = true;
|
|
331
|
+
if (this.currentTurn && !this.currentTurn.cancelled) {
|
|
332
|
+
this.currentTurn.cancelled = true;
|
|
333
|
+
this.currentTurn.toolAbort.abort();
|
|
334
|
+
}
|
|
335
|
+
await Promise.allSettled(Array.from(this.pendingBatches));
|
|
336
|
+
await this.gatewaySession.close();
|
|
337
|
+
this.normalizedChannel.push({ kind: 'session_closed', reason: 'client' });
|
|
338
|
+
this.normalizedChannel.end();
|
|
339
|
+
this.rawChannel.end();
|
|
340
|
+
this.audioChannel.end();
|
|
341
|
+
await this.pumpPromise.catch(() => {
|
|
342
|
+
/* swallow — close should never throw */
|
|
343
|
+
});
|
|
344
|
+
}
|
|
345
|
+
/** Symbol.asyncDispose support for `await using` syntax. */
|
|
346
|
+
[Symbol.asyncDispose]() {
|
|
347
|
+
return this.close();
|
|
348
|
+
}
|
|
349
|
+
/** Effective instructions used in the most recent `session.update`. */
|
|
350
|
+
getInstructions() {
|
|
351
|
+
return this.currentInstructions;
|
|
352
|
+
}
|
|
353
|
+
// ---------------------------------------------------------------------------
|
|
354
|
+
// Event pump
|
|
355
|
+
// ---------------------------------------------------------------------------
|
|
356
|
+
async pump() {
|
|
357
|
+
const emit = (event) => this.normalizedChannel.push(event);
|
|
358
|
+
try {
|
|
359
|
+
for await (const raw of this.gatewaySession.events()) {
|
|
360
|
+
this.rawChannel.push(raw);
|
|
361
|
+
await this.handleServerEvent(raw, emit);
|
|
362
|
+
if (this.closed)
|
|
363
|
+
break;
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
finally {
|
|
367
|
+
if (!this.closed) {
|
|
368
|
+
this.normalizedChannel.push({ kind: 'session_closed', reason: 'server' });
|
|
369
|
+
this.normalizedChannel.end();
|
|
370
|
+
this.rawChannel.end();
|
|
371
|
+
this.audioChannel.end();
|
|
372
|
+
this.closed = true;
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
async handleServerEvent(raw, emit) {
|
|
377
|
+
switch (raw.type) {
|
|
378
|
+
case 'session.created':
|
|
379
|
+
// session_opened already emitted on construction; ignore.
|
|
380
|
+
return;
|
|
381
|
+
case 'session.updated':
|
|
382
|
+
emit({ kind: 'session_updated', config: { instructions: this.currentInstructions } });
|
|
383
|
+
return;
|
|
384
|
+
case 'input_audio_buffer.speech_started': {
|
|
385
|
+
const ev = raw;
|
|
386
|
+
emit({ kind: 'user_speech_started', atMs: ev.audio_start_ms ?? Date.now() });
|
|
387
|
+
// Barge-in: cancel any in-flight assistant response.
|
|
388
|
+
if (this.currentTurn && !this.currentTurn.cancelled) {
|
|
389
|
+
await this.cancelCurrentTurn('barge_in');
|
|
390
|
+
}
|
|
391
|
+
return;
|
|
392
|
+
}
|
|
393
|
+
case 'input_audio_buffer.speech_stopped': {
|
|
394
|
+
const ev = raw;
|
|
395
|
+
emit({ kind: 'user_speech_stopped', atMs: ev.audio_end_ms ?? Date.now() });
|
|
396
|
+
return;
|
|
397
|
+
}
|
|
398
|
+
case 'conversation.item.input_audio_transcription.delta': {
|
|
399
|
+
const ev = raw;
|
|
400
|
+
emit({ kind: 'user_transcript_delta', itemId: ev.item_id, delta: ev.delta });
|
|
401
|
+
return;
|
|
402
|
+
}
|
|
403
|
+
case 'conversation.item.input_audio_transcription.completed': {
|
|
404
|
+
const ev = raw;
|
|
405
|
+
emit({ kind: 'user_transcript', itemId: ev.item_id, text: ev.transcript });
|
|
406
|
+
return;
|
|
407
|
+
}
|
|
408
|
+
case 'response.created': {
|
|
409
|
+
const ev = raw;
|
|
410
|
+
const turnId = ev.response.id;
|
|
411
|
+
this.currentResponseId = turnId;
|
|
412
|
+
this.currentTurn = {
|
|
413
|
+
turnId,
|
|
414
|
+
calls: new Map(),
|
|
415
|
+
textBuffer: '',
|
|
416
|
+
transcriptBuffer: '',
|
|
417
|
+
cancelled: false,
|
|
418
|
+
toolAbort: new AbortController(),
|
|
419
|
+
};
|
|
420
|
+
emit({ kind: 'assistant_turn_started', turnId });
|
|
421
|
+
return;
|
|
422
|
+
}
|
|
423
|
+
case 'response.output_item.added': {
|
|
424
|
+
const ev = raw;
|
|
425
|
+
if (ev.item.type === 'function_call' && ev.item.call_id && ev.item.name) {
|
|
426
|
+
const turn = this.currentTurn;
|
|
427
|
+
if (!turn)
|
|
428
|
+
return;
|
|
429
|
+
turn.calls.set(ev.item.call_id, {
|
|
430
|
+
callId: ev.item.call_id,
|
|
431
|
+
itemId: ev.item.id,
|
|
432
|
+
name: ev.item.name,
|
|
433
|
+
argsBuffer: '',
|
|
434
|
+
done: false,
|
|
435
|
+
});
|
|
436
|
+
emit({
|
|
437
|
+
kind: 'tool_call_started',
|
|
438
|
+
turnId: turn.turnId,
|
|
439
|
+
callId: ev.item.call_id,
|
|
440
|
+
name: ev.item.name,
|
|
441
|
+
});
|
|
442
|
+
}
|
|
443
|
+
return;
|
|
444
|
+
}
|
|
445
|
+
case 'response.function_call_arguments.delta': {
|
|
446
|
+
const ev = raw;
|
|
447
|
+
const turn = this.currentTurn;
|
|
448
|
+
const call = turn?.calls.get(ev.call_id);
|
|
449
|
+
if (call) {
|
|
450
|
+
call.argsBuffer += ev.delta;
|
|
451
|
+
}
|
|
452
|
+
emit({ kind: 'tool_call_args_delta', callId: ev.call_id, delta: ev.delta });
|
|
453
|
+
return;
|
|
454
|
+
}
|
|
455
|
+
case 'response.function_call_arguments.done': {
|
|
456
|
+
const ev = raw;
|
|
457
|
+
const turn = this.currentTurn;
|
|
458
|
+
const call = turn?.calls.get(ev.call_id);
|
|
459
|
+
if (call) {
|
|
460
|
+
call.argsBuffer = ev.arguments || call.argsBuffer;
|
|
461
|
+
call.done = true;
|
|
462
|
+
}
|
|
463
|
+
return;
|
|
464
|
+
}
|
|
465
|
+
case 'response.text.delta':
|
|
466
|
+
case 'response.output_text.delta': {
|
|
467
|
+
const ev = raw;
|
|
468
|
+
const turn = this.currentTurn;
|
|
469
|
+
if (!turn)
|
|
470
|
+
return;
|
|
471
|
+
turn.textBuffer += ev.delta;
|
|
472
|
+
emit({ kind: 'assistant_text_delta', turnId: turn.turnId, delta: ev.delta });
|
|
473
|
+
return;
|
|
474
|
+
}
|
|
475
|
+
case 'response.text.done':
|
|
476
|
+
case 'response.output_text.done': {
|
|
477
|
+
const ev = raw;
|
|
478
|
+
const turn = this.currentTurn;
|
|
479
|
+
if (!turn)
|
|
480
|
+
return;
|
|
481
|
+
emit({ kind: 'assistant_text', turnId: turn.turnId, text: ev.text });
|
|
482
|
+
return;
|
|
483
|
+
}
|
|
484
|
+
case 'response.audio_transcript.delta':
|
|
485
|
+
case 'response.output_audio_transcript.delta': {
|
|
486
|
+
const ev = raw;
|
|
487
|
+
const turn = this.currentTurn;
|
|
488
|
+
if (!turn)
|
|
489
|
+
return;
|
|
490
|
+
turn.transcriptBuffer += ev.delta;
|
|
491
|
+
emit({ kind: 'assistant_transcript_delta', turnId: turn.turnId, delta: ev.delta });
|
|
492
|
+
return;
|
|
493
|
+
}
|
|
494
|
+
case 'response.audio_transcript.done':
|
|
495
|
+
case 'response.output_audio_transcript.done': {
|
|
496
|
+
const ev = raw;
|
|
497
|
+
const turn = this.currentTurn;
|
|
498
|
+
if (!turn)
|
|
499
|
+
return;
|
|
500
|
+
emit({ kind: 'assistant_transcript', turnId: turn.turnId, text: ev.transcript });
|
|
501
|
+
return;
|
|
502
|
+
}
|
|
503
|
+
case 'response.audio.delta':
|
|
504
|
+
case 'response.output_audio.delta': {
|
|
505
|
+
const ev = raw;
|
|
506
|
+
const turn = this.currentTurn;
|
|
507
|
+
if (!turn)
|
|
508
|
+
return;
|
|
509
|
+
const pcm = decodeBase64Pcm16(ev.delta);
|
|
510
|
+
this.audioChannel.push(pcm);
|
|
511
|
+
emit({ kind: 'assistant_audio_delta', turnId: turn.turnId, pcm });
|
|
512
|
+
return;
|
|
513
|
+
}
|
|
514
|
+
case 'response.done': {
|
|
515
|
+
const ev = raw;
|
|
516
|
+
const turn = this.currentTurn;
|
|
517
|
+
if (!turn || turn.turnId !== ev.response.id) {
|
|
518
|
+
this.currentTurn = undefined;
|
|
519
|
+
return;
|
|
520
|
+
}
|
|
521
|
+
const usage = ev.response.usage;
|
|
522
|
+
const usageOut = usage
|
|
523
|
+
? {
|
|
524
|
+
promptTokens: usage.input_tokens,
|
|
525
|
+
completionTokens: usage.output_tokens,
|
|
526
|
+
totalTokens: usage.total_tokens,
|
|
527
|
+
extras: {
|
|
528
|
+
input_token_details: usage.input_token_details,
|
|
529
|
+
output_token_details: usage.output_token_details,
|
|
530
|
+
},
|
|
531
|
+
}
|
|
532
|
+
: undefined;
|
|
533
|
+
emit({ kind: 'assistant_turn_completed', turnId: turn.turnId, usage: usageOut });
|
|
534
|
+
if (turn.calls.size > 0) {
|
|
535
|
+
// Fire-and-track: the pump must stay responsive to barge-in /
|
|
536
|
+
// server events while tools are running. We keep `currentTurn`
|
|
537
|
+
// set during execution so a late `speech_started` or manual
|
|
538
|
+
// `interrupt()` can still abort the batch.
|
|
539
|
+
const batchPromise = this.runToolBatchForTurn(turn, emit).catch((err) => {
|
|
540
|
+
emit({
|
|
541
|
+
kind: 'error',
|
|
542
|
+
error: err instanceof Error ? err : new Error(String(err)),
|
|
543
|
+
recoverable: true,
|
|
544
|
+
});
|
|
545
|
+
});
|
|
546
|
+
this.pendingBatches.add(batchPromise);
|
|
547
|
+
batchPromise.finally(() => {
|
|
548
|
+
this.pendingBatches.delete(batchPromise);
|
|
549
|
+
if (this.currentTurn === turn) {
|
|
550
|
+
this.currentTurn = undefined;
|
|
551
|
+
this.currentResponseId = undefined;
|
|
552
|
+
}
|
|
553
|
+
});
|
|
554
|
+
}
|
|
555
|
+
else {
|
|
556
|
+
this.currentTurn = undefined;
|
|
557
|
+
this.currentResponseId = undefined;
|
|
558
|
+
}
|
|
559
|
+
return;
|
|
560
|
+
}
|
|
561
|
+
case 'rate_limits.updated': {
|
|
562
|
+
const ev = raw;
|
|
563
|
+
const first = ev.rate_limits[0] ?? {};
|
|
564
|
+
const reset = first.reset_seconds ?? 0;
|
|
565
|
+
emit({
|
|
566
|
+
kind: 'rate_limited',
|
|
567
|
+
resetMs: Math.round(reset * 1000),
|
|
568
|
+
details: { rate_limits: ev.rate_limits },
|
|
569
|
+
});
|
|
570
|
+
return;
|
|
571
|
+
}
|
|
572
|
+
case 'error': {
|
|
573
|
+
const ev = raw;
|
|
574
|
+
// Suppress the expected race when `response.cancel` lands after the
|
|
575
|
+
// response has already completed server-side — common during barge-in.
|
|
576
|
+
if (/no active response/i.test(ev.error.message)) {
|
|
577
|
+
return;
|
|
578
|
+
}
|
|
579
|
+
emit({
|
|
580
|
+
kind: 'error',
|
|
581
|
+
error: new Error(ev.error.message),
|
|
582
|
+
recoverable: ev.error.type !== 'session_error',
|
|
583
|
+
});
|
|
584
|
+
return;
|
|
585
|
+
}
|
|
586
|
+
default:
|
|
587
|
+
// Unknown event types stay in rawEvents() only.
|
|
588
|
+
return;
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
// ---------------------------------------------------------------------------
|
|
592
|
+
// Tool batch execution
|
|
593
|
+
// ---------------------------------------------------------------------------
|
|
594
|
+
async runToolBatchForTurn(turn, emit) {
|
|
595
|
+
const calls = Array.from(turn.calls.values()).filter((c) => c.done || c.argsBuffer.length > 0);
|
|
596
|
+
const executions = [];
|
|
597
|
+
for (const call of calls) {
|
|
598
|
+
try {
|
|
599
|
+
const args = (call.argsBuffer ? JSON.parse(call.argsBuffer) : {});
|
|
600
|
+
call.parsedArgs = args;
|
|
601
|
+
executions.push({ id: call.callId, name: call.name, args });
|
|
602
|
+
}
|
|
603
|
+
catch (err) {
|
|
604
|
+
const error = err instanceof Error ? err : new Error(String(err));
|
|
605
|
+
emit({ kind: 'tool_call_failed', callId: call.callId, name: call.name, error });
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
if (executions.length === 0) {
|
|
609
|
+
return;
|
|
610
|
+
}
|
|
611
|
+
const batchStart = Date.now();
|
|
612
|
+
const outcomes = await this.toolRunner.runBatch(executions, this.tools, {
|
|
613
|
+
signal: turn.toolAbort.signal,
|
|
614
|
+
correlationId: this.correlationId,
|
|
615
|
+
source: TOOL_BATCH_SOURCE,
|
|
616
|
+
onCallStart: (call) => {
|
|
617
|
+
emit({
|
|
618
|
+
kind: 'tool_call_dispatched',
|
|
619
|
+
callId: call.id,
|
|
620
|
+
name: call.name,
|
|
621
|
+
args: call.args,
|
|
622
|
+
});
|
|
623
|
+
},
|
|
624
|
+
onCallComplete: (outcome) => {
|
|
625
|
+
if (this.tracer) {
|
|
626
|
+
const args = executions.find((e) => e.id === outcome.id)?.args ?? {};
|
|
627
|
+
this.tracer.recordToolCall(outcome.name, args, outcome.ok ? outcome.result : { error: outcome.error.message }, TOOL_BATCH_SOURCE, outcome.durationMs, this.correlationId, 'RealtimeSession.toolBatch');
|
|
628
|
+
}
|
|
629
|
+
if (outcome.ok) {
|
|
630
|
+
emit({
|
|
631
|
+
kind: 'tool_call_completed',
|
|
632
|
+
callId: outcome.id,
|
|
633
|
+
name: outcome.name,
|
|
634
|
+
result: outcome.result,
|
|
635
|
+
});
|
|
636
|
+
}
|
|
637
|
+
else {
|
|
638
|
+
emit({
|
|
639
|
+
kind: 'tool_call_failed',
|
|
640
|
+
callId: outcome.id,
|
|
641
|
+
name: outcome.name,
|
|
642
|
+
error: outcome.error,
|
|
643
|
+
});
|
|
644
|
+
}
|
|
645
|
+
},
|
|
646
|
+
});
|
|
647
|
+
if (this.tracer) {
|
|
648
|
+
const ok = outcomes.filter((o) => o.ok).length;
|
|
649
|
+
const fail = outcomes.length - ok;
|
|
650
|
+
this.tracer.recordToolBatch((0, crypto_1.randomUUID)(), executions.map((e) => e.name), ok, fail, Date.now() - batchStart, this.correlationId, 'RealtimeSession.toolBatch');
|
|
651
|
+
}
|
|
652
|
+
const policy = this.config.onInterrupt ?? config_1.REALTIME_DEFAULTS.onInterrupt;
|
|
653
|
+
const toSubmit = this.selectOutputsToSubmit(turn, outcomes, policy);
|
|
654
|
+
const submittedIds = [];
|
|
655
|
+
for (const outcome of toSubmit) {
|
|
656
|
+
const output = serialiseOutput(outcome);
|
|
657
|
+
const send = await this.gatewaySession.sendEvent({
|
|
658
|
+
type: 'conversation.item.create',
|
|
659
|
+
item: {
|
|
660
|
+
type: 'function_call_output',
|
|
661
|
+
call_id: outcome.id,
|
|
662
|
+
output,
|
|
663
|
+
},
|
|
664
|
+
});
|
|
665
|
+
if ((0, error_1.isOk)(send)) {
|
|
666
|
+
submittedIds.push(outcome.id);
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
if (submittedIds.length > 0) {
|
|
670
|
+
emit({ kind: 'tool_batch_submitted', turnId: turn.turnId, callIds: submittedIds });
|
|
671
|
+
// Trigger the model's follow-up response.
|
|
672
|
+
await this.gatewaySession.sendEvent({ type: 'response.create' });
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
selectOutputsToSubmit(turn, outcomes, policy) {
|
|
676
|
+
if (!turn.cancelled)
|
|
677
|
+
return outcomes;
|
|
678
|
+
if (policy === 'submit')
|
|
679
|
+
return outcomes;
|
|
680
|
+
if (policy === 'drop')
|
|
681
|
+
return [];
|
|
682
|
+
// submit-completed-only: only outcomes that wrapped up before abort signal landed.
|
|
683
|
+
return outcomes.filter((o) => o.ok);
|
|
684
|
+
}
|
|
685
|
+
async cancelCurrentTurn(reason) {
|
|
686
|
+
const turn = this.currentTurn;
|
|
687
|
+
if (!turn || turn.cancelled) {
|
|
688
|
+
return (0, error_1.Ok)(undefined);
|
|
689
|
+
}
|
|
690
|
+
turn.cancelled = true;
|
|
691
|
+
turn.toolAbort.abort();
|
|
692
|
+
this.normalizedChannel.push({ kind: 'interrupted', turnId: turn.turnId, reason });
|
|
693
|
+
if (!this.currentResponseId)
|
|
694
|
+
return (0, error_1.Ok)(undefined);
|
|
695
|
+
return this.gatewaySession.sendEvent({ type: 'response.cancel' });
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
exports.RealtimeSession = RealtimeSession;
|
|
699
|
+
/**
 * Render a tool outcome as the string sent in a `function_call_output` item:
 * the serialised result on success, or `{"error": <message>}` on failure.
 */
function serialiseOutput(outcome) {
    return outcome.ok
        ? safeJsonStringify(outcome.result)
        : JSON.stringify({ error: outcome.error.message });
}
|
|
705
|
+
/**
 * Serialise a tool result to a string without ever throwing.
 *
 * - Strings are returned unchanged (not re-quoted).
 * - Values `JSON.stringify` cannot represent (`undefined`, functions,
 *   symbols) yield the JSON text `null`, so the caller always receives a
 *   string rather than `undefined`.
 * - Values that make `JSON.stringify` throw (cycles, BigInt) yield a JSON
 *   object with `error: 'serialisation_failed'` and the error text in
 *   `detail`.
 *
 * @param {unknown} value The value to serialise.
 * @returns {string} The serialised text.
 */
function safeJsonStringify(value) {
    if (typeof value === 'string') {
        return value;
    }
    try {
        // JSON.stringify returns undefined (not a string) for unrepresentable values.
        return JSON.stringify(value) ?? 'null';
    }
    catch (err) {
        return JSON.stringify({ error: 'serialisation_failed', detail: String(err) });
    }
}
|
|
715
|
+
//# sourceMappingURL=session.js.map
|