discoclaw 1.2.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/.context/voice.md +30 -2
  2. package/.env.example +7 -3
  3. package/.env.example.full +13 -32
  4. package/README.md +1 -1
  5. package/dist/cli/dashboard.js +7 -1
  6. package/dist/cli/dashboard.test.js +0 -4
  7. package/dist/cli/init-wizard.js +4 -8
  8. package/dist/cli/init-wizard.test.js +4 -10
  9. package/dist/config.js +5 -38
  10. package/dist/config.test.js +8 -72
  11. package/dist/cron/executor.js +72 -1
  12. package/dist/dashboard/api/metrics.js +7 -0
  13. package/dist/dashboard/api/metrics.test.js +16 -0
  14. package/dist/dashboard/api/traces.js +14 -0
  15. package/dist/dashboard/api/traces.test.js +40 -0
  16. package/dist/dashboard/page.js +187 -8
  17. package/dist/dashboard/server.js +82 -19
  18. package/dist/dashboard/server.test.js +123 -10
  19. package/dist/discord/actions.js +112 -6
  20. package/dist/discord/actions.test.js +117 -1
  21. package/dist/discord/deferred-runner.js +306 -219
  22. package/dist/discord/help-command.js +1 -1
  23. package/dist/discord/message-coordinator.js +4 -36
  24. package/dist/discord/models-command.js +1 -1
  25. package/dist/discord/reaction-handler.js +83 -5
  26. package/dist/discord/reaction-handler.test.js +55 -0
  27. package/dist/discord/verify-push.js +31 -36
  28. package/dist/discord/verify-push.test.js +34 -6
  29. package/dist/discord/voice-command.js +1 -31
  30. package/dist/discord/voice-command.test.js +21 -259
  31. package/dist/discord/voice-status-command.js +3 -22
  32. package/dist/discord/voice-status-command.test.js +16 -124
  33. package/dist/discord-followup.test.js +133 -0
  34. package/dist/health/config-doctor.js +5 -27
  35. package/dist/health/config-doctor.test.js +1 -4
  36. package/dist/index.js +15 -28
  37. package/dist/observability/trace-store.js +56 -0
  38. package/dist/observability/trace-utils.js +31 -0
  39. package/dist/runtime/codex-cli.js +3 -2
  40. package/dist/runtime/codex-cli.test.js +33 -0
  41. package/dist/runtime/model-tiers.js +1 -1
  42. package/dist/runtime/model-tiers.test.js +9 -0
  43. package/dist/runtime/openai-tool-schemas.js +17 -0
  44. package/dist/runtime-overrides.js +2 -3
  45. package/dist/runtime-overrides.test.js +27 -193
  46. package/dist/tasks/store.js +10 -6
  47. package/dist/tasks/store.test.js +44 -0
  48. package/dist/tasks/task-action-executor.test.js +162 -50
  49. package/dist/tasks/task-action-mutations.js +22 -2
  50. package/dist/tasks/task-action-read-ops.js +7 -1
  51. package/dist/tasks/task-action-runner-types.js +19 -1
  52. package/dist/voice/audio-pipeline.js +183 -96
  53. package/dist/voice/audio-receiver.js +8 -0
  54. package/dist/voice/audio-receiver.test.js +16 -0
  55. package/dist/voice/conversation-buffer.js +16 -6
  56. package/dist/voice/providers/gemini-live-provider.js +481 -0
  57. package/dist/voice/providers/gemini-live-provider.test.js +834 -0
  58. package/dist/voice/providers/gemini-live-responder.js +267 -0
  59. package/dist/voice/providers/gemini-live-responder.test.js +615 -0
  60. package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
  61. package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
  62. package/dist/voice/providers/gemini-live-types.js +32 -0
  63. package/dist/voice/providers/gemini-tool-mapper.js +91 -0
  64. package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
  65. package/dist/voice/providers/index.js +3 -0
  66. package/dist/voice/voice-prompt-builder.js +26 -17
  67. package/dist/voice/voice-prompt-builder.test.js +16 -1
  68. package/docs/configuration.md +4 -9
  69. package/docs/official-docs.md +6 -9
  70. package/docs/runtime-switching.md +1 -1
  71. package/package.json +1 -1
  72. package/dist/voice/audio-pipeline.test.js +0 -619
  73. package/dist/voice/stt-deepgram.js +0 -154
  74. package/dist/voice/stt-deepgram.test.js +0 -275
  75. package/dist/voice/stt-factory.js +0 -42
  76. package/dist/voice/stt-factory.test.js +0 -45
  77. package/dist/voice/stt-openai.js +0 -156
  78. package/dist/voice/stt-openai.test.js +0 -281
  79. package/dist/voice/tts-cartesia.js +0 -169
  80. package/dist/voice/tts-cartesia.test.js +0 -228
  81. package/dist/voice/tts-deepgram.js +0 -84
  82. package/dist/voice/tts-deepgram.test.js +0 -220
  83. package/dist/voice/tts-factory.js +0 -52
  84. package/dist/voice/tts-factory.test.js +0 -53
  85. package/dist/voice/tts-openai.js +0 -70
  86. package/dist/voice/tts-openai.test.js +0 -138
  87. package/dist/voice/types.test.js +0 -84
@@ -0,0 +1,834 @@
1
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
2
+ import { EventEmitter } from 'node:events';
3
+ import { GeminiLiveProvider, } from './gemini-live-provider.js';
4
+ // ---------------------------------------------------------------------------
5
+ // Mock WebSocket (ws-library style: EventEmitter with readyState)
6
+ // ---------------------------------------------------------------------------
7
/**
 * In-memory stand-in for a `ws`-style WebSocket: an EventEmitter that exposes
 * `readyState`, records every payload passed to `send()`, and offers
 * underscore-prefixed helpers so tests can inject inbound traffic.
 */
class MockWebSocket extends EventEmitter {
    static OPEN = 1;
    static CLOSED = 3;
    /** URL handed to the constructor. */
    url;
    /** Starts out as OPEN; flipped to CLOSED by close() and _triggerClose(). */
    readyState = MockWebSocket.OPEN;
    /** Every payload passed to send(), in call order. */
    sent = [];
    /**
     * @param {string} url - Stored on the instance as `url`.
     */
    constructor(url) {
        super();
        this.url = url;
        // Defer 'open' by one microtask so the creator can attach listeners first.
        queueMicrotask(() => {
            this.emit('open');
        });
    }
    /** Records the outgoing payload instead of transmitting it. */
    send(data) {
        this.sent.push(data);
    }
    /**
     * Marks the socket closed immediately and emits 'close' on the next
     * microtask with `code` (default 1000) and `reason` wrapped in a Buffer.
     */
    close(code, reason) {
        this.readyState = MockWebSocket.CLOSED;
        queueMicrotask(() => {
            const closeCode = code ?? 1000;
            const closeReason = Buffer.from(reason ?? '');
            this.emit('close', closeCode, closeReason);
        });
    }
    // ----- Test helpers -----
    /** Emits `data`, JSON-serialised, as an inbound 'message' event. */
    _receiveMessage(data) {
        const frame = JSON.stringify(data);
        this.emit('message', frame);
    }
    /** Synchronously marks the socket closed and emits 'close' with an empty reason. */
    _triggerClose(code = 1006) {
        this.readyState = MockWebSocket.CLOSED;
        const emptyReason = Buffer.from('');
        this.emit('close', code, emptyReason);
    }
    /** Emits an 'error' event carrying a fresh Error built from `msg`. */
    _triggerError(msg = 'test error') {
        const failure = new Error(msg);
        this.emit('error', failure);
    }
}
39
+ // ---------------------------------------------------------------------------
40
+ // Helpers
41
+ // ---------------------------------------------------------------------------
42
/**
 * Builds a logger double whose `info`, `warn` and `error` members are
 * independent `vi.fn()` mocks, so tests can assert on what was logged.
 */
function createLogger() {
    const levels = ['info', 'warn', 'error'];
    return Object.fromEntries(levels.map((level) => [level, vi.fn()]));
}
45
// Most recently constructed mock socket; tests read it to drive the connection.
let lastCreatedWs = null;
/**
 * WebSocket factory for tests: creates a MockWebSocket for `url`, remembers it
 * in `lastCreatedWs`, and returns it.
 */
function mockWsFactory(url) {
    lastCreatedWs = new MockWebSocket(url);
    return lastCreatedWs;
}
// Alias of mockWsFactory that makeProvider() passes as the `wsFactory` option.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const typedWsFactory = mockWsFactory;
53
/**
 * Builds a GeminiLiveProvider wired to the mock WebSocket factory.
 *
 * @param {object} [overrides] - Provider options; any key given here is passed
 *   through to the constructor. `wsFactory` may be replaced by the caller.
 * @returns {GeminiLiveProvider} A provider using `apiKey` 'test-key' and a
 *   fresh mock logger unless the caller supplied non-nullish values.
 */
function makeProvider(overrides = {}) {
    return new GeminiLiveProvider({
        wsFactory: typedWsFactory,
        ...overrides,
        // Applied after the spread so an explicit `apiKey: undefined` or
        // `log: undefined` cannot overwrite the fallback.
        apiKey: overrides.apiKey ?? 'test-key',
        log: overrides.log ?? createLogger(),
    });
}
61
/**
 * Drives a provider through a successful handshake: starts `connect()`, waits
 * briefly, feeds a `setupComplete` message to the latest mock socket, then
 * awaits the connect promise.
 *
 * @returns the socket held in `lastCreatedWs` once connect() has settled.
 */
async function connectWithSetup(provider) {
    const pendingConnect = provider.connect();
    // Short real-time pause before `setupComplete` is injected into the socket.
    await new Promise((resolve) => {
        setTimeout(resolve, 5);
    });
    const setupAck = { setupComplete: {} };
    lastCreatedWs._receiveMessage(setupAck);
    await pendingConnect;
    return lastCreatedWs;
}
70
/**
 * Subscribes to `provider.onEvent` and returns the live array that every
 * received event is appended to, in arrival order.
 */
function collectEvents(provider) {
    const received = [];
    const record = (event) => received.push(event);
    provider.onEvent(record);
    return received;
}
75
+ // ---------------------------------------------------------------------------
76
+ // Tests
77
+ // ---------------------------------------------------------------------------
78
// Before every test: clear recorded calls on all mocks and forget the socket
// created by the previous test so state never leaks between cases.
beforeEach(function resetSharedTestState() {
    vi.clearAllMocks();
    lastCreatedWs = null;
});
82
+ describe('GeminiLiveProvider', () => {
83
+ // -----------------------------------------------------------------------
84
+ // Connection & setup
85
+ // -----------------------------------------------------------------------
86
+ it('connects with correct URL containing API key', async () => {
87
+ const provider = makeProvider({ apiKey: 'my-api-key' });
88
+ await connectWithSetup(provider);
89
+ expect(lastCreatedWs).not.toBeNull();
90
+ const url = new URL(lastCreatedWs.url);
91
+ expect(url.protocol).toBe('wss:');
92
+ expect(url.hostname).toBe('generativelanguage.googleapis.com');
93
+ expect(url.searchParams.get('key')).toBe('my-api-key');
94
+ });
95
+ it('sends setup message with the 3.1 model, compression, and transcription config on open', async () => {
96
+ const provider = makeProvider();
97
+ await connectWithSetup(provider);
98
+ const setupMsg = JSON.parse(lastCreatedWs.sent[0]);
99
+ expect(setupMsg.setup).toBeDefined();
100
+ expect(setupMsg.setup.model).toBe('models/gemini-3.1-flash-live-preview');
101
+ expect(setupMsg.setup.generationConfig.responseModalities).toEqual(['AUDIO']);
102
+ expect(setupMsg.setup.contextWindowCompression).toEqual({
103
+ slidingWindow: {},
104
+ });
105
+ expect(setupMsg.setup.realtimeInputConfig).toEqual({
106
+ activityHandling: 'START_OF_ACTIVITY_INTERRUPTS',
107
+ });
108
+ expect(setupMsg.setup.inputAudioTranscription).toEqual({});
109
+ expect(setupMsg.setup.outputAudioTranscription).toEqual({});
110
+ });
111
+ it('sends custom model, systemInstruction, and voiceName in setup', async () => {
112
+ const provider = makeProvider({
113
+ model: 'gemini-2.0-flash-exp',
114
+ systemInstruction: 'You are a helpful assistant.',
115
+ voiceName: 'Kore',
116
+ responseModalities: ['AUDIO', 'TEXT'],
117
+ });
118
+ await connectWithSetup(provider);
119
+ const setupMsg = JSON.parse(lastCreatedWs.sent[0]);
120
+ expect(setupMsg.setup.model).toBe('models/gemini-2.0-flash-exp');
121
+ expect(setupMsg.setup.systemInstruction).toEqual({
122
+ parts: [{ text: 'You are a helpful assistant.' }],
123
+ });
124
+ expect(setupMsg.setup.generationConfig.responseModalities).toEqual(['AUDIO', 'TEXT']);
125
+ expect(setupMsg.setup.generationConfig.speechConfig).toEqual({
126
+ voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } },
127
+ });
128
+ });
129
+ it('includes historyConfig when initialHistoryInClientContent is enabled', async () => {
130
+ const provider = makeProvider({ initialHistoryInClientContent: true });
131
+ await connectWithSetup(provider);
132
+ const setupMsg = JSON.parse(lastCreatedWs.sent[0]);
133
+ expect(setupMsg.setup.historyConfig).toEqual({
134
+ initialHistoryInClientContent: true,
135
+ });
136
+ });
137
+ it('includes tools in setup message when provided', async () => {
138
+ const tools = {
139
+ functionDeclarations: [
140
+ { name: 'web_search', description: 'Search the web.', parameters: { type: 'OBJECT', properties: { query: { type: 'STRING' } }, required: ['query'] } },
141
+ ],
142
+ };
143
+ const provider = makeProvider({ tools });
144
+ await connectWithSetup(provider);
145
+ const setupMsg = JSON.parse(lastCreatedWs.sent[0]);
146
+ expect(setupMsg.setup.tools).toEqual([tools]);
147
+ });
148
+ it('omits tools from setup message when not provided', async () => {
149
+ const provider = makeProvider();
150
+ await connectWithSetup(provider);
151
+ const setupMsg = JSON.parse(lastCreatedWs.sent[0]);
152
+ expect(setupMsg.setup.tools).toBeUndefined();
153
+ });
154
+ it('transitions to open state after setupComplete', async () => {
155
+ const provider = makeProvider();
156
+ const events = collectEvents(provider);
157
+ expect(provider.state).toBe('idle');
158
+ await connectWithSetup(provider);
159
+ expect(provider.state).toBe('open');
160
+ expect(events).toContainEqual({ type: 'setup_complete' });
161
+ });
162
+ it('connect is idempotent when already connected', async () => {
163
+ const provider = makeProvider();
164
+ await connectWithSetup(provider);
165
+ const ws1 = lastCreatedWs;
166
+ // Second connect should be a no-op
167
+ await provider.connect();
168
+ expect(lastCreatedWs).toBe(ws1);
169
+ });
170
+ // -----------------------------------------------------------------------
171
+ // Sending audio
172
+ // -----------------------------------------------------------------------
173
+ it('sendAudio sends base64-encoded PCM as realtimeInput', async () => {
174
+ const provider = makeProvider();
175
+ await connectWithSetup(provider);
176
+ const pcm = Buffer.from([0x01, 0x02, 0x03, 0x04]);
177
+ provider.sendAudio(pcm);
178
+ // sent[0] is setup, sent[1] is the audio
179
+ const msg = JSON.parse(lastCreatedWs.sent[1]);
180
+ expect(msg.realtimeInput).toBeDefined();
181
+ expect(msg.realtimeInput.audio).toBeDefined();
182
+ expect(msg.realtimeInput.audio.mimeType).toBe('audio/pcm;rate=16000');
183
+ expect(msg.realtimeInput.audio.data).toBe(pcm.toString('base64'));
184
+ });
185
+ it('sendAudio throws when not connected', () => {
186
+ const provider = makeProvider();
187
+ expect(() => provider.sendAudio(Buffer.from([1]))).toThrow('Cannot sendAudio before connect()');
188
+ });
189
+ it('sendAudioStreamEnd sends realtimeInput audioStreamEnd', async () => {
190
+ const provider = makeProvider();
191
+ await connectWithSetup(provider);
192
+ provider.sendAudioStreamEnd();
193
+ const msg = JSON.parse(lastCreatedWs.sent[1]);
194
+ expect(msg.realtimeInput).toEqual({ audioStreamEnd: true });
195
+ });
196
+ // -----------------------------------------------------------------------
197
+ // Sending text
198
+ // -----------------------------------------------------------------------
199
+ it('sendText sends realtimeInput text for the default 3.1 live model', async () => {
200
+ const provider = makeProvider();
201
+ await connectWithSetup(provider);
202
+ provider.sendText('Hello there');
203
+ const msg = JSON.parse(lastCreatedWs.sent[1]);
204
+ expect(msg.realtimeInput).toEqual({ text: 'Hello there' });
205
+ });
206
+ it('sendText preserves clientContent for explicit 2.5 live models', async () => {
207
+ const provider = makeProvider({ model: 'gemini-2.5-flash-live-preview' });
208
+ await connectWithSetup(provider);
209
+ provider.sendText('Hello there');
210
+ const msg = JSON.parse(lastCreatedWs.sent[1]);
211
+ expect(msg.clientContent).toBeDefined();
212
+ expect(msg.clientContent.turns).toEqual([
213
+ { role: 'user', parts: [{ text: 'Hello there' }] },
214
+ ]);
215
+ expect(msg.clientContent.turnComplete).toBe(true);
216
+ });
217
+ it('sendText throws when not connected', () => {
218
+ const provider = makeProvider();
219
+ expect(() => provider.sendText('hello')).toThrow('Cannot sendText before connect()');
220
+ });
221
+ it('sendInitialHistory sends clientContent turns without completing the turn', async () => {
222
+ const provider = makeProvider({ initialHistoryInClientContent: true });
223
+ await connectWithSetup(provider);
224
+ provider.sendInitialHistory([
225
+ { role: 'user', parts: [{ text: 'Earlier user question' }] },
226
+ { role: 'model', parts: [{ text: 'Earlier model answer' }] },
227
+ ]);
228
+ const msg = JSON.parse(lastCreatedWs.sent[1]);
229
+ expect(msg.clientContent).toEqual({
230
+ turns: [
231
+ { role: 'user', parts: [{ text: 'Earlier user question' }] },
232
+ { role: 'model', parts: [{ text: 'Earlier model answer' }] },
233
+ ],
234
+ turnComplete: false,
235
+ });
236
+ });
237
+ it('sendInitialHistory throws when not connected', () => {
238
+ const provider = makeProvider();
239
+ expect(() => provider.sendInitialHistory([
240
+ { role: 'user', parts: [{ text: 'hello' }] },
241
+ ])).toThrow('Cannot sendInitialHistory before connect()');
242
+ });
243
+ // -----------------------------------------------------------------------
244
+ // Sending tool responses
245
+ // -----------------------------------------------------------------------
246
+ it('sendToolResponse sends functionResponses message', async () => {
247
+ const provider = makeProvider();
248
+ collectEvents(provider);
249
+ await connectWithSetup(provider);
250
+ // Simulate server sending tool calls so the IDs are registered as in-flight
251
+ lastCreatedWs._receiveMessage({
252
+ toolCall: {
253
+ functionCalls: [
254
+ { id: 'call-1', name: 'bash', args: {} },
255
+ { id: 'call-2', name: 'read_file', args: {} },
256
+ ],
257
+ },
258
+ });
259
+ provider.sendToolResponse([
260
+ { id: 'call-1', name: 'bash', output: '{"result":"ok"}', scheduling: 'INTERRUPT' },
261
+ { id: 'call-2', name: 'read_file', output: 'done', scheduling: 'SILENT' },
262
+ ]);
263
+ // sent[0] is setup, sent[1] is the tool response
264
+ const msg = JSON.parse(lastCreatedWs.sent[1]);
265
+ expect(msg.toolResponse).toBeDefined();
266
+ expect(msg.toolResponse.functionResponses).toEqual([
267
+ { id: 'call-1', name: 'bash', response: { result: '{"result":"ok"}', scheduling: 'INTERRUPT' } },
268
+ { id: 'call-2', name: 'read_file', response: { result: 'done', scheduling: 'SILENT' } },
269
+ ]);
270
+ });
271
+ it('sendToolResponse drops stale responses not in-flight', async () => {
272
+ const log = createLogger();
273
+ const provider = makeProvider({ log });
274
+ collectEvents(provider);
275
+ await connectWithSetup(provider);
276
+ // Send response without any tool call — should be silently dropped
277
+ provider.sendToolResponse([
278
+ { id: 'stale-1', name: 'bash', output: 'old result' },
279
+ ]);
280
+ // No message sent beyond the setup
281
+ expect(lastCreatedWs.sent).toHaveLength(1);
282
+ expect(log.warn).toHaveBeenCalledWith({ id: 'stale-1' }, 'Gemini Live: dropping stale tool response (not in-flight)');
283
+ });
284
+ it('sendToolResponse throws when not connected', () => {
285
+ const provider = makeProvider();
286
+ expect(() => provider.sendToolResponse([{ id: 'x', name: 'bash', output: 'y' }])).toThrow('Cannot sendToolResponse before connect()');
287
+ });
288
+ // -----------------------------------------------------------------------
289
+ // Receiving events
290
+ // -----------------------------------------------------------------------
291
+ it('emits audio events from serverContent with inlineData', async () => {
292
+ const provider = makeProvider();
293
+ const events = collectEvents(provider);
294
+ await connectWithSetup(provider);
295
+ const audioBytes = Buffer.from([10, 20, 30]);
296
+ lastCreatedWs._receiveMessage({
297
+ serverContent: {
298
+ modelTurn: {
299
+ parts: [{ inlineData: { data: audioBytes.toString('base64') } }],
300
+ },
301
+ },
302
+ });
303
+ const audioEvents = events.filter((e) => e.type === 'audio');
304
+ expect(audioEvents).toHaveLength(1);
305
+ expect([...audioEvents[0].data]).toEqual([10, 20, 30]);
306
+ });
307
+ it('emits text events from serverContent', async () => {
308
+ const provider = makeProvider();
309
+ const events = collectEvents(provider);
310
+ await connectWithSetup(provider);
311
+ lastCreatedWs._receiveMessage({
312
+ serverContent: {
313
+ modelTurn: {
314
+ parts: [{ text: 'Hello world' }],
315
+ },
316
+ },
317
+ });
318
+ const textEvents = events.filter((e) => e.type === 'text');
319
+ expect(textEvents).toHaveLength(1);
320
+ expect(textEvents[0].text).toBe('Hello world');
321
+ });
322
+ it('emits turn_complete event', async () => {
323
+ const provider = makeProvider();
324
+ const events = collectEvents(provider);
325
+ await connectWithSetup(provider);
326
+ lastCreatedWs._receiveMessage({
327
+ serverContent: { turnComplete: true },
328
+ });
329
+ expect(events).toContainEqual({ type: 'turn_complete' });
330
+ });
331
+ it('emits interrupted event', async () => {
332
+ const provider = makeProvider();
333
+ const events = collectEvents(provider);
334
+ await connectWithSetup(provider);
335
+ lastCreatedWs._receiveMessage({
336
+ serverContent: { interrupted: true },
337
+ });
338
+ expect(events).toContainEqual({ type: 'interrupted' });
339
+ });
340
+ it('does not drop turnComplete or transcription when interrupted is present in the same serverContent', async () => {
341
+ const provider = makeProvider();
342
+ const events = collectEvents(provider);
343
+ await connectWithSetup(provider);
344
+ lastCreatedWs._receiveMessage({
345
+ serverContent: {
346
+ interrupted: true,
347
+ turnComplete: true,
348
+ outputTranscription: { text: 'partial reply' },
349
+ },
350
+ });
351
+ expect(events).toContainEqual({ type: 'interrupted' });
352
+ expect(events).toContainEqual({ type: 'turn_complete' });
353
+ expect(events).toContainEqual({ type: 'text', text: 'partial reply' });
354
+ });
355
+ it('emits input_transcript event from serverContent with inputTranscription', async () => {
356
+ const provider = makeProvider();
357
+ const events = collectEvents(provider);
358
+ await connectWithSetup(provider);
359
+ lastCreatedWs._receiveMessage({
360
+ serverContent: {
361
+ inputTranscription: { text: 'Hello from the user' },
362
+ },
363
+ });
364
+ const transcriptEvents = events.filter((e) => e.type === 'input_transcript');
365
+ expect(transcriptEvents).toHaveLength(1);
366
+ expect(transcriptEvents[0].text).toBe('Hello from the user');
367
+ });
368
+ it('ignores empty inputTranscription in serverContent', async () => {
369
+ const provider = makeProvider();
370
+ const events = collectEvents(provider);
371
+ await connectWithSetup(provider);
372
+ lastCreatedWs._receiveMessage({
373
+ serverContent: {
374
+ inputTranscription: { text: '' },
375
+ },
376
+ });
377
+ const transcriptEvents = events.filter((e) => e.type === 'input_transcript');
378
+ expect(transcriptEvents).toHaveLength(0);
379
+ });
380
+ it('emits text event from serverContent outputTranscription', async () => {
381
+ const provider = makeProvider();
382
+ const events = collectEvents(provider);
383
+ await connectWithSetup(provider);
384
+ lastCreatedWs._receiveMessage({
385
+ serverContent: {
386
+ outputTranscription: { text: 'Hello from Gemini audio' },
387
+ },
388
+ });
389
+ const textEvents = events.filter((e) => e.type === 'text');
390
+ expect(textEvents).toHaveLength(1);
391
+ expect(textEvents[0].text).toBe('Hello from Gemini audio');
392
+ });
393
+ it('emits error event from server error message', async () => {
394
+ const provider = makeProvider();
395
+ const events = collectEvents(provider);
396
+ await connectWithSetup(provider);
397
+ lastCreatedWs._receiveMessage({
398
+ error: { message: 'Rate limit exceeded', code: 429 },
399
+ });
400
+ const errorEvents = events.filter((e) => e.type === 'error');
401
+ expect(errorEvents).toHaveLength(1);
402
+ expect(errorEvents[0].error).toBe('Rate limit exceeded');
403
+ });
404
+ it('emits tool_call events from server toolCall message', async () => {
405
+ const provider = makeProvider();
406
+ const events = collectEvents(provider);
407
+ await connectWithSetup(provider);
408
+ lastCreatedWs._receiveMessage({
409
+ toolCall: {
410
+ functionCalls: [
411
+ { id: 'fc-1', name: 'web_search', args: { query: 'hello' } },
412
+ { id: 'fc-2', name: 'read_file', args: { file_path: '/tmp/x' } },
413
+ ],
414
+ },
415
+ });
416
+ const toolEvents = events.filter((e) => e.type === 'tool_call');
417
+ expect(toolEvents).toHaveLength(1);
418
+ const tc = toolEvents[0];
419
+ expect(tc.functionCalls).toHaveLength(2);
420
+ expect(tc.functionCalls[0]).toEqual({ id: 'fc-1', name: 'web_search', args: { query: 'hello' } });
421
+ expect(tc.functionCalls[1]).toEqual({ id: 'fc-2', name: 'read_file', args: { file_path: '/tmp/x' } });
422
+ });
423
+ it('ignores toolCall messages with empty functionCalls', async () => {
424
+ const provider = makeProvider();
425
+ const events = collectEvents(provider);
426
+ await connectWithSetup(provider);
427
+ lastCreatedWs._receiveMessage({ toolCall: { functionCalls: [] } });
428
+ const toolEvents = events.filter((e) => e.type === 'tool_call');
429
+ expect(toolEvents).toHaveLength(0);
430
+ });
431
+ it('handles mixed audio and text parts in a single message', async () => {
432
+ const provider = makeProvider();
433
+ const events = collectEvents(provider);
434
+ await connectWithSetup(provider);
435
+ const audioBytes = Buffer.from([1, 2]);
436
+ lastCreatedWs._receiveMessage({
437
+ serverContent: {
438
+ modelTurn: {
439
+ parts: [
440
+ { inlineData: { data: audioBytes.toString('base64') } },
441
+ { text: 'transcript' },
442
+ ],
443
+ },
444
+ },
445
+ });
446
+ const audioEvents = events.filter((e) => e.type === 'audio');
447
+ const textEvents = events.filter((e) => e.type === 'text');
448
+ expect(audioEvents).toHaveLength(1);
449
+ expect(textEvents).toHaveLength(1);
450
+ });
451
+ it('logs unrecognized message shapes', async () => {
452
+ const log = createLogger();
453
+ const provider = makeProvider({ log });
454
+ await connectWithSetup(provider);
455
+ lastCreatedWs._receiveMessage({ unknownField: true });
456
+ expect(log.warn).toHaveBeenCalledWith({ keys: 'unknownField' }, 'Gemini Live: unrecognized message');
457
+ });
458
+ // -----------------------------------------------------------------------
459
+ // Disconnect
460
+ // -----------------------------------------------------------------------
461
+ it('disconnect closes the WebSocket and transitions to stopped', async () => {
462
+ const provider = makeProvider();
463
+ await connectWithSetup(provider);
464
+ const ws = lastCreatedWs;
465
+ await provider.disconnect();
466
+ expect(ws.readyState).toBe(MockWebSocket.CLOSED);
467
+ expect(provider.state).toBe('stopped');
468
+ });
469
+ it('double disconnect is idempotent', async () => {
470
+ const provider = makeProvider();
471
+ await connectWithSetup(provider);
472
+ await provider.disconnect();
473
+ await provider.disconnect(); // should not throw
474
+ expect(provider.state).toBe('stopped');
475
+ });
476
+ it('disconnect during connect rejects the connect promise', async () => {
477
+ const provider = makeProvider();
478
+ const connectPromise = provider.connect();
479
+ // Wait for WS to open and enter setup state
480
+ await new Promise((r) => setTimeout(r, 5));
481
+ expect(provider.state).toBe('setup');
482
+ // Disconnect while setup is in progress
483
+ await provider.disconnect();
484
+ // The connect promise should reject, not hang
485
+ await expect(connectPromise).rejects.toThrow('disconnect() called');
486
+ expect(provider.state).toBe('stopped');
487
+ });
488
+ it('sendAudio after disconnect throws', async () => {
489
+ const provider = makeProvider();
490
+ await connectWithSetup(provider);
491
+ await provider.disconnect();
492
+ expect(() => provider.sendAudio(Buffer.from([1]))).toThrow('Cannot sendAudio before connect()');
493
+ });
494
+ // -----------------------------------------------------------------------
495
+ // Reconnection
496
+ // -----------------------------------------------------------------------
497
it('resets retry counter after successful reconnect so long-lived sessions survive', async () => {
    vi.useFakeTimers();
    // try/finally: restore real timers even when an assertion fails, so fake
    // timers never leak into later tests.
    try {
        const log = createLogger();
        const provider = makeProvider({ log });
        // Initial connect
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        // Simulate 5 successive drop-then-reconnect cycles — each should succeed
        // because the retry counter resets after each successful reconnect.
        for (let i = 0; i < 5; i++) {
            lastCreatedWs._triggerClose(1006);
            // First retry delay is always 500ms (retryCount goes 0→1, backoff = 500 * 2^0)
            await vi.advanceTimersByTimeAsync(500);
            await vi.advanceTimersByTimeAsync(0);
            lastCreatedWs._receiveMessage({ setupComplete: {} });
            expect(provider.state).toBe('open');
        }
        // All 5 reconnects succeeded — provider is still alive
        expect(log.error).not.toHaveBeenCalled();
    }
    finally {
        vi.useRealTimers();
    }
});
520
it('exhausts retries when consecutive reconnect attempts fail', async () => {
    vi.useFakeTimers();
    // try/finally: restore real timers even when an assertion fails, so fake
    // timers never leak into later tests.
    try {
        const log = createLogger();
        // Factory that produces websockets which open but never complete setup
        let closeCount = 0;
        const failingWsFactory = (url) => {
            const ws = new MockWebSocket(url);
            lastCreatedWs = ws;
            // After the first successful connect, make all subsequent WS connections
            // close immediately after open (simulating persistent failure)
            if (closeCount > 0) {
                const origEmit = ws.emit.bind(ws);
                ws.emit = function (event, ...args) {
                    origEmit(event, ...args);
                    if (event === 'open') {
                        queueMicrotask(() => ws._triggerClose(1006));
                    }
                    return true;
                };
            }
            return ws;
        };
        const provider = new GeminiLiveProvider({
            apiKey: 'key',
            log,
            // eslint-disable-next-line @typescript-eslint/no-explicit-any
            wsFactory: failingWsFactory,
        });
        // Initial connect succeeds
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        // Trigger first unexpected close — all subsequent reconnects will fail
        closeCount = 1;
        lastCreatedWs._triggerClose(1006);
        // Exhaust all 3 retries (500ms, 1000ms, 2000ms)
        for (const delay of [500, 1000, 2000]) {
            await vi.advanceTimersByTimeAsync(delay);
            await vi.advanceTimersByTimeAsync(0);
            await vi.advanceTimersByTimeAsync(0);
        }
        expect(provider.state).toBe('stopped');
        expect(vi.mocked(log.error).mock.calls.some((c) => typeof c[1] === 'string' && c[1].includes('exhausted'))).toBe(true);
    }
    finally {
        vi.useRealTimers();
    }
});
566
it('does not reconnect after explicit disconnect', async () => {
    vi.useFakeTimers();
    // try/finally: restore real timers even when an assertion fails, so fake
    // timers never leak into later tests.
    try {
        const log = createLogger();
        const provider = makeProvider({ log });
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        await provider.disconnect();
        // Trigger close — should be a no-op since state is 'stopped'
        lastCreatedWs._triggerClose(1006);
        await vi.advanceTimersByTimeAsync(5000);
        expect(log.warn).not.toHaveBeenCalled();
    }
    finally {
        vi.useRealTimers();
    }
});
581
+ // -----------------------------------------------------------------------
582
+ // Session resume handle
583
+ // -----------------------------------------------------------------------
584
it('captures session resume handle from server and includes it on reconnect', async () => {
    vi.useFakeTimers();
    // try/finally: restore real timers even when an assertion fails, so fake
    // timers never leak into later tests.
    try {
        const provider = makeProvider();
        // Initial connect
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        // Server sends a session resumption update
        lastCreatedWs._receiveMessage({
            sessionResumptionUpdate: { newHandle: 'resume-token-abc' },
        });
        // Trigger unexpected close — should reconnect with resume handle
        lastCreatedWs._triggerClose(1006);
        await vi.advanceTimersByTimeAsync(500);
        await vi.advanceTimersByTimeAsync(0);
        // Check the setup message on reconnect includes the resume handle
        const reconnectSetup = JSON.parse(lastCreatedWs.sent[0]);
        expect(reconnectSetup.setup.sessionResumption).toEqual({
            handle: 'resume-token-abc',
        });
        lastCreatedWs._receiveMessage({ setupComplete: {} });
    }
    finally {
        vi.useRealTimers();
    }
});
608
+ it('does not include sessionResumption on first connect', async () => {
609
+ const provider = makeProvider();
610
+ await connectWithSetup(provider);
611
+ const setupMsg = JSON.parse(lastCreatedWs.sent[0]);
612
+ expect(setupMsg.setup.sessionResumption).toBeUndefined();
613
+ });
614
+ // -----------------------------------------------------------------------
615
+ // Token estimation and threshold warnings
616
+ // -----------------------------------------------------------------------
617
// Exercises the `token_warning` events emitted as estimated usage crosses the
// `warnAt` / `compressAt` values supplied through `tokenBudget`.
describe('token estimation', () => {
    it('emits token_warning at warn threshold via sendText', async () => {
        const provider = makeProvider({ tokenBudget: { warnAt: 2, compressAt: 1000 } });
        const events = collectEvents(provider);
        await connectWithSetup(provider);
        // 8 chars -> ceil(8/4) = 2 tokens -> crosses warn threshold
        provider.sendText('12345678');
        const warnings = events.filter((e) => e.type === 'token_warning');
        expect(warnings).toHaveLength(1);
        expect(warnings[0]).toMatchObject({ type: 'token_warning', threshold: 'warn' });
    });
    it('emits token_warning only once per threshold crossing', async () => {
        const provider = makeProvider({ tokenBudget: { warnAt: 2, compressAt: 1000 } });
        const events = collectEvents(provider);
        await connectWithSetup(provider);
        provider.sendText('12345678'); // crosses warn
        provider.sendText('more text'); // still above warn, but already emitted
        const warnings = events.filter((e) => e.type === 'token_warning');
        expect(warnings).toHaveLength(1);
    });
    it('emits compress threshold and triggers proactive rotation', async () => {
        vi.useFakeTimers();
        // Restore real timers in `finally` so a failed assertion cannot leak
        // fake timers into the tests that run afterwards.
        try {
            const provider = makeProvider({ tokenBudget: { warnAt: 1, compressAt: 3 } });
            const events = collectEvents(provider);
            const connectP = provider.connect();
            await vi.advanceTimersByTimeAsync(0);
            lastCreatedWs._receiveMessage({ setupComplete: {} });
            await connectP;
            // 12 chars -> ceil(12/4) = 3 tokens -> crosses compress
            provider.sendText('123456789012');
            const warnings = events.filter((e) => e.type === 'token_warning');
            expect(warnings.some((w) => w.threshold === 'compress')).toBe(true);
            // Compress threshold should trigger session_rotating via graceful reconnect
            const rotations = events.filter((e) => e.type === 'session_rotating');
            expect(rotations).toHaveLength(1);
        }
        finally {
            vi.useRealTimers();
        }
    });
    it('tracks audio token usage via sendAudio', async () => {
        const provider = makeProvider({ tokenBudget: { warnAt: 20, compressAt: 1000 } });
        const events = collectEvents(provider);
        await connectWithSetup(provider);
        // 32000 bytes of 16kHz PCM = 1 second = 25 tokens -> crosses warn at 20
        provider.sendAudio(Buffer.alloc(32_000));
        const warnings = events.filter((e) => e.type === 'token_warning');
        expect(warnings).toHaveLength(1);
        expect(warnings[0]).toMatchObject({ type: 'token_warning', threshold: 'warn' });
    });
    // NOTE(review): despite the title, only an audio `inlineData` part is sent
    // here; no text part is exercised — confirm whether a text case is wanted.
    it('tracks output audio and text tokens from server messages', async () => {
        const provider = makeProvider({ tokenBudget: { warnAt: 20, compressAt: 1000 } });
        const events = collectEvents(provider);
        await connectWithSetup(provider);
        // Server sends 48000 bytes of output audio (24kHz, 1 second = 25 tokens)
        const audioBytes = Buffer.alloc(48_000);
        lastCreatedWs._receiveMessage({
            serverContent: {
                modelTurn: {
                    parts: [{ inlineData: { data: audioBytes.toString('base64') } }],
                },
            },
        });
        const warnings = events.filter((e) => e.type === 'token_warning');
        expect(warnings).toHaveLength(1);
    });
});
681
+ // -----------------------------------------------------------------------
682
+ // Connection failure
683
+ // -----------------------------------------------------------------------
684
// connect() must reject when the socket closes after 'open' but before
// `setupComplete` has been received.
it('rejects connect() if WebSocket closes during setup', async () => {
    // Factory whose sockets close themselves (code 1006) right after 'open' is emitted.
    function failWsFactory(url) {
        const socket = new MockWebSocket(url);
        const forwardEmit = socket.emit.bind(socket);
        socket.emit = (event, ...args) => {
            forwardEmit(event, ...args);
            if (event === 'open') {
                // Defer the close by one microtask so the 'open' emit returns first.
                queueMicrotask(() => socket._triggerClose(1006));
            }
            return true;
        };
        lastCreatedWs = socket;
        return socket;
    }
    const options = {
        apiKey: 'key',
        log: createLogger(),
        wsFactory: failWsFactory,
    };
    const provider = new GeminiLiveProvider(options);
    await expect(provider.connect()).rejects.toThrow('closed during connect');
});
707
+ // -----------------------------------------------------------------------
708
+ // WebSocket error handling
709
+ // -----------------------------------------------------------------------
710
// A socket-level error is reported through log.error with the expected payload.
it('logs WebSocket errors without crashing', async () => {
    const logger = createLogger();
    const provider = makeProvider({ log: logger });
    await connectWithSetup(provider);
    lastCreatedWs._triggerError('connection reset');
    const expectedPayload = { err: 'connection reset' };
    expect(logger.error).toHaveBeenCalledWith(expectedPayload, 'Gemini Live WebSocket error');
});
717
// A frame that is not valid JSON must be logged as a parse failure.
it('handles malformed JSON messages gracefully', async () => {
    const logger = createLogger();
    const provider = makeProvider({ log: logger });
    await connectWithSetup(provider);
    // Emit the raw string directly on the socket.
    lastCreatedWs.emit('message', 'not json at all');
    expect(logger.error).toHaveBeenCalled();
    // Look for a log call whose second argument mentions the parse failure.
    const { calls } = vi.mocked(logger.error).mock;
    const parseFailure = calls.find(([, text]) => typeof text === 'string' && text.includes('Failed to parse'));
    expect(parseFailure).toBeDefined();
});
727
+ // -----------------------------------------------------------------------
728
+ // Session rotation
729
+ // -----------------------------------------------------------------------
730
// Exercises the `sessionRotationMs` option: when the `session_rotating` event
// fires, how the provider reconnects afterwards, and when rotation is suppressed.
describe('session rotation', () => {
    /**
     * Runs `body` with Vitest fake timers installed and always restores real
     * timers afterwards, even when `body` throws, so a failing assertion cannot
     * leak fake timers into later tests.
     *
     * @param {() => Promise<void>} body - async test body to execute.
     * @returns {Promise<void>} settles once `body` has finished and timers are restored.
     */
    async function withFakeTimers(body) {
        vi.useFakeTimers();
        try {
            await body();
        }
        finally {
            vi.useRealTimers();
        }
    }
    it('fires at configured threshold and triggers reconnect', () => withFakeTimers(async () => {
        const provider = makeProvider({ sessionRotationMs: 5000 });
        const events = collectEvents(provider);
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        // Advance to just before threshold — no rotation yet
        await vi.advanceTimersByTimeAsync(4999);
        expect(events.filter((e) => e.type === 'session_rotating')).toHaveLength(0);
        // Advance past threshold — rotation fires, closes WS
        await vi.advanceTimersByTimeAsync(1);
        expect(events.filter((e) => e.type === 'session_rotating')).toHaveLength(1);
        // The WS close triggers reconnect
        await vi.advanceTimersByTimeAsync(0); // microtask for MockWebSocket close event
        await vi.advanceTimersByTimeAsync(500); // reconnect backoff
        await vi.advanceTimersByTimeAsync(0); // microtask for new WS open
        // Complete the reconnect
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        expect(provider.state).toBe('open');
        expect(events.filter((e) => e.type === 'reconnected')).toHaveLength(1);
    }));
    it('resets timer after successful reconnect (survives multiple rotations)', () => withFakeTimers(async () => {
        const provider = makeProvider({ sessionRotationMs: 3000 });
        const events = collectEvents(provider);
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        for (let i = 0; i < 3; i++) {
            // Wait for rotation
            await vi.advanceTimersByTimeAsync(3000);
            // Process close microtask + reconnect backoff + open microtask
            await vi.advanceTimersByTimeAsync(0);
            await vi.advanceTimersByTimeAsync(500);
            await vi.advanceTimersByTimeAsync(0);
            lastCreatedWs._receiveMessage({ setupComplete: {} });
            expect(provider.state).toBe('open');
        }
        expect(events.filter((e) => e.type === 'session_rotating')).toHaveLength(3);
        expect(events.filter((e) => e.type === 'reconnected')).toHaveLength(3);
    }));
    it('cancels timer on explicit disconnect', () => withFakeTimers(async () => {
        const provider = makeProvider({ sessionRotationMs: 5000 });
        const events = collectEvents(provider);
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        await provider.disconnect();
        // Advance well past the threshold — no rotation should fire
        await vi.advanceTimersByTimeAsync(10000);
        expect(events.filter((e) => e.type === 'session_rotating')).toHaveLength(0);
    }));
    it('rotation with a long-expired resume handle falls through to fresh session', () => withFakeTimers(async () => {
        // Use a rotation threshold longer than the resume handle TTL (2h)
        // so the handle expires before rotation fires.
        const provider = makeProvider({ sessionRotationMs: 7_300_000 });
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        // Server sends a resume handle
        lastCreatedWs._receiveMessage({
            sessionResumptionUpdate: { newHandle: 'handle-xyz' },
        });
        // Advance past the resume handle TTL (2h) but before rotation threshold
        await vi.advanceTimersByTimeAsync(7_210_000);
        // Now advance to rotation threshold
        await vi.advanceTimersByTimeAsync(90_000);
        // Process close microtask + reconnect backoff + open microtask
        await vi.advanceTimersByTimeAsync(0);
        await vi.advanceTimersByTimeAsync(500);
        await vi.advanceTimersByTimeAsync(0);
        // The reconnect setup should NOT include the expired handle
        const reconnectSetup = JSON.parse(lastCreatedWs.sent[0]);
        expect(reconnectSetup.setup.sessionResumption).toBeUndefined();
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        expect(provider.state).toBe('open');
    }));
    it('disables rotation when threshold is 0', () => withFakeTimers(async () => {
        const provider = makeProvider({ sessionRotationMs: 0 });
        const events = collectEvents(provider);
        const connectP = provider.connect();
        await vi.advanceTimersByTimeAsync(0);
        lastCreatedWs._receiveMessage({ setupComplete: {} });
        await connectP;
        // Advance well past default threshold — no rotation
        await vi.advanceTimersByTimeAsync(900_000);
        expect(events.filter((e) => e.type === 'session_rotating')).toHaveLength(0);
        expect(provider.state).toBe('open');
    }));
});
834
+ });