discoclaw 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -6
- package/.env.example.full +13 -32
- package/README.md +1 -1
- package/dist/cli/dashboard.test.js +0 -4
- package/dist/cli/init-wizard.js +4 -8
- package/dist/cli/init-wizard.test.js +4 -10
- package/dist/config.js +2 -42
- package/dist/config.test.js +8 -72
- package/dist/dashboard/server.js +1 -5
- package/dist/dashboard/server.test.js +3 -6
- package/dist/discord/actions.js +112 -6
- package/dist/discord/actions.test.js +117 -1
- package/dist/discord/help-command.js +1 -1
- package/dist/discord/message-coordinator.js +3 -8
- package/dist/discord/models-command.js +1 -1
- package/dist/discord/reaction-handler.js +2 -2
- package/dist/discord/reaction-handler.test.js +55 -0
- package/dist/discord/verify-push.js +31 -36
- package/dist/discord/verify-push.test.js +34 -6
- package/dist/discord/voice-command.js +1 -31
- package/dist/discord/voice-command.test.js +21 -259
- package/dist/discord/voice-status-command.js +3 -22
- package/dist/discord/voice-status-command.test.js +16 -124
- package/dist/discord-followup.test.js +133 -0
- package/dist/health/config-doctor.js +5 -27
- package/dist/health/config-doctor.test.js +1 -4
- package/dist/index.js +1 -28
- package/dist/runtime-overrides.js +2 -3
- package/dist/runtime-overrides.test.js +27 -193
- package/dist/tasks/store.js +10 -6
- package/dist/tasks/store.test.js +44 -0
- package/dist/tasks/task-action-executor.test.js +162 -50
- package/dist/tasks/task-action-mutations.js +22 -2
- package/dist/tasks/task-action-read-ops.js +7 -1
- package/dist/tasks/task-action-runner-types.js +19 -1
- package/dist/voice/audio-pipeline.js +145 -298
- package/docs/configuration.md +4 -9
- package/docs/official-docs.md +6 -9
- package/docs/runtime-switching.md +1 -1
- package/package.json +1 -1
- package/dist/voice/audio-pipeline.test.js +0 -1100
- package/dist/voice/stt-deepgram.js +0 -154
- package/dist/voice/stt-deepgram.test.js +0 -275
- package/dist/voice/stt-factory.js +0 -42
- package/dist/voice/stt-factory.test.js +0 -45
- package/dist/voice/stt-openai.js +0 -156
- package/dist/voice/stt-openai.test.js +0 -281
- package/dist/voice/tts-cartesia.js +0 -169
- package/dist/voice/tts-cartesia.test.js +0 -228
- package/dist/voice/tts-deepgram.js +0 -84
- package/dist/voice/tts-deepgram.test.js +0 -220
- package/dist/voice/tts-factory.js +0 -52
- package/dist/voice/tts-factory.test.js +0 -53
- package/dist/voice/tts-openai.js +0 -70
- package/dist/voice/tts-openai.test.js +0 -138
- package/dist/voice/types.test.js +0 -90
|
@@ -1,1100 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
-
import { EventEmitter } from 'node:events';
|
|
3
|
-
import { AudioPipelineManager } from './audio-pipeline.js';
|
|
4
|
-
// ---------------------------------------------------------------------------
|
|
5
|
-
// Mock @discordjs/voice — includes AudioPlayer infrastructure for responder
|
|
6
|
-
// ---------------------------------------------------------------------------
|
|
7
|
-
/** Track the last mock player created so tests can manipulate its state. */
|
|
8
|
-
let lastMockPlayer = null;
|
|
9
|
-
function makeMockPlayer() {
|
|
10
|
-
const emitter = new EventEmitter();
|
|
11
|
-
const player = {
|
|
12
|
-
state: { status: 'idle' },
|
|
13
|
-
play: vi.fn(() => {
|
|
14
|
-
const old = { ...player.state };
|
|
15
|
-
player.state = { status: 'playing' };
|
|
16
|
-
emitter.emit('stateChange', old, player.state);
|
|
17
|
-
}),
|
|
18
|
-
stop: vi.fn(() => {
|
|
19
|
-
if (player.state.status !== 'idle') {
|
|
20
|
-
const old = { ...player.state };
|
|
21
|
-
player.state = { status: 'idle' };
|
|
22
|
-
emitter.emit('stateChange', old, player.state);
|
|
23
|
-
}
|
|
24
|
-
}),
|
|
25
|
-
on: vi.fn((event, listener) => {
|
|
26
|
-
emitter.on(event, listener);
|
|
27
|
-
return player;
|
|
28
|
-
}),
|
|
29
|
-
removeListener: vi.fn((event, listener) => {
|
|
30
|
-
emitter.removeListener(event, listener);
|
|
31
|
-
return player;
|
|
32
|
-
}),
|
|
33
|
-
};
|
|
34
|
-
return player;
|
|
35
|
-
}
|
|
36
|
-
vi.mock('@discordjs/voice', () => ({
|
|
37
|
-
VoiceConnectionStatus: {
|
|
38
|
-
Signalling: 'signalling',
|
|
39
|
-
Connecting: 'connecting',
|
|
40
|
-
Ready: 'ready',
|
|
41
|
-
Disconnected: 'disconnected',
|
|
42
|
-
Destroyed: 'destroyed',
|
|
43
|
-
},
|
|
44
|
-
EndBehaviorType: { Manual: 0, AfterSilence: 1, AfterInactivity: 2 },
|
|
45
|
-
AudioPlayerStatus: {
|
|
46
|
-
Idle: 'idle',
|
|
47
|
-
Playing: 'playing',
|
|
48
|
-
Buffering: 'buffering',
|
|
49
|
-
Paused: 'paused',
|
|
50
|
-
AutoPaused: 'autopaused',
|
|
51
|
-
},
|
|
52
|
-
StreamType: { Raw: 'raw' },
|
|
53
|
-
createAudioPlayer: vi.fn(() => {
|
|
54
|
-
lastMockPlayer = makeMockPlayer();
|
|
55
|
-
return lastMockPlayer;
|
|
56
|
-
}),
|
|
57
|
-
createAudioResource: vi.fn(() => ({ type: 'mock-resource' })),
|
|
58
|
-
}));
|
|
59
|
-
// ---------------------------------------------------------------------------
|
|
60
|
-
// Mock Gemini Live providers
|
|
61
|
-
// ---------------------------------------------------------------------------
|
|
62
|
-
let mockGeminiProvider;
|
|
63
|
-
let mockGeminiResponder;
|
|
64
|
-
vi.mock('./providers/gemini-live-provider.js', () => ({
|
|
65
|
-
GeminiLiveProvider: vi.fn().mockImplementation(() => {
|
|
66
|
-
mockGeminiProvider = {
|
|
67
|
-
connect: vi.fn(async () => { }),
|
|
68
|
-
disconnect: vi.fn(async () => { }),
|
|
69
|
-
sendAudio: vi.fn(),
|
|
70
|
-
sendAudioStreamEnd: vi.fn(),
|
|
71
|
-
sendInitialHistory: vi.fn(),
|
|
72
|
-
sendToolResponse: vi.fn(),
|
|
73
|
-
onEvent: vi.fn(),
|
|
74
|
-
state: 'open',
|
|
75
|
-
};
|
|
76
|
-
return mockGeminiProvider;
|
|
77
|
-
}),
|
|
78
|
-
}));
|
|
79
|
-
vi.mock('./providers/gemini-live-responder.js', () => ({
|
|
80
|
-
GeminiLiveResponder: vi.fn().mockImplementation(() => {
|
|
81
|
-
mockGeminiResponder = {
|
|
82
|
-
start: vi.fn(),
|
|
83
|
-
stop: vi.fn(),
|
|
84
|
-
destroy: vi.fn(),
|
|
85
|
-
};
|
|
86
|
-
return mockGeminiResponder;
|
|
87
|
-
}),
|
|
88
|
-
}));
|
|
89
|
-
// ---------------------------------------------------------------------------
|
|
90
|
-
// Mock tool execution
|
|
91
|
-
// ---------------------------------------------------------------------------
|
|
92
|
-
const mockExecuteToolCall = vi.fn(async () => ({ result: 'ok', ok: true }));
|
|
93
|
-
vi.mock('../runtime/openai-tool-exec.js', () => ({
|
|
94
|
-
executeToolCall: (...args) => mockExecuteToolCall(...args),
|
|
95
|
-
}));
|
|
96
|
-
vi.mock('../runtime/openai-tool-schemas.js', () => ({
|
|
97
|
-
OPENAI_TO_DISCO_NAME: {
|
|
98
|
-
Read: 'Read',
|
|
99
|
-
Bash: 'Bash',
|
|
100
|
-
MemoryQuery: 'MemoryQuery',
|
|
101
|
-
read_file: 'Read',
|
|
102
|
-
bash: 'Bash',
|
|
103
|
-
},
|
|
104
|
-
buildGeminiToolDeclarations: vi.fn((enabledTools, opts) => ({
|
|
105
|
-
functionDeclarations: enabledTools.map((name) => ({
|
|
106
|
-
name,
|
|
107
|
-
...(opts?.nonBlocking ? { behavior: 'NON_BLOCKING' } : {}),
|
|
108
|
-
})),
|
|
109
|
-
})),
|
|
110
|
-
buildToolSchemas: vi.fn((enabledTools) => enabledTools.map((name) => ({
|
|
111
|
-
type: 'function',
|
|
112
|
-
function: { name, description: `${name} tool`, parameters: {} },
|
|
113
|
-
}))),
|
|
114
|
-
}));
|
|
115
|
-
// We don't want real stt-factory or audio-receiver internals — the pipeline
|
|
116
|
-
// injects a createStt override and AudioReceiver is tested separately.
|
|
117
|
-
// However we do import AudioReceiver for real so the wiring is exercised.
|
|
118
|
-
// ---------------------------------------------------------------------------
|
|
119
|
-
// Helpers
|
|
120
|
-
// ---------------------------------------------------------------------------
|
|
121
|
-
function createLogger() {
|
|
122
|
-
return { info: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
|
123
|
-
}
|
|
124
|
-
function createMockStt() {
|
|
125
|
-
const stt = {
|
|
126
|
-
transcriptionCb: null,
|
|
127
|
-
start: vi.fn(async () => { }),
|
|
128
|
-
feedAudio: vi.fn((_frame) => { }),
|
|
129
|
-
onTranscription: vi.fn((cb) => {
|
|
130
|
-
stt.transcriptionCb = cb;
|
|
131
|
-
}),
|
|
132
|
-
stop: vi.fn(async () => { }),
|
|
133
|
-
};
|
|
134
|
-
return stt;
|
|
135
|
-
}
|
|
136
|
-
function createMockDecoder() {
|
|
137
|
-
return {
|
|
138
|
-
decode: vi.fn((_packet) => Buffer.alloc(960 * 2 * 2)),
|
|
139
|
-
destroy: vi.fn(),
|
|
140
|
-
};
|
|
141
|
-
}
|
|
142
|
-
function createMockConnection() {
|
|
143
|
-
const stateListeners = [];
|
|
144
|
-
const speakingEmitter = new EventEmitter();
|
|
145
|
-
const subscriptions = new Map();
|
|
146
|
-
const streams = new Map();
|
|
147
|
-
const conn = {
|
|
148
|
-
state: { status: 'signalling' },
|
|
149
|
-
/** Top-level subscribe (used by VoiceResponder to attach AudioPlayer). */
|
|
150
|
-
subscribe: vi.fn(),
|
|
151
|
-
receiver: {
|
|
152
|
-
speaking: speakingEmitter,
|
|
153
|
-
subscriptions,
|
|
154
|
-
subscribe: vi.fn((userId) => {
|
|
155
|
-
const stream = new EventEmitter();
|
|
156
|
-
streams.set(userId, stream);
|
|
157
|
-
subscriptions.set(userId, stream);
|
|
158
|
-
return stream;
|
|
159
|
-
}),
|
|
160
|
-
},
|
|
161
|
-
on: vi.fn((event, listener) => {
|
|
162
|
-
if (event === 'stateChange')
|
|
163
|
-
stateListeners.push(listener);
|
|
164
|
-
return conn;
|
|
165
|
-
}),
|
|
166
|
-
_transition(status) {
|
|
167
|
-
const old = { ...conn.state };
|
|
168
|
-
conn.state = { status };
|
|
169
|
-
for (const l of stateListeners)
|
|
170
|
-
l(old, conn.state);
|
|
171
|
-
},
|
|
172
|
-
};
|
|
173
|
-
return {
|
|
174
|
-
connection: conn,
|
|
175
|
-
_transition: conn._transition.bind(conn),
|
|
176
|
-
speakingEmitter,
|
|
177
|
-
subscriptions,
|
|
178
|
-
streams,
|
|
179
|
-
};
|
|
180
|
-
}
|
|
181
|
-
function baseVoiceConfig(overrides = {}) {
|
|
182
|
-
return {
|
|
183
|
-
enabled: true,
|
|
184
|
-
sttProvider: 'deepgram',
|
|
185
|
-
ttsProvider: 'cartesia',
|
|
186
|
-
deepgramApiKey: 'test-key',
|
|
187
|
-
...overrides,
|
|
188
|
-
};
|
|
189
|
-
}
|
|
190
|
-
function createPipelineOpts(overrides = {}) {
|
|
191
|
-
const mockStt = createMockStt();
|
|
192
|
-
return {
|
|
193
|
-
mockStt,
|
|
194
|
-
log: createLogger(),
|
|
195
|
-
voiceConfig: baseVoiceConfig(),
|
|
196
|
-
allowedUserIds: new Set(['111', '222']),
|
|
197
|
-
createDecoder: () => createMockDecoder(),
|
|
198
|
-
createStt: () => mockStt,
|
|
199
|
-
...overrides,
|
|
200
|
-
};
|
|
201
|
-
}
|
|
202
|
-
// ---------------------------------------------------------------------------
|
|
203
|
-
// Tests
|
|
204
|
-
// ---------------------------------------------------------------------------
|
|
205
|
-
beforeEach(() => {
|
|
206
|
-
vi.clearAllMocks();
|
|
207
|
-
mockExecuteToolCall.mockReset();
|
|
208
|
-
mockExecuteToolCall.mockResolvedValue({ result: 'ok', ok: true });
|
|
209
|
-
lastMockPlayer = null;
|
|
210
|
-
});
|
|
211
|
-
describe('AudioPipelineManager', () => {
|
|
212
|
-
describe('startPipeline / stopPipeline', () => {
|
|
213
|
-
it('starts STT and receiver for a guild', async () => {
|
|
214
|
-
const opts = createPipelineOpts();
|
|
215
|
-
const mgr = new AudioPipelineManager(opts);
|
|
216
|
-
const { connection } = createMockConnection();
|
|
217
|
-
await mgr.startPipeline('g1', connection);
|
|
218
|
-
expect(opts.mockStt.start).toHaveBeenCalled();
|
|
219
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
220
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
221
|
-
});
|
|
222
|
-
it('stopPipeline stops receiver and STT', async () => {
|
|
223
|
-
const opts = createPipelineOpts();
|
|
224
|
-
const mgr = new AudioPipelineManager(opts);
|
|
225
|
-
const { connection } = createMockConnection();
|
|
226
|
-
await mgr.startPipeline('g1', connection);
|
|
227
|
-
await mgr.stopPipeline('g1');
|
|
228
|
-
expect(opts.mockStt.stop).toHaveBeenCalled();
|
|
229
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
230
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
231
|
-
});
|
|
232
|
-
it('stopPipeline is a no-op for unknown guild', async () => {
|
|
233
|
-
const opts = createPipelineOpts();
|
|
234
|
-
const mgr = new AudioPipelineManager(opts);
|
|
235
|
-
// Should not throw
|
|
236
|
-
await mgr.stopPipeline('unknown');
|
|
237
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
238
|
-
});
|
|
239
|
-
it('startPipeline stops existing pipeline before restarting', async () => {
|
|
240
|
-
const stts = [];
|
|
241
|
-
const opts = createPipelineOpts({
|
|
242
|
-
createStt: () => {
|
|
243
|
-
const stt = createMockStt();
|
|
244
|
-
stts.push(stt);
|
|
245
|
-
return stt;
|
|
246
|
-
},
|
|
247
|
-
});
|
|
248
|
-
const mgr = new AudioPipelineManager(opts);
|
|
249
|
-
const { connection } = createMockConnection();
|
|
250
|
-
await mgr.startPipeline('g1', connection);
|
|
251
|
-
await mgr.startPipeline('g1', connection);
|
|
252
|
-
// First STT should have been stopped
|
|
253
|
-
expect(stts[0].stop).toHaveBeenCalled();
|
|
254
|
-
// Second STT should be started
|
|
255
|
-
expect(stts[1].start).toHaveBeenCalled();
|
|
256
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
257
|
-
});
|
|
258
|
-
it('logs error and does not add pipeline if STT start fails', async () => {
|
|
259
|
-
const log = createLogger();
|
|
260
|
-
const failingStt = createMockStt();
|
|
261
|
-
failingStt.start.mockRejectedValue(new Error('stt connect failed'));
|
|
262
|
-
const mgr = new AudioPipelineManager({
|
|
263
|
-
log,
|
|
264
|
-
voiceConfig: baseVoiceConfig(),
|
|
265
|
-
allowedUserIds: new Set(['111']),
|
|
266
|
-
createDecoder: () => createMockDecoder(),
|
|
267
|
-
createStt: () => failingStt,
|
|
268
|
-
});
|
|
269
|
-
const { connection } = createMockConnection();
|
|
270
|
-
await mgr.startPipeline('g1', connection);
|
|
271
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
272
|
-
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'failed to start audio pipeline');
|
|
273
|
-
});
|
|
274
|
-
it('logs error if STT stop throws but still removes pipeline', async () => {
|
|
275
|
-
const log = createLogger();
|
|
276
|
-
const stt = createMockStt();
|
|
277
|
-
stt.stop.mockRejectedValue(new Error('stop failed'));
|
|
278
|
-
const mgr = new AudioPipelineManager({
|
|
279
|
-
log,
|
|
280
|
-
voiceConfig: baseVoiceConfig(),
|
|
281
|
-
allowedUserIds: new Set(['111']),
|
|
282
|
-
createDecoder: () => createMockDecoder(),
|
|
283
|
-
createStt: () => stt,
|
|
284
|
-
});
|
|
285
|
-
const { connection } = createMockConnection();
|
|
286
|
-
await mgr.startPipeline('g1', connection);
|
|
287
|
-
await mgr.stopPipeline('g1');
|
|
288
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
289
|
-
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'error stopping STT provider');
|
|
290
|
-
});
|
|
291
|
-
});
|
|
292
|
-
describe('attach', () => {
|
|
293
|
-
it('starts pipeline when connection transitions to Ready', async () => {
|
|
294
|
-
const opts = createPipelineOpts();
|
|
295
|
-
const mgr = new AudioPipelineManager(opts);
|
|
296
|
-
const { connection, _transition } = createMockConnection();
|
|
297
|
-
mgr.attach('g1', connection);
|
|
298
|
-
_transition('ready');
|
|
299
|
-
// Allow async handler to settle
|
|
300
|
-
await vi.waitFor(() => {
|
|
301
|
-
expect(opts.mockStt.start).toHaveBeenCalled();
|
|
302
|
-
});
|
|
303
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
304
|
-
});
|
|
305
|
-
it('stops pipeline when connection transitions to Destroyed', async () => {
|
|
306
|
-
const opts = createPipelineOpts();
|
|
307
|
-
const mgr = new AudioPipelineManager(opts);
|
|
308
|
-
const { connection, _transition } = createMockConnection();
|
|
309
|
-
mgr.attach('g1', connection);
|
|
310
|
-
_transition('ready');
|
|
311
|
-
await vi.waitFor(() => {
|
|
312
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
313
|
-
});
|
|
314
|
-
_transition('destroyed');
|
|
315
|
-
await vi.waitFor(() => {
|
|
316
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
317
|
-
});
|
|
318
|
-
expect(opts.mockStt.stop).toHaveBeenCalled();
|
|
319
|
-
});
|
|
320
|
-
it('ignores non-Ready/Destroyed transitions', async () => {
|
|
321
|
-
const opts = createPipelineOpts();
|
|
322
|
-
const mgr = new AudioPipelineManager(opts);
|
|
323
|
-
const { connection, _transition } = createMockConnection();
|
|
324
|
-
mgr.attach('g1', connection);
|
|
325
|
-
_transition('connecting');
|
|
326
|
-
// Give it a tick to settle
|
|
327
|
-
await new Promise((r) => setTimeout(r, 10));
|
|
328
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
329
|
-
expect(opts.mockStt.start).not.toHaveBeenCalled();
|
|
330
|
-
});
|
|
331
|
-
});
|
|
332
|
-
describe('stopAll', () => {
|
|
333
|
-
it('stops all active pipelines', async () => {
|
|
334
|
-
const stts = [];
|
|
335
|
-
const mgr = new AudioPipelineManager({
|
|
336
|
-
log: createLogger(),
|
|
337
|
-
voiceConfig: baseVoiceConfig(),
|
|
338
|
-
allowedUserIds: new Set(['111']),
|
|
339
|
-
createDecoder: () => createMockDecoder(),
|
|
340
|
-
createStt: () => {
|
|
341
|
-
const stt = createMockStt();
|
|
342
|
-
stts.push(stt);
|
|
343
|
-
return stt;
|
|
344
|
-
},
|
|
345
|
-
});
|
|
346
|
-
const { connection: conn1 } = createMockConnection();
|
|
347
|
-
const { connection: conn2 } = createMockConnection();
|
|
348
|
-
await mgr.startPipeline('g1', conn1);
|
|
349
|
-
await mgr.startPipeline('g2', conn2);
|
|
350
|
-
expect(mgr.activePipelineCount).toBe(2);
|
|
351
|
-
await mgr.stopAll();
|
|
352
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
353
|
-
expect(stts[0].stop).toHaveBeenCalled();
|
|
354
|
-
expect(stts[1].stop).toHaveBeenCalled();
|
|
355
|
-
});
|
|
356
|
-
it('is a no-op when no pipelines are active', async () => {
|
|
357
|
-
const mgr = new AudioPipelineManager(createPipelineOpts());
|
|
358
|
-
await mgr.stopAll(); // should not throw
|
|
359
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
360
|
-
});
|
|
361
|
-
});
|
|
362
|
-
describe('onTranscription callback', () => {
|
|
363
|
-
it('forwards transcription results with guildId', async () => {
|
|
364
|
-
const transcriptions = [];
|
|
365
|
-
const stt = createMockStt();
|
|
366
|
-
const mgr = new AudioPipelineManager({
|
|
367
|
-
log: createLogger(),
|
|
368
|
-
voiceConfig: baseVoiceConfig(),
|
|
369
|
-
allowedUserIds: new Set(['111']),
|
|
370
|
-
createDecoder: () => createMockDecoder(),
|
|
371
|
-
createStt: () => stt,
|
|
372
|
-
onTranscription: (guildId, result) => {
|
|
373
|
-
transcriptions.push({ guildId, result });
|
|
374
|
-
},
|
|
375
|
-
});
|
|
376
|
-
const { connection } = createMockConnection();
|
|
377
|
-
await mgr.startPipeline('g1', connection);
|
|
378
|
-
// STT onTranscription should have been wired up
|
|
379
|
-
expect(stt.onTranscription).toHaveBeenCalled();
|
|
380
|
-
// Simulate a transcription from the STT provider
|
|
381
|
-
const result = {
|
|
382
|
-
text: 'hello world',
|
|
383
|
-
isFinal: true,
|
|
384
|
-
confidence: 0.95,
|
|
385
|
-
};
|
|
386
|
-
stt.transcriptionCb(result);
|
|
387
|
-
expect(transcriptions).toHaveLength(1);
|
|
388
|
-
expect(transcriptions[0]).toEqual({ guildId: 'g1', result });
|
|
389
|
-
});
|
|
390
|
-
it('does not wire onTranscription when no callback is provided', async () => {
|
|
391
|
-
const stt = createMockStt();
|
|
392
|
-
const mgr = new AudioPipelineManager({
|
|
393
|
-
log: createLogger(),
|
|
394
|
-
voiceConfig: baseVoiceConfig(),
|
|
395
|
-
allowedUserIds: new Set(['111']),
|
|
396
|
-
createDecoder: () => createMockDecoder(),
|
|
397
|
-
createStt: () => stt,
|
|
398
|
-
});
|
|
399
|
-
const { connection } = createMockConnection();
|
|
400
|
-
await mgr.startPipeline('g1', connection);
|
|
401
|
-
expect(stt.onTranscription).not.toHaveBeenCalled();
|
|
402
|
-
});
|
|
403
|
-
});
|
|
404
|
-
describe('re-entrancy guard', () => {
|
|
405
|
-
it('prevents infinite recursion when startPipeline is re-entered', async () => {
|
|
406
|
-
const stt = createMockStt();
|
|
407
|
-
let startCount = 0;
|
|
408
|
-
const mgr = new AudioPipelineManager({
|
|
409
|
-
log: createLogger(),
|
|
410
|
-
voiceConfig: baseVoiceConfig(),
|
|
411
|
-
allowedUserIds: new Set(['111']),
|
|
412
|
-
createDecoder: () => createMockDecoder(),
|
|
413
|
-
createStt: () => {
|
|
414
|
-
startCount++;
|
|
415
|
-
return stt;
|
|
416
|
-
},
|
|
417
|
-
});
|
|
418
|
-
const { connection } = createMockConnection();
|
|
419
|
-
// Simulate what @discordjs/voice does: VoiceConnection.subscribe()
|
|
420
|
-
// synchronously fires stateChange→Ready, which would re-invoke
|
|
421
|
-
// startPipeline. We mock this by calling startPipeline again inside
|
|
422
|
-
// the first invocation via the STT start hook.
|
|
423
|
-
let reEntryAttempted = false;
|
|
424
|
-
stt.start.mockImplementation(async () => {
|
|
425
|
-
// Simulate re-entrant call (as if subscribe triggered onReady)
|
|
426
|
-
reEntryAttempted = true;
|
|
427
|
-
await mgr.startPipeline('g1', connection);
|
|
428
|
-
});
|
|
429
|
-
await mgr.startPipeline('g1', connection);
|
|
430
|
-
expect(reEntryAttempted).toBe(true);
|
|
431
|
-
// Should only have created one STT (the re-entrant call was blocked)
|
|
432
|
-
expect(startCount).toBe(1);
|
|
433
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
434
|
-
});
|
|
435
|
-
});
|
|
436
|
-
describe('hasPipeline / activePipelineCount', () => {
|
|
437
|
-
it('returns false and 0 when empty', () => {
|
|
438
|
-
const mgr = new AudioPipelineManager(createPipelineOpts());
|
|
439
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
440
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
441
|
-
});
|
|
442
|
-
it('reflects active pipelines', async () => {
|
|
443
|
-
const stts = [];
|
|
444
|
-
const mgr = new AudioPipelineManager({
|
|
445
|
-
log: createLogger(),
|
|
446
|
-
voiceConfig: baseVoiceConfig(),
|
|
447
|
-
allowedUserIds: new Set(['111']),
|
|
448
|
-
createDecoder: () => createMockDecoder(),
|
|
449
|
-
createStt: () => {
|
|
450
|
-
const stt = createMockStt();
|
|
451
|
-
stts.push(stt);
|
|
452
|
-
return stt;
|
|
453
|
-
},
|
|
454
|
-
});
|
|
455
|
-
const { connection: conn1 } = createMockConnection();
|
|
456
|
-
const { connection: conn2 } = createMockConnection();
|
|
457
|
-
await mgr.startPipeline('g1', conn1);
|
|
458
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
459
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
460
|
-
await mgr.startPipeline('g2', conn2);
|
|
461
|
-
expect(mgr.hasPipeline('g2')).toBe(true);
|
|
462
|
-
expect(mgr.activePipelineCount).toBe(2);
|
|
463
|
-
await mgr.stopPipeline('g1');
|
|
464
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
465
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
466
|
-
});
|
|
467
|
-
});
|
|
468
|
-
describe('transcript mirror integration', () => {
|
|
469
|
-
function createMockMirror() {
|
|
470
|
-
return {
|
|
471
|
-
postUserTranscription: vi.fn(async () => { }),
|
|
472
|
-
postBotResponse: vi.fn(async () => { }),
|
|
473
|
-
};
|
|
474
|
-
}
|
|
475
|
-
function createMirrorTts() {
|
|
476
|
-
return {
|
|
477
|
-
synthesize: vi.fn(async function* (_text) {
|
|
478
|
-
yield { buffer: Buffer.alloc(480, 0x42), sampleRate: 24000, channels: 1 };
|
|
479
|
-
}),
|
|
480
|
-
};
|
|
481
|
-
}
|
|
482
|
-
it('calls postUserTranscription for final transcriptions', async () => {
|
|
483
|
-
const stt = createMockStt();
|
|
484
|
-
const mirror = createMockMirror();
|
|
485
|
-
const mgr = new AudioPipelineManager({
|
|
486
|
-
log: createLogger(),
|
|
487
|
-
voiceConfig: baseVoiceConfig(),
|
|
488
|
-
allowedUserIds: new Set(['111']),
|
|
489
|
-
createDecoder: () => createMockDecoder(),
|
|
490
|
-
createStt: () => stt,
|
|
491
|
-
transcriptMirror: mirror,
|
|
492
|
-
});
|
|
493
|
-
const { connection } = createMockConnection();
|
|
494
|
-
await mgr.startPipeline('g1', connection);
|
|
495
|
-
stt.transcriptionCb({ text: 'hello world', isFinal: true, confidence: 0.95 });
|
|
496
|
-
expect(mirror.postUserTranscription).toHaveBeenCalledWith('User', 'hello world');
|
|
497
|
-
});
|
|
498
|
-
it('does not call postUserTranscription for non-final transcriptions', async () => {
|
|
499
|
-
const stt = createMockStt();
|
|
500
|
-
const mirror = createMockMirror();
|
|
501
|
-
const mgr = new AudioPipelineManager({
|
|
502
|
-
log: createLogger(),
|
|
503
|
-
voiceConfig: baseVoiceConfig(),
|
|
504
|
-
allowedUserIds: new Set(['111']),
|
|
505
|
-
createDecoder: () => createMockDecoder(),
|
|
506
|
-
createStt: () => stt,
|
|
507
|
-
transcriptMirror: mirror,
|
|
508
|
-
});
|
|
509
|
-
const { connection } = createMockConnection();
|
|
510
|
-
await mgr.startPipeline('g1', connection);
|
|
511
|
-
stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.5 });
|
|
512
|
-
expect(mirror.postUserTranscription).not.toHaveBeenCalled();
|
|
513
|
-
});
|
|
514
|
-
it('calls postBotResponse when responder gets an AI response', async () => {
|
|
515
|
-
const stt = createMockStt();
|
|
516
|
-
const mirror = createMockMirror();
|
|
517
|
-
const mgr = new AudioPipelineManager({
|
|
518
|
-
log: createLogger(),
|
|
519
|
-
voiceConfig: baseVoiceConfig(),
|
|
520
|
-
allowedUserIds: new Set(['111']),
|
|
521
|
-
createDecoder: () => createMockDecoder(),
|
|
522
|
-
createStt: () => stt,
|
|
523
|
-
invokeAi: async () => 'AI response text',
|
|
524
|
-
createTts: () => createMirrorTts(),
|
|
525
|
-
transcriptMirror: mirror,
|
|
526
|
-
botDisplayName: 'TestBot',
|
|
527
|
-
});
|
|
528
|
-
const { connection } = createMockConnection();
|
|
529
|
-
await mgr.startPipeline('g1', connection);
|
|
530
|
-
stt.transcriptionCb({ text: 'hello bot', isFinal: true, confidence: 0.95 });
|
|
531
|
-
await vi.waitFor(() => {
|
|
532
|
-
expect(mirror.postBotResponse).toHaveBeenCalledWith('TestBot', 'AI response text');
|
|
533
|
-
});
|
|
534
|
-
});
|
|
535
|
-
it('causes no errors when transcript mirror is omitted', async () => {
|
|
536
|
-
const stt = createMockStt();
|
|
537
|
-
const log = createLogger();
|
|
538
|
-
const mgr = new AudioPipelineManager({
|
|
539
|
-
log,
|
|
540
|
-
voiceConfig: baseVoiceConfig(),
|
|
541
|
-
allowedUserIds: new Set(['111']),
|
|
542
|
-
createDecoder: () => createMockDecoder(),
|
|
543
|
-
createStt: () => stt,
|
|
544
|
-
invokeAi: async () => 'response',
|
|
545
|
-
createTts: () => createMirrorTts(),
|
|
546
|
-
// No transcriptMirror
|
|
547
|
-
});
|
|
548
|
-
const { connection } = createMockConnection();
|
|
549
|
-
await mgr.startPipeline('g1', connection);
|
|
550
|
-
stt.transcriptionCb({ text: 'hello', isFinal: true, confidence: 0.95 });
|
|
551
|
-
// Allow async pipeline to settle
|
|
552
|
-
await new Promise((r) => setTimeout(r, 50));
|
|
553
|
-
// No transcript-mirror errors should have been logged
|
|
554
|
-
for (const call of log.warn.mock.calls) {
|
|
555
|
-
expect(call[1]).not.toContain('transcript-mirror');
|
|
556
|
-
}
|
|
557
|
-
for (const call of log.error.mock.calls) {
|
|
558
|
-
expect(call[1]).not.toContain('transcript-mirror');
|
|
559
|
-
}
|
|
560
|
-
});
|
|
561
|
-
});
|
|
562
|
-
describe('barge-in', () => {
|
|
563
|
-
function createMockTts() {
|
|
564
|
-
return {
|
|
565
|
-
synthesize: vi.fn(async function* (_text) {
|
|
566
|
-
yield { buffer: Buffer.alloc(480, 0x42), sampleRate: 24000, channels: 1 };
|
|
567
|
-
}),
|
|
568
|
-
};
|
|
569
|
-
}
|
|
570
|
-
it('calls responder.stop() on non-empty transcription while playing', async () => {
|
|
571
|
-
const stt = createMockStt();
|
|
572
|
-
const log = createLogger();
|
|
573
|
-
const { connection } = createMockConnection();
|
|
574
|
-
const mgr = new AudioPipelineManager({
|
|
575
|
-
log,
|
|
576
|
-
voiceConfig: baseVoiceConfig(),
|
|
577
|
-
allowedUserIds: new Set(['111']),
|
|
578
|
-
createDecoder: () => createMockDecoder(),
|
|
579
|
-
createStt: () => stt,
|
|
580
|
-
invokeAi: async () => 'response',
|
|
581
|
-
createTts: () => createMockTts(),
|
|
582
|
-
});
|
|
583
|
-
await mgr.startPipeline('g1', connection);
|
|
584
|
-
// The pipeline created a VoiceResponder which created a mock player
|
|
585
|
-
const player = lastMockPlayer;
|
|
586
|
-
expect(player).toBeTruthy();
|
|
587
|
-
// Simulate the player being in "playing" state (mid-playback)
|
|
588
|
-
player.state = { status: 'playing' };
|
|
589
|
-
// Non-empty transcription arrives — should trigger barge-in
|
|
590
|
-
stt.transcriptionCb({ text: 'stop that', isFinal: false, confidence: 0.9 });
|
|
591
|
-
expect(player.stop).toHaveBeenCalled();
|
|
592
|
-
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
593
|
-
});
|
|
594
|
-
it('does not trigger barge-in for empty transcription while playing (echo case)', async () => {
|
|
595
|
-
const stt = createMockStt();
|
|
596
|
-
const log = createLogger();
|
|
597
|
-
const { connection } = createMockConnection();
|
|
598
|
-
const mgr = new AudioPipelineManager({
|
|
599
|
-
log,
|
|
600
|
-
voiceConfig: baseVoiceConfig(),
|
|
601
|
-
allowedUserIds: new Set(['111']),
|
|
602
|
-
createDecoder: () => createMockDecoder(),
|
|
603
|
-
createStt: () => stt,
|
|
604
|
-
invokeAi: async () => 'response',
|
|
605
|
-
createTts: () => createMockTts(),
|
|
606
|
-
});
|
|
607
|
-
await mgr.startPipeline('g1', connection);
|
|
608
|
-
const player = lastMockPlayer;
|
|
609
|
-
player.state = { status: 'playing' };
|
|
610
|
-
player.stop.mockClear();
|
|
611
|
-
// Empty transcription (echo) — must not trigger barge-in
|
|
612
|
-
stt.transcriptionCb({ text: '', isFinal: false, confidence: 0.0 });
|
|
613
|
-
expect(player.stop).not.toHaveBeenCalled();
|
|
614
|
-
expect(log.info).not.toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
615
|
-
});
|
|
616
|
-
it('triggers barge-in on interim (non-final) non-empty transcription while playing', async () => {
|
|
617
|
-
const stt = createMockStt();
|
|
618
|
-
const log = createLogger();
|
|
619
|
-
const { connection } = createMockConnection();
|
|
620
|
-
const mgr = new AudioPipelineManager({
|
|
621
|
-
log,
|
|
622
|
-
voiceConfig: baseVoiceConfig(),
|
|
623
|
-
allowedUserIds: new Set(['111']),
|
|
624
|
-
createDecoder: () => createMockDecoder(),
|
|
625
|
-
createStt: () => stt,
|
|
626
|
-
invokeAi: async () => 'response',
|
|
627
|
-
createTts: () => createMockTts(),
|
|
628
|
-
});
|
|
629
|
-
await mgr.startPipeline('g1', connection);
|
|
630
|
-
const player = lastMockPlayer;
|
|
631
|
-
player.state = { status: 'playing' };
|
|
632
|
-
// Interim transcription with non-empty text — should trigger barge-in
|
|
633
|
-
stt.transcriptionCb({ text: 'hey wait', isFinal: false, confidence: 0.8 });
|
|
634
|
-
expect(player.stop).toHaveBeenCalled();
|
|
635
|
-
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
636
|
-
});
|
|
637
|
-
it('does not interrupt when player is idle', async () => {
|
|
638
|
-
const stt = createMockStt();
|
|
639
|
-
const log = createLogger();
|
|
640
|
-
const { connection } = createMockConnection();
|
|
641
|
-
const mgr = new AudioPipelineManager({
|
|
642
|
-
log,
|
|
643
|
-
voiceConfig: baseVoiceConfig(),
|
|
644
|
-
allowedUserIds: new Set(['111']),
|
|
645
|
-
createDecoder: () => createMockDecoder(),
|
|
646
|
-
createStt: () => stt,
|
|
647
|
-
invokeAi: async () => 'response',
|
|
648
|
-
createTts: () => createMockTts(),
|
|
649
|
-
});
|
|
650
|
-
await mgr.startPipeline('g1', connection);
|
|
651
|
-
const player = lastMockPlayer;
|
|
652
|
-
// Player is idle (default state)
|
|
653
|
-
expect(player.state.status).toBe('idle');
|
|
654
|
-
player.stop.mockClear();
|
|
655
|
-
// Non-empty transcription while idle — no barge-in needed
|
|
656
|
-
stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 });
|
|
657
|
-
expect(player.stop).not.toHaveBeenCalled();
|
|
658
|
-
expect(log.info).not.toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
659
|
-
});
|
|
660
|
-
it('works without a responder (no invokeAi configured)', async () => {
|
|
661
|
-
const stt = createMockStt();
|
|
662
|
-
const { connection } = createMockConnection();
|
|
663
|
-
const mgr = new AudioPipelineManager({
|
|
664
|
-
log: createLogger(),
|
|
665
|
-
voiceConfig: baseVoiceConfig(),
|
|
666
|
-
allowedUserIds: new Set(['111']),
|
|
667
|
-
createDecoder: () => createMockDecoder(),
|
|
668
|
-
createStt: () => stt,
|
|
669
|
-
onTranscription: () => { },
|
|
670
|
-
// No invokeAi — no responder created
|
|
671
|
-
});
|
|
672
|
-
await mgr.startPipeline('g1', connection);
|
|
673
|
-
// Non-empty transcription — should not throw even though there's no responder
|
|
674
|
-
expect(() => stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 })).not.toThrow();
|
|
675
|
-
});
|
|
676
|
-
});
|
|
677
|
-
describe('gemini-live mode', () => {
|
|
678
|
-
function createGeminiOpts(overrides = {}) {
|
|
679
|
-
return {
|
|
680
|
-
log: createLogger(),
|
|
681
|
-
voiceConfig: baseVoiceConfig(),
|
|
682
|
-
allowedUserIds: new Set(['111']),
|
|
683
|
-
createDecoder: () => createMockDecoder(),
|
|
684
|
-
voiceProvider: 'gemini-live',
|
|
685
|
-
geminiApiKey: 'test-gemini-key',
|
|
686
|
-
...overrides,
|
|
687
|
-
};
|
|
688
|
-
}
|
|
689
|
-
it('creates GeminiLiveProvider and GeminiLiveResponder, skipping STT/TTS', async () => {
|
|
690
|
-
const opts = createGeminiOpts();
|
|
691
|
-
const mgr = new AudioPipelineManager(opts);
|
|
692
|
-
const { connection } = createMockConnection();
|
|
693
|
-
await mgr.startPipeline('g1', connection);
|
|
694
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
695
|
-
expect(mockGeminiProvider.connect).toHaveBeenCalled();
|
|
696
|
-
expect(mockGeminiResponder.start).toHaveBeenCalled();
|
|
697
|
-
});
|
|
698
|
-
it('passes built systemInstruction into GeminiLiveProvider setup', async () => {
|
|
699
|
-
const buildGeminiSystemInstruction = vi.fn(async () => 'voice system instruction');
|
|
700
|
-
const opts = createGeminiOpts({ buildGeminiSystemInstruction });
|
|
701
|
-
const mgr = new AudioPipelineManager(opts);
|
|
702
|
-
const { connection } = createMockConnection();
|
|
703
|
-
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
704
|
-
await mgr.startPipeline('g1', connection);
|
|
705
|
-
expect(buildGeminiSystemInstruction).toHaveBeenCalled();
|
|
706
|
-
const providerCalls = ProviderMock.mock.calls;
|
|
707
|
-
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
708
|
-
systemInstruction: 'voice system instruction',
|
|
709
|
-
}));
|
|
710
|
-
});
|
|
711
|
-
it('backfills and seeds initial history into Gemini Live before audio starts', async () => {
|
|
712
|
-
const backfill = vi.fn(async () => [
|
|
713
|
-
{ user: 'first user', assistant: 'first reply' },
|
|
714
|
-
{ user: 'second user', assistant: 'second reply' },
|
|
715
|
-
]);
|
|
716
|
-
const opts = createGeminiOpts({ backfill });
|
|
717
|
-
const mgr = new AudioPipelineManager(opts);
|
|
718
|
-
const { connection } = createMockConnection();
|
|
719
|
-
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
720
|
-
await mgr.startPipeline('g1', connection);
|
|
721
|
-
const providerCalls = ProviderMock.mock.calls;
|
|
722
|
-
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
723
|
-
initialHistoryInClientContent: true,
|
|
724
|
-
}));
|
|
725
|
-
expect(mockGeminiProvider.sendInitialHistory).toHaveBeenCalledWith([
|
|
726
|
-
{ role: 'user', parts: [{ text: 'first user' }] },
|
|
727
|
-
{ role: 'model', parts: [{ text: 'first reply' }] },
|
|
728
|
-
{ role: 'user', parts: [{ text: 'second user' }] },
|
|
729
|
-
{ role: 'model', parts: [{ text: 'second reply' }] },
|
|
730
|
-
]);
|
|
731
|
-
});
|
|
732
|
-
it('uses synchronous tool declarations for the default 3.1 live model', async () => {
|
|
733
|
-
const opts = createGeminiOpts({ enabledTools: ['Read', 'Bash'] });
|
|
734
|
-
const mgr = new AudioPipelineManager(opts);
|
|
735
|
-
const { connection } = createMockConnection();
|
|
736
|
-
const toolSchemas = await import('../runtime/openai-tool-schemas.js');
|
|
737
|
-
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
738
|
-
await mgr.startPipeline('g1', connection);
|
|
739
|
-
expect(toolSchemas.buildGeminiToolDeclarations).toHaveBeenCalledWith(['Read', 'Bash'], { nonBlocking: false });
|
|
740
|
-
const providerCalls = ProviderMock.mock.calls;
|
|
741
|
-
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
742
|
-
model: 'gemini-3.1-flash-live-preview',
|
|
743
|
-
}));
|
|
744
|
-
});
|
|
745
|
-
it('preserves NON_BLOCKING tool declarations for explicit 2.5 live models', async () => {
|
|
746
|
-
const opts = createGeminiOpts({
|
|
747
|
-
enabledTools: ['Read'],
|
|
748
|
-
runtimeModel: 'gemini-2.5-flash-live-preview',
|
|
749
|
-
});
|
|
750
|
-
const mgr = new AudioPipelineManager(opts);
|
|
751
|
-
const { connection } = createMockConnection();
|
|
752
|
-
const toolSchemas = await import('../runtime/openai-tool-schemas.js');
|
|
753
|
-
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
754
|
-
await mgr.startPipeline('g1', connection);
|
|
755
|
-
expect(toolSchemas.buildGeminiToolDeclarations).toHaveBeenCalledWith(['Read'], { nonBlocking: true });
|
|
756
|
-
const providerCalls = ProviderMock.mock.calls;
|
|
757
|
-
expect(providerCalls.at(-1)?.[0]).toEqual(expect.objectContaining({
|
|
758
|
-
model: 'gemini-2.5-flash-live-preview',
|
|
759
|
-
}));
|
|
760
|
-
});
|
|
761
|
-
it('calls provider.disconnect() and responder.destroy() on stopPipeline', async () => {
|
|
762
|
-
const opts = createGeminiOpts();
|
|
763
|
-
const mgr = new AudioPipelineManager(opts);
|
|
764
|
-
const { connection } = createMockConnection();
|
|
765
|
-
await mgr.startPipeline('g1', connection);
|
|
766
|
-
await mgr.stopPipeline('g1');
|
|
767
|
-
expect(mockGeminiResponder.destroy).toHaveBeenCalled();
|
|
768
|
-
expect(mockGeminiProvider.disconnect).toHaveBeenCalled();
|
|
769
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
770
|
-
});
|
|
771
|
-
it('shim feedAudio bridges to provider.sendAudio', async () => {
|
|
772
|
-
const opts = createGeminiOpts();
|
|
773
|
-
const mgr = new AudioPipelineManager(opts);
|
|
774
|
-
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
775
|
-
await mgr.startPipeline('g1', connection);
|
|
776
|
-
// Simulate a user speaking — trigger the receiver to subscribe
|
|
777
|
-
speakingEmitter.emit('start', '111');
|
|
778
|
-
// Feed a packet through the stream to exercise the shim
|
|
779
|
-
const stream = streams.get('111');
|
|
780
|
-
if (stream) {
|
|
781
|
-
stream.emit('data', Buffer.alloc(80));
|
|
782
|
-
}
|
|
783
|
-
// Allow async processing
|
|
784
|
-
await new Promise((r) => setTimeout(r, 20));
|
|
785
|
-
// The shim feedAudio calls provider.sendAudio
|
|
786
|
-
expect(mockGeminiProvider.sendAudio).toHaveBeenCalled();
|
|
787
|
-
});
|
|
788
|
-
it('shim swallows sendAudio errors without crashing', async () => {
|
|
789
|
-
const log = createLogger();
|
|
790
|
-
const opts = createGeminiOpts({ log });
|
|
791
|
-
const mgr = new AudioPipelineManager(opts);
|
|
792
|
-
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
793
|
-
await mgr.startPipeline('g1', connection);
|
|
794
|
-
// Make sendAudio throw
|
|
795
|
-
mockGeminiProvider.sendAudio.mockImplementation(() => {
|
|
796
|
-
throw new Error('WebSocket not open');
|
|
797
|
-
});
|
|
798
|
-
// Simulate a user speaking
|
|
799
|
-
speakingEmitter.emit('start', '111');
|
|
800
|
-
const stream = streams.get('111');
|
|
801
|
-
if (stream) {
|
|
802
|
-
stream.emit('data', Buffer.alloc(80));
|
|
803
|
-
}
|
|
804
|
-
await new Promise((r) => setTimeout(r, 20));
|
|
805
|
-
// Should have logged a warning but not thrown
|
|
806
|
-
expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: sendAudio error (non-fatal)');
|
|
807
|
-
});
|
|
808
|
-
it('signals audioStreamEnd when a user speaking burst ends', async () => {
|
|
809
|
-
const opts = createGeminiOpts();
|
|
810
|
-
const mgr = new AudioPipelineManager(opts);
|
|
811
|
-
const { connection, speakingEmitter, streams } = createMockConnection();
|
|
812
|
-
await mgr.startPipeline('g1', connection);
|
|
813
|
-
speakingEmitter.emit('start', '111');
|
|
814
|
-
streams.get('111').emit('end');
|
|
815
|
-
expect(mockGeminiProvider.sendAudioStreamEnd).toHaveBeenCalled();
|
|
816
|
-
});
|
|
817
|
-
it('throws when geminiApiKey is missing', async () => {
|
|
818
|
-
const log = createLogger();
|
|
819
|
-
const opts = createGeminiOpts({ geminiApiKey: undefined, log });
|
|
820
|
-
const mgr = new AudioPipelineManager(opts);
|
|
821
|
-
const { connection } = createMockConnection();
|
|
822
|
-
await mgr.startPipeline('g1', connection);
|
|
823
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
824
|
-
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'failed to start audio pipeline');
|
|
825
|
-
});
|
|
826
|
-
it('default voiceProvider unset uses standard pipeline path', async () => {
|
|
827
|
-
// No voiceProvider set — should use the normal STT path
|
|
828
|
-
const mockStt = createMockStt();
|
|
829
|
-
const opts = createPipelineOpts({ createStt: () => mockStt });
|
|
830
|
-
const mgr = new AudioPipelineManager(opts);
|
|
831
|
-
const { connection } = createMockConnection();
|
|
832
|
-
await mgr.startPipeline('g1', connection);
|
|
833
|
-
expect(mockStt.start).toHaveBeenCalled();
|
|
834
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
835
|
-
});
|
|
836
|
-
it('wires onBotResponse to transcriptMirror.postBotResponse', async () => {
|
|
837
|
-
const mirror = {
|
|
838
|
-
postUserTranscription: vi.fn(async () => { }),
|
|
839
|
-
postBotResponse: vi.fn(async () => { }),
|
|
840
|
-
};
|
|
841
|
-
const opts = createGeminiOpts({
|
|
842
|
-
transcriptMirror: mirror,
|
|
843
|
-
botDisplayName: 'GeminiBot',
|
|
844
|
-
});
|
|
845
|
-
const mgr = new AudioPipelineManager(opts);
|
|
846
|
-
const { connection } = createMockConnection();
|
|
847
|
-
// Access the GeminiLiveResponder constructor mock to check the onBotResponse option
|
|
848
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
849
|
-
await mgr.startPipeline('g1', connection);
|
|
850
|
-
// Extract the onBotResponse callback passed to GeminiLiveResponder
|
|
851
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
852
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
853
|
-
const responderOpts = lastCall[0];
|
|
854
|
-
expect(responderOpts.onBotResponse).toBeDefined();
|
|
855
|
-
responderOpts.onBotResponse('Hello from Gemini');
|
|
856
|
-
await vi.waitFor(() => {
|
|
857
|
-
expect(mirror.postBotResponse).toHaveBeenCalledWith('GeminiBot', 'Hello from Gemini');
|
|
858
|
-
});
|
|
859
|
-
});
|
|
860
|
-
it('records completed Gemini Live turns in the local conversation buffer', async () => {
|
|
861
|
-
const backfill = vi.fn(async () => []);
|
|
862
|
-
const opts = createGeminiOpts({ backfill });
|
|
863
|
-
const mgr = new AudioPipelineManager(opts);
|
|
864
|
-
const { connection } = createMockConnection();
|
|
865
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
866
|
-
await mgr.startPipeline('g1', connection);
|
|
867
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
868
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
869
|
-
const responderOpts = lastCall[0];
|
|
870
|
-
responderOpts.onInputTranscript?.('hello there');
|
|
871
|
-
responderOpts.onBotResponse?.('general kenobi');
|
|
872
|
-
expect(backfill).toHaveBeenCalled();
|
|
873
|
-
const pipeline = mgr.pipelines.get('g1');
|
|
874
|
-
expect(pipeline?.buffer?.getHistory()).toContain('[User]: hello there');
|
|
875
|
-
expect(pipeline?.buffer?.getHistory()).toContain('[Assistant]: general kenobi');
|
|
876
|
-
});
|
|
877
|
-
it('wires onInputTranscript to transcriptMirror.postUserTranscription', async () => {
|
|
878
|
-
const mirror = {
|
|
879
|
-
postUserTranscription: vi.fn(async () => { }),
|
|
880
|
-
postBotResponse: vi.fn(async () => { }),
|
|
881
|
-
};
|
|
882
|
-
const opts = createGeminiOpts({
|
|
883
|
-
transcriptMirror: mirror,
|
|
884
|
-
});
|
|
885
|
-
const mgr = new AudioPipelineManager(opts);
|
|
886
|
-
const { connection } = createMockConnection();
|
|
887
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
888
|
-
await mgr.startPipeline('g1', connection);
|
|
889
|
-
// Extract the onInputTranscript callback passed to GeminiLiveResponder
|
|
890
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
891
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
892
|
-
const responderOpts = lastCall[0];
|
|
893
|
-
expect(responderOpts.onInputTranscript).toBeDefined();
|
|
894
|
-
responderOpts.onInputTranscript('hello from user');
|
|
895
|
-
await vi.waitFor(() => {
|
|
896
|
-
expect(mirror.postUserTranscription).toHaveBeenCalledWith('User', 'hello from user');
|
|
897
|
-
});
|
|
898
|
-
});
|
|
899
|
-
// -----------------------------------------------------------------------
|
|
900
|
-
// Tool call dispatch tests
|
|
901
|
-
// -----------------------------------------------------------------------
|
|
902
|
-
/** Helper: extract the onToolCall callback from the GeminiLiveResponder constructor mock. */
|
|
903
|
-
async function extractOnToolCall(overrides = {}) {
|
|
904
|
-
const opts = createGeminiOpts({
|
|
905
|
-
enabledTools: ['Read', 'Bash'],
|
|
906
|
-
runtimeCwd: '/fake/cwd',
|
|
907
|
-
...overrides,
|
|
908
|
-
});
|
|
909
|
-
const mgr = new AudioPipelineManager(opts);
|
|
910
|
-
const { connection } = createMockConnection();
|
|
911
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
912
|
-
await mgr.startPipeline('g1', connection);
|
|
913
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
914
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
915
|
-
const responderOpts = lastCall[0];
|
|
916
|
-
return responderOpts.onToolCall;
|
|
917
|
-
}
|
|
918
|
-
it('dispatches tool call to executor and sends response back via provider', async () => {
|
|
919
|
-
mockExecuteToolCall.mockResolvedValueOnce({ result: 'file contents here', ok: true });
|
|
920
|
-
const onToolCall = await extractOnToolCall();
|
|
921
|
-
onToolCall([{ id: 'tc-1', name: 'Read', args: { file_path: '/foo.txt' } }]);
|
|
922
|
-
await vi.waitFor(() => {
|
|
923
|
-
expect(mockExecuteToolCall).toHaveBeenCalledWith('Read', { file_path: '/foo.txt' }, ['/fake/cwd'], expect.any(Function), expect.objectContaining({ enableHybridPipeline: false }));
|
|
924
|
-
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
925
|
-
{ id: 'tc-1', name: 'Read', output: 'file contents here' },
|
|
926
|
-
]);
|
|
927
|
-
});
|
|
928
|
-
});
|
|
929
|
-
it('returns error result string to Gemini when executor throws', async () => {
|
|
930
|
-
mockExecuteToolCall.mockRejectedValueOnce(new Error('permission denied'));
|
|
931
|
-
const onToolCall = await extractOnToolCall();
|
|
932
|
-
onToolCall([{ id: 'tc-err', name: 'Read', args: {} }]);
|
|
933
|
-
await vi.waitFor(() => {
|
|
934
|
-
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
935
|
-
{ id: 'tc-err', name: 'Read', output: 'Error: permission denied' },
|
|
936
|
-
]);
|
|
937
|
-
});
|
|
938
|
-
});
|
|
939
|
-
it('returns error for unknown tool name (executor rejects non-allowlisted tool)', async () => {
|
|
940
|
-
mockExecuteToolCall.mockResolvedValueOnce({ result: 'Tool not allowed: UnknownTool', ok: false });
|
|
941
|
-
const onToolCall = await extractOnToolCall();
|
|
942
|
-
onToolCall([{ id: 'tc-unk', name: 'UnknownTool', args: {} }]);
|
|
943
|
-
await vi.waitFor(() => {
|
|
944
|
-
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
945
|
-
{ id: 'tc-unk', name: 'UnknownTool', output: 'Tool not allowed: UnknownTool' },
|
|
946
|
-
]);
|
|
947
|
-
});
|
|
948
|
-
});
|
|
949
|
-
it('dispatches multiple function calls and sends all responses in one sendToolResponse', async () => {
|
|
950
|
-
mockExecuteToolCall
|
|
951
|
-
.mockResolvedValueOnce({ result: 'result-A', ok: true })
|
|
952
|
-
.mockResolvedValueOnce({ result: 'result-B', ok: true });
|
|
953
|
-
const onToolCall = await extractOnToolCall();
|
|
954
|
-
onToolCall([
|
|
955
|
-
{ id: 'tc-a', name: 'Read', args: { file_path: '/a.txt' } },
|
|
956
|
-
{ id: 'tc-b', name: 'Bash', args: { command: 'ls' } },
|
|
957
|
-
]);
|
|
958
|
-
await vi.waitFor(() => {
|
|
959
|
-
expect(mockExecuteToolCall).toHaveBeenCalledTimes(2);
|
|
960
|
-
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
961
|
-
{ id: 'tc-a', name: 'Read', output: 'result-A' },
|
|
962
|
-
{ id: 'tc-b', name: 'Bash', output: 'result-B' },
|
|
963
|
-
]);
|
|
964
|
-
});
|
|
965
|
-
});
|
|
966
|
-
it('catches and logs sendToolResponse throw without crashing', async () => {
|
|
967
|
-
mockExecuteToolCall.mockResolvedValueOnce({ result: 'ok', ok: true });
|
|
968
|
-
const log = createLogger();
|
|
969
|
-
const onToolCall = await extractOnToolCall({ log });
|
|
970
|
-
mockGeminiProvider.sendToolResponse.mockImplementation(() => {
|
|
971
|
-
throw new Error('WebSocket closed');
|
|
972
|
-
});
|
|
973
|
-
onToolCall([{ id: 'tc-disc', name: 'Read', args: {} }]);
|
|
974
|
-
await vi.waitFor(() => {
|
|
975
|
-
expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: sendToolResponse failed (provider likely disconnected)');
|
|
976
|
-
});
|
|
977
|
-
});
|
|
978
|
-
// -----------------------------------------------------------------------
|
|
979
|
-
// Scheduled tool responses
|
|
980
|
-
// -----------------------------------------------------------------------
|
|
981
|
-
it('ignores silent tool scheduling on the default 3.1 live model', async () => {
|
|
982
|
-
mockExecuteToolCall.mockResolvedValue({ result: 'memory contents', ok: true });
|
|
983
|
-
const log = createLogger();
|
|
984
|
-
const opts = createGeminiOpts({
|
|
985
|
-
log,
|
|
986
|
-
enabledTools: ['Read', 'MemoryQuery'],
|
|
987
|
-
silentTools: ['MemoryQuery'],
|
|
988
|
-
runtimeCwd: '/fake/cwd',
|
|
989
|
-
});
|
|
990
|
-
const mgr = new AudioPipelineManager(opts);
|
|
991
|
-
const { connection } = createMockConnection();
|
|
992
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
993
|
-
await mgr.startPipeline('g1', connection);
|
|
994
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
995
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
996
|
-
const responderOpts = lastCall[0];
|
|
997
|
-
// Dispatch a SILENT tool call
|
|
998
|
-
responderOpts.onToolCall([{ id: 'tc-silent', name: 'MemoryQuery', args: { key: 'test' } }]);
|
|
999
|
-
await vi.waitFor(() => {
|
|
1000
|
-
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
1001
|
-
{ id: 'tc-silent', name: 'MemoryQuery', output: 'memory contents' },
|
|
1002
|
-
]);
|
|
1003
|
-
});
|
|
1004
|
-
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({
|
|
1005
|
-
guildId: 'g1',
|
|
1006
|
-
model: 'gemini-3.1-flash-live-preview',
|
|
1007
|
-
count: 1,
|
|
1008
|
-
}), 'gemini-live: current model does not support scheduled tool responses; silent tool scheduling disabled');
|
|
1009
|
-
});
|
|
1010
|
-
it('sends SILENT-scheduled tool responses for explicit 2.5 live models', async () => {
|
|
1011
|
-
mockExecuteToolCall
|
|
1012
|
-
.mockResolvedValueOnce({ result: 'memory data', ok: true });
|
|
1013
|
-
const log = createLogger();
|
|
1014
|
-
const opts = createGeminiOpts({
|
|
1015
|
-
log,
|
|
1016
|
-
enabledTools: ['MemoryQuery'],
|
|
1017
|
-
silentTools: ['MemoryQuery'],
|
|
1018
|
-
runtimeCwd: '/fake/cwd',
|
|
1019
|
-
runtimeModel: 'gemini-2.5-flash-live-preview',
|
|
1020
|
-
});
|
|
1021
|
-
const mgr = new AudioPipelineManager(opts);
|
|
1022
|
-
const { connection } = createMockConnection();
|
|
1023
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
1024
|
-
await mgr.startPipeline('g1', connection);
|
|
1025
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
1026
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
1027
|
-
const responderOpts = lastCall[0];
|
|
1028
|
-
responderOpts.onToolCall([
|
|
1029
|
-
{ id: 'tc-mem', name: 'MemoryQuery', args: { key: 'test' } },
|
|
1030
|
-
]);
|
|
1031
|
-
await vi.waitFor(() => {
|
|
1032
|
-
expect(mockExecuteToolCall).toHaveBeenCalledTimes(1);
|
|
1033
|
-
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1', count: 1 }), 'gemini-live: SILENT tool execution complete — results scheduled silently');
|
|
1034
|
-
expect(mockGeminiProvider.sendToolResponse).toHaveBeenCalledWith([
|
|
1035
|
-
{ id: 'tc-mem', name: 'MemoryQuery', output: 'memory data', scheduling: 'SILENT' },
|
|
1036
|
-
]);
|
|
1037
|
-
});
|
|
1038
|
-
});
|
|
1039
|
-
// -----------------------------------------------------------------------
|
|
1040
|
-
// Fallback to standard pipeline
|
|
1041
|
-
// -----------------------------------------------------------------------
|
|
1042
|
-
it('does not fall back when initial gemini-live connection fails (fallback disabled)', async () => {
|
|
1043
|
-
const { GeminiLiveProvider: ProviderMock } = await import('./providers/gemini-live-provider.js');
|
|
1044
|
-
// Make the next provider's connect() reject
|
|
1045
|
-
ProviderMock.mockImplementationOnce(() => {
|
|
1046
|
-
mockGeminiProvider = {
|
|
1047
|
-
connect: vi.fn(async () => { throw new Error('connection refused'); }),
|
|
1048
|
-
disconnect: vi.fn(async () => { }),
|
|
1049
|
-
sendAudio: vi.fn(),
|
|
1050
|
-
sendAudioStreamEnd: vi.fn(),
|
|
1051
|
-
sendInitialHistory: vi.fn(),
|
|
1052
|
-
sendToolResponse: vi.fn(),
|
|
1053
|
-
onEvent: vi.fn(),
|
|
1054
|
-
state: 'idle',
|
|
1055
|
-
};
|
|
1056
|
-
return mockGeminiProvider;
|
|
1057
|
-
});
|
|
1058
|
-
const log = createLogger();
|
|
1059
|
-
const opts = createGeminiOpts({ log });
|
|
1060
|
-
const mgr = new AudioPipelineManager(opts);
|
|
1061
|
-
const { connection } = createMockConnection();
|
|
1062
|
-
await mgr.startPipeline('g1', connection);
|
|
1063
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
1064
|
-
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live: connection failed — no fallback (fallback disabled)');
|
|
1065
|
-
});
|
|
1066
|
-
it('does not fall back when onSessionTerminated is triggered (fallback disabled)', async () => {
|
|
1067
|
-
const log = createLogger();
|
|
1068
|
-
const opts = createGeminiOpts({ log });
|
|
1069
|
-
const mgr = new AudioPipelineManager(opts);
|
|
1070
|
-
const { connection } = createMockConnection();
|
|
1071
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
1072
|
-
await mgr.startPipeline('g1', connection);
|
|
1073
|
-
expect(mgr.pipelineMode('g1')).toBe('gemini-live');
|
|
1074
|
-
// Extract onSessionTerminated callback
|
|
1075
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
1076
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
1077
|
-
const responderOpts = lastCall[0];
|
|
1078
|
-
expect(responderOpts.onSessionTerminated).toBeDefined();
|
|
1079
|
-
responderOpts.onSessionTerminated();
|
|
1080
|
-
// Pipeline should NOT switch to standard mode — it stays as gemini-live (or gets stopped)
|
|
1081
|
-
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'gemini-live session terminally failed — no fallback (fallback disabled)');
|
|
1082
|
-
});
|
|
1083
|
-
it('does not fall back when onFallbackRecommended is triggered (fallback disabled)', async () => {
|
|
1084
|
-
const log = createLogger();
|
|
1085
|
-
const opts = createGeminiOpts({ log });
|
|
1086
|
-
const mgr = new AudioPipelineManager(opts);
|
|
1087
|
-
const { connection } = createMockConnection();
|
|
1088
|
-
const { GeminiLiveResponder: ResponderMock } = await import('./providers/gemini-live-responder.js');
|
|
1089
|
-
await mgr.startPipeline('g1', connection);
|
|
1090
|
-
// Extract onFallbackRecommended callback
|
|
1091
|
-
const constructorCalls = ResponderMock.mock.calls;
|
|
1092
|
-
const lastCall = constructorCalls[constructorCalls.length - 1];
|
|
1093
|
-
const responderOpts = lastCall[0];
|
|
1094
|
-
expect(responderOpts.onFallbackRecommended).toBeDefined();
|
|
1095
|
-
responderOpts.onFallbackRecommended('exhausted reconnect retries');
|
|
1096
|
-
// Pipeline should NOT switch to standard mode
|
|
1097
|
-
expect(log.warn).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1', reason: 'exhausted reconnect retries' }), 'gemini-live: fallback recommended but fallback is disabled');
|
|
1098
|
-
});
|
|
1099
|
-
});
|
|
1100
|
-
});
|