discoclaw 1.2.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/.context/voice.md +30 -2
  2. package/.env.example +7 -3
  3. package/.env.example.full +13 -32
  4. package/README.md +1 -1
  5. package/dist/cli/dashboard.js +7 -1
  6. package/dist/cli/dashboard.test.js +0 -4
  7. package/dist/cli/init-wizard.js +4 -8
  8. package/dist/cli/init-wizard.test.js +4 -10
  9. package/dist/config.js +5 -38
  10. package/dist/config.test.js +8 -72
  11. package/dist/cron/executor.js +72 -1
  12. package/dist/dashboard/api/metrics.js +7 -0
  13. package/dist/dashboard/api/metrics.test.js +16 -0
  14. package/dist/dashboard/api/traces.js +14 -0
  15. package/dist/dashboard/api/traces.test.js +40 -0
  16. package/dist/dashboard/page.js +187 -8
  17. package/dist/dashboard/server.js +82 -19
  18. package/dist/dashboard/server.test.js +123 -10
  19. package/dist/discord/actions.js +112 -6
  20. package/dist/discord/actions.test.js +117 -1
  21. package/dist/discord/deferred-runner.js +306 -219
  22. package/dist/discord/help-command.js +1 -1
  23. package/dist/discord/message-coordinator.js +4 -36
  24. package/dist/discord/models-command.js +1 -1
  25. package/dist/discord/reaction-handler.js +83 -5
  26. package/dist/discord/reaction-handler.test.js +55 -0
  27. package/dist/discord/verify-push.js +31 -36
  28. package/dist/discord/verify-push.test.js +34 -6
  29. package/dist/discord/voice-command.js +1 -31
  30. package/dist/discord/voice-command.test.js +21 -259
  31. package/dist/discord/voice-status-command.js +3 -22
  32. package/dist/discord/voice-status-command.test.js +16 -124
  33. package/dist/discord-followup.test.js +133 -0
  34. package/dist/health/config-doctor.js +5 -27
  35. package/dist/health/config-doctor.test.js +1 -4
  36. package/dist/index.js +15 -28
  37. package/dist/observability/trace-store.js +56 -0
  38. package/dist/observability/trace-utils.js +31 -0
  39. package/dist/runtime/codex-cli.js +3 -2
  40. package/dist/runtime/codex-cli.test.js +33 -0
  41. package/dist/runtime/model-tiers.js +1 -1
  42. package/dist/runtime/model-tiers.test.js +9 -0
  43. package/dist/runtime/openai-tool-schemas.js +17 -0
  44. package/dist/runtime-overrides.js +2 -3
  45. package/dist/runtime-overrides.test.js +27 -193
  46. package/dist/tasks/store.js +10 -6
  47. package/dist/tasks/store.test.js +44 -0
  48. package/dist/tasks/task-action-executor.test.js +162 -50
  49. package/dist/tasks/task-action-mutations.js +22 -2
  50. package/dist/tasks/task-action-read-ops.js +7 -1
  51. package/dist/tasks/task-action-runner-types.js +19 -1
  52. package/dist/voice/audio-pipeline.js +183 -96
  53. package/dist/voice/audio-receiver.js +8 -0
  54. package/dist/voice/audio-receiver.test.js +16 -0
  55. package/dist/voice/conversation-buffer.js +16 -6
  56. package/dist/voice/providers/gemini-live-provider.js +481 -0
  57. package/dist/voice/providers/gemini-live-provider.test.js +834 -0
  58. package/dist/voice/providers/gemini-live-responder.js +267 -0
  59. package/dist/voice/providers/gemini-live-responder.test.js +615 -0
  60. package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
  61. package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
  62. package/dist/voice/providers/gemini-live-types.js +32 -0
  63. package/dist/voice/providers/gemini-tool-mapper.js +91 -0
  64. package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
  65. package/dist/voice/providers/index.js +3 -0
  66. package/dist/voice/voice-prompt-builder.js +26 -17
  67. package/dist/voice/voice-prompt-builder.test.js +16 -1
  68. package/docs/configuration.md +4 -9
  69. package/docs/official-docs.md +6 -9
  70. package/docs/runtime-switching.md +1 -1
  71. package/package.json +1 -1
  72. package/dist/voice/audio-pipeline.test.js +0 -619
  73. package/dist/voice/stt-deepgram.js +0 -154
  74. package/dist/voice/stt-deepgram.test.js +0 -275
  75. package/dist/voice/stt-factory.js +0 -42
  76. package/dist/voice/stt-factory.test.js +0 -45
  77. package/dist/voice/stt-openai.js +0 -156
  78. package/dist/voice/stt-openai.test.js +0 -281
  79. package/dist/voice/tts-cartesia.js +0 -169
  80. package/dist/voice/tts-cartesia.test.js +0 -228
  81. package/dist/voice/tts-deepgram.js +0 -84
  82. package/dist/voice/tts-deepgram.test.js +0 -220
  83. package/dist/voice/tts-factory.js +0 -52
  84. package/dist/voice/tts-factory.test.js +0 -53
  85. package/dist/voice/tts-openai.js +0 -70
  86. package/dist/voice/tts-openai.test.js +0 -138
  87. package/dist/voice/types.test.js +0 -84
@@ -1,619 +0,0 @@
1
- import { describe, it, expect, vi, beforeEach } from 'vitest';
2
- import { EventEmitter } from 'node:events';
3
- import { AudioPipelineManager } from './audio-pipeline.js';
4
- // ---------------------------------------------------------------------------
5
- // Mock @discordjs/voice — includes AudioPlayer infrastructure for responder
6
- // ---------------------------------------------------------------------------
7
- /** Track the last mock player created so tests can manipulate its state. */
8
- let lastMockPlayer = null;
9
- function makeMockPlayer() {
10
- const emitter = new EventEmitter();
11
- const player = {
12
- state: { status: 'idle' },
13
- play: vi.fn(() => {
14
- const old = { ...player.state };
15
- player.state = { status: 'playing' };
16
- emitter.emit('stateChange', old, player.state);
17
- }),
18
- stop: vi.fn(() => {
19
- if (player.state.status !== 'idle') {
20
- const old = { ...player.state };
21
- player.state = { status: 'idle' };
22
- emitter.emit('stateChange', old, player.state);
23
- }
24
- }),
25
- on: vi.fn((event, listener) => {
26
- emitter.on(event, listener);
27
- return player;
28
- }),
29
- removeListener: vi.fn((event, listener) => {
30
- emitter.removeListener(event, listener);
31
- return player;
32
- }),
33
- };
34
- return player;
35
- }
36
- vi.mock('@discordjs/voice', () => ({
37
- VoiceConnectionStatus: {
38
- Signalling: 'signalling',
39
- Connecting: 'connecting',
40
- Ready: 'ready',
41
- Disconnected: 'disconnected',
42
- Destroyed: 'destroyed',
43
- },
44
- EndBehaviorType: { Manual: 0, AfterSilence: 1, AfterInactivity: 2 },
45
- AudioPlayerStatus: {
46
- Idle: 'idle',
47
- Playing: 'playing',
48
- Buffering: 'buffering',
49
- Paused: 'paused',
50
- AutoPaused: 'autopaused',
51
- },
52
- StreamType: { Raw: 'raw' },
53
- createAudioPlayer: vi.fn(() => {
54
- lastMockPlayer = makeMockPlayer();
55
- return lastMockPlayer;
56
- }),
57
- createAudioResource: vi.fn(() => ({ type: 'mock-resource' })),
58
- }));
59
- // We don't want real stt-factory or audio-receiver internals — the pipeline
60
- // injects a createStt override and AudioReceiver is tested separately.
61
- // However we do import AudioReceiver for real so the wiring is exercised.
62
- // ---------------------------------------------------------------------------
63
- // Helpers
64
- // ---------------------------------------------------------------------------
65
- function createLogger() {
66
- return { info: vi.fn(), warn: vi.fn(), error: vi.fn() };
67
- }
68
- function createMockStt() {
69
- const stt = {
70
- transcriptionCb: null,
71
- start: vi.fn(async () => { }),
72
- feedAudio: vi.fn((_frame) => { }),
73
- onTranscription: vi.fn((cb) => {
74
- stt.transcriptionCb = cb;
75
- }),
76
- stop: vi.fn(async () => { }),
77
- };
78
- return stt;
79
- }
80
- function createMockDecoder() {
81
- return {
82
- decode: vi.fn((_packet) => Buffer.alloc(960 * 2 * 2)),
83
- destroy: vi.fn(),
84
- };
85
- }
86
- function createMockConnection() {
87
- const stateListeners = [];
88
- const speakingEmitter = new EventEmitter();
89
- const subscriptions = new Map();
90
- const streams = new Map();
91
- const conn = {
92
- state: { status: 'signalling' },
93
- /** Top-level subscribe (used by VoiceResponder to attach AudioPlayer). */
94
- subscribe: vi.fn(),
95
- receiver: {
96
- speaking: speakingEmitter,
97
- subscriptions,
98
- subscribe: vi.fn((userId) => {
99
- const stream = new EventEmitter();
100
- streams.set(userId, stream);
101
- subscriptions.set(userId, stream);
102
- return stream;
103
- }),
104
- },
105
- on: vi.fn((event, listener) => {
106
- if (event === 'stateChange')
107
- stateListeners.push(listener);
108
- return conn;
109
- }),
110
- _transition(status) {
111
- const old = { ...conn.state };
112
- conn.state = { status };
113
- for (const l of stateListeners)
114
- l(old, conn.state);
115
- },
116
- };
117
- return {
118
- connection: conn,
119
- _transition: conn._transition.bind(conn),
120
- speakingEmitter,
121
- subscriptions,
122
- streams,
123
- };
124
- }
125
- function baseVoiceConfig(overrides = {}) {
126
- return {
127
- enabled: true,
128
- sttProvider: 'deepgram',
129
- ttsProvider: 'cartesia',
130
- deepgramApiKey: 'test-key',
131
- ...overrides,
132
- };
133
- }
134
- function createPipelineOpts(overrides = {}) {
135
- const mockStt = createMockStt();
136
- return {
137
- mockStt,
138
- log: createLogger(),
139
- voiceConfig: baseVoiceConfig(),
140
- allowedUserIds: new Set(['111', '222']),
141
- createDecoder: () => createMockDecoder(),
142
- createStt: () => mockStt,
143
- ...overrides,
144
- };
145
- }
146
- // ---------------------------------------------------------------------------
147
- // Tests
148
- // ---------------------------------------------------------------------------
149
- beforeEach(() => {
150
- vi.clearAllMocks();
151
- lastMockPlayer = null;
152
- });
153
- describe('AudioPipelineManager', () => {
154
- describe('startPipeline / stopPipeline', () => {
155
- it('starts STT and receiver for a guild', async () => {
156
- const opts = createPipelineOpts();
157
- const mgr = new AudioPipelineManager(opts);
158
- const { connection } = createMockConnection();
159
- await mgr.startPipeline('g1', connection);
160
- expect(opts.mockStt.start).toHaveBeenCalled();
161
- expect(mgr.hasPipeline('g1')).toBe(true);
162
- expect(mgr.activePipelineCount).toBe(1);
163
- });
164
- it('stopPipeline stops receiver and STT', async () => {
165
- const opts = createPipelineOpts();
166
- const mgr = new AudioPipelineManager(opts);
167
- const { connection } = createMockConnection();
168
- await mgr.startPipeline('g1', connection);
169
- await mgr.stopPipeline('g1');
170
- expect(opts.mockStt.stop).toHaveBeenCalled();
171
- expect(mgr.hasPipeline('g1')).toBe(false);
172
- expect(mgr.activePipelineCount).toBe(0);
173
- });
174
- it('stopPipeline is a no-op for unknown guild', async () => {
175
- const opts = createPipelineOpts();
176
- const mgr = new AudioPipelineManager(opts);
177
- // Should not throw
178
- await mgr.stopPipeline('unknown');
179
- expect(mgr.activePipelineCount).toBe(0);
180
- });
181
- it('startPipeline stops existing pipeline before restarting', async () => {
182
- const stts = [];
183
- const opts = createPipelineOpts({
184
- createStt: () => {
185
- const stt = createMockStt();
186
- stts.push(stt);
187
- return stt;
188
- },
189
- });
190
- const mgr = new AudioPipelineManager(opts);
191
- const { connection } = createMockConnection();
192
- await mgr.startPipeline('g1', connection);
193
- await mgr.startPipeline('g1', connection);
194
- // First STT should have been stopped
195
- expect(stts[0].stop).toHaveBeenCalled();
196
- // Second STT should be started
197
- expect(stts[1].start).toHaveBeenCalled();
198
- expect(mgr.activePipelineCount).toBe(1);
199
- });
200
- it('logs error and does not add pipeline if STT start fails', async () => {
201
- const log = createLogger();
202
- const failingStt = createMockStt();
203
- failingStt.start.mockRejectedValue(new Error('stt connect failed'));
204
- const mgr = new AudioPipelineManager({
205
- log,
206
- voiceConfig: baseVoiceConfig(),
207
- allowedUserIds: new Set(['111']),
208
- createDecoder: () => createMockDecoder(),
209
- createStt: () => failingStt,
210
- });
211
- const { connection } = createMockConnection();
212
- await mgr.startPipeline('g1', connection);
213
- expect(mgr.hasPipeline('g1')).toBe(false);
214
- expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'failed to start audio pipeline');
215
- });
216
- it('logs error if STT stop throws but still removes pipeline', async () => {
217
- const log = createLogger();
218
- const stt = createMockStt();
219
- stt.stop.mockRejectedValue(new Error('stop failed'));
220
- const mgr = new AudioPipelineManager({
221
- log,
222
- voiceConfig: baseVoiceConfig(),
223
- allowedUserIds: new Set(['111']),
224
- createDecoder: () => createMockDecoder(),
225
- createStt: () => stt,
226
- });
227
- const { connection } = createMockConnection();
228
- await mgr.startPipeline('g1', connection);
229
- await mgr.stopPipeline('g1');
230
- expect(mgr.hasPipeline('g1')).toBe(false);
231
- expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'error stopping STT provider');
232
- });
233
- });
234
- describe('attach', () => {
235
- it('starts pipeline when connection transitions to Ready', async () => {
236
- const opts = createPipelineOpts();
237
- const mgr = new AudioPipelineManager(opts);
238
- const { connection, _transition } = createMockConnection();
239
- mgr.attach('g1', connection);
240
- _transition('ready');
241
- // Allow async handler to settle
242
- await vi.waitFor(() => {
243
- expect(opts.mockStt.start).toHaveBeenCalled();
244
- });
245
- expect(mgr.hasPipeline('g1')).toBe(true);
246
- });
247
- it('stops pipeline when connection transitions to Destroyed', async () => {
248
- const opts = createPipelineOpts();
249
- const mgr = new AudioPipelineManager(opts);
250
- const { connection, _transition } = createMockConnection();
251
- mgr.attach('g1', connection);
252
- _transition('ready');
253
- await vi.waitFor(() => {
254
- expect(mgr.hasPipeline('g1')).toBe(true);
255
- });
256
- _transition('destroyed');
257
- await vi.waitFor(() => {
258
- expect(mgr.hasPipeline('g1')).toBe(false);
259
- });
260
- expect(opts.mockStt.stop).toHaveBeenCalled();
261
- });
262
- it('ignores non-Ready/Destroyed transitions', async () => {
263
- const opts = createPipelineOpts();
264
- const mgr = new AudioPipelineManager(opts);
265
- const { connection, _transition } = createMockConnection();
266
- mgr.attach('g1', connection);
267
- _transition('connecting');
268
- // Give it a tick to settle
269
- await new Promise((r) => setTimeout(r, 10));
270
- expect(mgr.hasPipeline('g1')).toBe(false);
271
- expect(opts.mockStt.start).not.toHaveBeenCalled();
272
- });
273
- });
274
- describe('stopAll', () => {
275
- it('stops all active pipelines', async () => {
276
- const stts = [];
277
- const mgr = new AudioPipelineManager({
278
- log: createLogger(),
279
- voiceConfig: baseVoiceConfig(),
280
- allowedUserIds: new Set(['111']),
281
- createDecoder: () => createMockDecoder(),
282
- createStt: () => {
283
- const stt = createMockStt();
284
- stts.push(stt);
285
- return stt;
286
- },
287
- });
288
- const { connection: conn1 } = createMockConnection();
289
- const { connection: conn2 } = createMockConnection();
290
- await mgr.startPipeline('g1', conn1);
291
- await mgr.startPipeline('g2', conn2);
292
- expect(mgr.activePipelineCount).toBe(2);
293
- await mgr.stopAll();
294
- expect(mgr.activePipelineCount).toBe(0);
295
- expect(stts[0].stop).toHaveBeenCalled();
296
- expect(stts[1].stop).toHaveBeenCalled();
297
- });
298
- it('is a no-op when no pipelines are active', async () => {
299
- const mgr = new AudioPipelineManager(createPipelineOpts());
300
- await mgr.stopAll(); // should not throw
301
- expect(mgr.activePipelineCount).toBe(0);
302
- });
303
- });
304
- describe('onTranscription callback', () => {
305
- it('forwards transcription results with guildId', async () => {
306
- const transcriptions = [];
307
- const stt = createMockStt();
308
- const mgr = new AudioPipelineManager({
309
- log: createLogger(),
310
- voiceConfig: baseVoiceConfig(),
311
- allowedUserIds: new Set(['111']),
312
- createDecoder: () => createMockDecoder(),
313
- createStt: () => stt,
314
- onTranscription: (guildId, result) => {
315
- transcriptions.push({ guildId, result });
316
- },
317
- });
318
- const { connection } = createMockConnection();
319
- await mgr.startPipeline('g1', connection);
320
- // STT onTranscription should have been wired up
321
- expect(stt.onTranscription).toHaveBeenCalled();
322
- // Simulate a transcription from the STT provider
323
- const result = {
324
- text: 'hello world',
325
- isFinal: true,
326
- confidence: 0.95,
327
- };
328
- stt.transcriptionCb(result);
329
- expect(transcriptions).toHaveLength(1);
330
- expect(transcriptions[0]).toEqual({ guildId: 'g1', result });
331
- });
332
- it('does not wire onTranscription when no callback is provided', async () => {
333
- const stt = createMockStt();
334
- const mgr = new AudioPipelineManager({
335
- log: createLogger(),
336
- voiceConfig: baseVoiceConfig(),
337
- allowedUserIds: new Set(['111']),
338
- createDecoder: () => createMockDecoder(),
339
- createStt: () => stt,
340
- });
341
- const { connection } = createMockConnection();
342
- await mgr.startPipeline('g1', connection);
343
- expect(stt.onTranscription).not.toHaveBeenCalled();
344
- });
345
- });
346
- describe('re-entrancy guard', () => {
347
- it('prevents infinite recursion when startPipeline is re-entered', async () => {
348
- const stt = createMockStt();
349
- let startCount = 0;
350
- const mgr = new AudioPipelineManager({
351
- log: createLogger(),
352
- voiceConfig: baseVoiceConfig(),
353
- allowedUserIds: new Set(['111']),
354
- createDecoder: () => createMockDecoder(),
355
- createStt: () => {
356
- startCount++;
357
- return stt;
358
- },
359
- });
360
- const { connection } = createMockConnection();
361
- // Simulate what @discordjs/voice does: VoiceConnection.subscribe()
362
- // synchronously fires stateChange→Ready, which would re-invoke
363
- // startPipeline. We mock this by calling startPipeline again inside
364
- // the first invocation via the STT start hook.
365
- let reEntryAttempted = false;
366
- stt.start.mockImplementation(async () => {
367
- // Simulate re-entrant call (as if subscribe triggered onReady)
368
- reEntryAttempted = true;
369
- await mgr.startPipeline('g1', connection);
370
- });
371
- await mgr.startPipeline('g1', connection);
372
- expect(reEntryAttempted).toBe(true);
373
- // Should only have created one STT (the re-entrant call was blocked)
374
- expect(startCount).toBe(1);
375
- expect(mgr.hasPipeline('g1')).toBe(true);
376
- });
377
- });
378
- describe('hasPipeline / activePipelineCount', () => {
379
- it('returns false and 0 when empty', () => {
380
- const mgr = new AudioPipelineManager(createPipelineOpts());
381
- expect(mgr.hasPipeline('g1')).toBe(false);
382
- expect(mgr.activePipelineCount).toBe(0);
383
- });
384
- it('reflects active pipelines', async () => {
385
- const stts = [];
386
- const mgr = new AudioPipelineManager({
387
- log: createLogger(),
388
- voiceConfig: baseVoiceConfig(),
389
- allowedUserIds: new Set(['111']),
390
- createDecoder: () => createMockDecoder(),
391
- createStt: () => {
392
- const stt = createMockStt();
393
- stts.push(stt);
394
- return stt;
395
- },
396
- });
397
- const { connection: conn1 } = createMockConnection();
398
- const { connection: conn2 } = createMockConnection();
399
- await mgr.startPipeline('g1', conn1);
400
- expect(mgr.hasPipeline('g1')).toBe(true);
401
- expect(mgr.activePipelineCount).toBe(1);
402
- await mgr.startPipeline('g2', conn2);
403
- expect(mgr.hasPipeline('g2')).toBe(true);
404
- expect(mgr.activePipelineCount).toBe(2);
405
- await mgr.stopPipeline('g1');
406
- expect(mgr.hasPipeline('g1')).toBe(false);
407
- expect(mgr.activePipelineCount).toBe(1);
408
- });
409
- });
410
- describe('transcript mirror integration', () => {
411
- function createMockMirror() {
412
- return {
413
- postUserTranscription: vi.fn(async () => { }),
414
- postBotResponse: vi.fn(async () => { }),
415
- };
416
- }
417
- function createMirrorTts() {
418
- return {
419
- synthesize: vi.fn(async function* (_text) {
420
- yield { buffer: Buffer.alloc(480, 0x42), sampleRate: 24000, channels: 1 };
421
- }),
422
- };
423
- }
424
- it('calls postUserTranscription for final transcriptions', async () => {
425
- const stt = createMockStt();
426
- const mirror = createMockMirror();
427
- const mgr = new AudioPipelineManager({
428
- log: createLogger(),
429
- voiceConfig: baseVoiceConfig(),
430
- allowedUserIds: new Set(['111']),
431
- createDecoder: () => createMockDecoder(),
432
- createStt: () => stt,
433
- transcriptMirror: mirror,
434
- });
435
- const { connection } = createMockConnection();
436
- await mgr.startPipeline('g1', connection);
437
- stt.transcriptionCb({ text: 'hello world', isFinal: true, confidence: 0.95 });
438
- expect(mirror.postUserTranscription).toHaveBeenCalledWith('User', 'hello world');
439
- });
440
- it('does not call postUserTranscription for non-final transcriptions', async () => {
441
- const stt = createMockStt();
442
- const mirror = createMockMirror();
443
- const mgr = new AudioPipelineManager({
444
- log: createLogger(),
445
- voiceConfig: baseVoiceConfig(),
446
- allowedUserIds: new Set(['111']),
447
- createDecoder: () => createMockDecoder(),
448
- createStt: () => stt,
449
- transcriptMirror: mirror,
450
- });
451
- const { connection } = createMockConnection();
452
- await mgr.startPipeline('g1', connection);
453
- stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.5 });
454
- expect(mirror.postUserTranscription).not.toHaveBeenCalled();
455
- });
456
- it('calls postBotResponse when responder gets an AI response', async () => {
457
- const stt = createMockStt();
458
- const mirror = createMockMirror();
459
- const mgr = new AudioPipelineManager({
460
- log: createLogger(),
461
- voiceConfig: baseVoiceConfig(),
462
- allowedUserIds: new Set(['111']),
463
- createDecoder: () => createMockDecoder(),
464
- createStt: () => stt,
465
- invokeAi: async () => 'AI response text',
466
- createTts: () => createMirrorTts(),
467
- transcriptMirror: mirror,
468
- botDisplayName: 'TestBot',
469
- });
470
- const { connection } = createMockConnection();
471
- await mgr.startPipeline('g1', connection);
472
- stt.transcriptionCb({ text: 'hello bot', isFinal: true, confidence: 0.95 });
473
- await vi.waitFor(() => {
474
- expect(mirror.postBotResponse).toHaveBeenCalledWith('TestBot', 'AI response text');
475
- });
476
- });
477
- it('causes no errors when transcript mirror is omitted', async () => {
478
- const stt = createMockStt();
479
- const log = createLogger();
480
- const mgr = new AudioPipelineManager({
481
- log,
482
- voiceConfig: baseVoiceConfig(),
483
- allowedUserIds: new Set(['111']),
484
- createDecoder: () => createMockDecoder(),
485
- createStt: () => stt,
486
- invokeAi: async () => 'response',
487
- createTts: () => createMirrorTts(),
488
- // No transcriptMirror
489
- });
490
- const { connection } = createMockConnection();
491
- await mgr.startPipeline('g1', connection);
492
- stt.transcriptionCb({ text: 'hello', isFinal: true, confidence: 0.95 });
493
- // Allow async pipeline to settle
494
- await new Promise((r) => setTimeout(r, 50));
495
- // No transcript-mirror errors should have been logged
496
- for (const call of log.warn.mock.calls) {
497
- expect(call[1]).not.toContain('transcript-mirror');
498
- }
499
- for (const call of log.error.mock.calls) {
500
- expect(call[1]).not.toContain('transcript-mirror');
501
- }
502
- });
503
- });
504
- describe('barge-in', () => {
505
- function createMockTts() {
506
- return {
507
- synthesize: vi.fn(async function* (_text) {
508
- yield { buffer: Buffer.alloc(480, 0x42), sampleRate: 24000, channels: 1 };
509
- }),
510
- };
511
- }
512
- it('calls responder.stop() on non-empty transcription while playing', async () => {
513
- const stt = createMockStt();
514
- const log = createLogger();
515
- const { connection } = createMockConnection();
516
- const mgr = new AudioPipelineManager({
517
- log,
518
- voiceConfig: baseVoiceConfig(),
519
- allowedUserIds: new Set(['111']),
520
- createDecoder: () => createMockDecoder(),
521
- createStt: () => stt,
522
- invokeAi: async () => 'response',
523
- createTts: () => createMockTts(),
524
- });
525
- await mgr.startPipeline('g1', connection);
526
- // The pipeline created a VoiceResponder which created a mock player
527
- const player = lastMockPlayer;
528
- expect(player).toBeTruthy();
529
- // Simulate the player being in "playing" state (mid-playback)
530
- player.state = { status: 'playing' };
531
- // Non-empty transcription arrives — should trigger barge-in
532
- stt.transcriptionCb({ text: 'stop that', isFinal: false, confidence: 0.9 });
533
- expect(player.stop).toHaveBeenCalled();
534
- expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
535
- });
536
- it('does not trigger barge-in for empty transcription while playing (echo case)', async () => {
537
- const stt = createMockStt();
538
- const log = createLogger();
539
- const { connection } = createMockConnection();
540
- const mgr = new AudioPipelineManager({
541
- log,
542
- voiceConfig: baseVoiceConfig(),
543
- allowedUserIds: new Set(['111']),
544
- createDecoder: () => createMockDecoder(),
545
- createStt: () => stt,
546
- invokeAi: async () => 'response',
547
- createTts: () => createMockTts(),
548
- });
549
- await mgr.startPipeline('g1', connection);
550
- const player = lastMockPlayer;
551
- player.state = { status: 'playing' };
552
- player.stop.mockClear();
553
- // Empty transcription (echo) — must not trigger barge-in
554
- stt.transcriptionCb({ text: '', isFinal: false, confidence: 0.0 });
555
- expect(player.stop).not.toHaveBeenCalled();
556
- expect(log.info).not.toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
557
- });
558
- it('triggers barge-in on interim (non-final) non-empty transcription while playing', async () => {
559
- const stt = createMockStt();
560
- const log = createLogger();
561
- const { connection } = createMockConnection();
562
- const mgr = new AudioPipelineManager({
563
- log,
564
- voiceConfig: baseVoiceConfig(),
565
- allowedUserIds: new Set(['111']),
566
- createDecoder: () => createMockDecoder(),
567
- createStt: () => stt,
568
- invokeAi: async () => 'response',
569
- createTts: () => createMockTts(),
570
- });
571
- await mgr.startPipeline('g1', connection);
572
- const player = lastMockPlayer;
573
- player.state = { status: 'playing' };
574
- // Interim transcription with non-empty text — should trigger barge-in
575
- stt.transcriptionCb({ text: 'hey wait', isFinal: false, confidence: 0.8 });
576
- expect(player.stop).toHaveBeenCalled();
577
- expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
578
- });
579
- it('does not interrupt when player is idle', async () => {
580
- const stt = createMockStt();
581
- const log = createLogger();
582
- const { connection } = createMockConnection();
583
- const mgr = new AudioPipelineManager({
584
- log,
585
- voiceConfig: baseVoiceConfig(),
586
- allowedUserIds: new Set(['111']),
587
- createDecoder: () => createMockDecoder(),
588
- createStt: () => stt,
589
- invokeAi: async () => 'response',
590
- createTts: () => createMockTts(),
591
- });
592
- await mgr.startPipeline('g1', connection);
593
- const player = lastMockPlayer;
594
- // Player is idle (default state)
595
- expect(player.state.status).toBe('idle');
596
- player.stop.mockClear();
597
- // Non-empty transcription while idle — no barge-in needed
598
- stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 });
599
- expect(player.stop).not.toHaveBeenCalled();
600
- expect(log.info).not.toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
601
- });
602
- it('works without a responder (no invokeAi configured)', async () => {
603
- const stt = createMockStt();
604
- const { connection } = createMockConnection();
605
- const mgr = new AudioPipelineManager({
606
- log: createLogger(),
607
- voiceConfig: baseVoiceConfig(),
608
- allowedUserIds: new Set(['111']),
609
- createDecoder: () => createMockDecoder(),
610
- createStt: () => stt,
611
- onTranscription: () => { },
612
- // No invokeAi — no responder created
613
- });
614
- await mgr.startPipeline('g1', connection);
615
- // Non-empty transcription — should not throw even though there's no responder
616
- expect(() => stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 })).not.toThrow();
617
- });
618
- });
619
- });