discoclaw 1.2.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.context/voice.md +30 -2
- package/.env.example +7 -3
- package/.env.example.full +13 -32
- package/README.md +1 -1
- package/dist/cli/dashboard.js +7 -1
- package/dist/cli/dashboard.test.js +0 -4
- package/dist/cli/init-wizard.js +4 -8
- package/dist/cli/init-wizard.test.js +4 -10
- package/dist/config.js +5 -38
- package/dist/config.test.js +8 -72
- package/dist/cron/executor.js +72 -1
- package/dist/dashboard/api/metrics.js +7 -0
- package/dist/dashboard/api/metrics.test.js +16 -0
- package/dist/dashboard/api/traces.js +14 -0
- package/dist/dashboard/api/traces.test.js +40 -0
- package/dist/dashboard/page.js +187 -8
- package/dist/dashboard/server.js +82 -19
- package/dist/dashboard/server.test.js +123 -10
- package/dist/discord/actions.js +112 -6
- package/dist/discord/actions.test.js +117 -1
- package/dist/discord/deferred-runner.js +306 -219
- package/dist/discord/help-command.js +1 -1
- package/dist/discord/message-coordinator.js +4 -36
- package/dist/discord/models-command.js +1 -1
- package/dist/discord/reaction-handler.js +83 -5
- package/dist/discord/reaction-handler.test.js +55 -0
- package/dist/discord/verify-push.js +31 -36
- package/dist/discord/verify-push.test.js +34 -6
- package/dist/discord/voice-command.js +1 -31
- package/dist/discord/voice-command.test.js +21 -259
- package/dist/discord/voice-status-command.js +3 -22
- package/dist/discord/voice-status-command.test.js +16 -124
- package/dist/discord-followup.test.js +133 -0
- package/dist/health/config-doctor.js +5 -27
- package/dist/health/config-doctor.test.js +1 -4
- package/dist/index.js +15 -28
- package/dist/observability/trace-store.js +56 -0
- package/dist/observability/trace-utils.js +31 -0
- package/dist/runtime/codex-cli.js +3 -2
- package/dist/runtime/codex-cli.test.js +33 -0
- package/dist/runtime/model-tiers.js +1 -1
- package/dist/runtime/model-tiers.test.js +9 -0
- package/dist/runtime/openai-tool-schemas.js +17 -0
- package/dist/runtime-overrides.js +2 -3
- package/dist/runtime-overrides.test.js +27 -193
- package/dist/tasks/store.js +10 -6
- package/dist/tasks/store.test.js +44 -0
- package/dist/tasks/task-action-executor.test.js +162 -50
- package/dist/tasks/task-action-mutations.js +22 -2
- package/dist/tasks/task-action-read-ops.js +7 -1
- package/dist/tasks/task-action-runner-types.js +19 -1
- package/dist/voice/audio-pipeline.js +183 -96
- package/dist/voice/audio-receiver.js +8 -0
- package/dist/voice/audio-receiver.test.js +16 -0
- package/dist/voice/conversation-buffer.js +16 -6
- package/dist/voice/providers/gemini-live-provider.js +481 -0
- package/dist/voice/providers/gemini-live-provider.test.js +834 -0
- package/dist/voice/providers/gemini-live-responder.js +267 -0
- package/dist/voice/providers/gemini-live-responder.test.js +615 -0
- package/dist/voice/providers/gemini-live-token-estimator.js +100 -0
- package/dist/voice/providers/gemini-live-token-estimator.test.js +160 -0
- package/dist/voice/providers/gemini-live-types.js +32 -0
- package/dist/voice/providers/gemini-tool-mapper.js +91 -0
- package/dist/voice/providers/gemini-tool-mapper.test.js +253 -0
- package/dist/voice/providers/index.js +3 -0
- package/dist/voice/voice-prompt-builder.js +26 -17
- package/dist/voice/voice-prompt-builder.test.js +16 -1
- package/docs/configuration.md +4 -9
- package/docs/official-docs.md +6 -9
- package/docs/runtime-switching.md +1 -1
- package/package.json +1 -1
- package/dist/voice/audio-pipeline.test.js +0 -619
- package/dist/voice/stt-deepgram.js +0 -154
- package/dist/voice/stt-deepgram.test.js +0 -275
- package/dist/voice/stt-factory.js +0 -42
- package/dist/voice/stt-factory.test.js +0 -45
- package/dist/voice/stt-openai.js +0 -156
- package/dist/voice/stt-openai.test.js +0 -281
- package/dist/voice/tts-cartesia.js +0 -169
- package/dist/voice/tts-cartesia.test.js +0 -228
- package/dist/voice/tts-deepgram.js +0 -84
- package/dist/voice/tts-deepgram.test.js +0 -220
- package/dist/voice/tts-factory.js +0 -52
- package/dist/voice/tts-factory.test.js +0 -53
- package/dist/voice/tts-openai.js +0 -70
- package/dist/voice/tts-openai.test.js +0 -138
- package/dist/voice/types.test.js +0 -84
|
@@ -1,619 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
-
import { EventEmitter } from 'node:events';
|
|
3
|
-
import { AudioPipelineManager } from './audio-pipeline.js';
|
|
4
|
-
// ---------------------------------------------------------------------------
|
|
5
|
-
// Mock @discordjs/voice — includes AudioPlayer infrastructure for responder
|
|
6
|
-
// ---------------------------------------------------------------------------
|
|
7
|
-
/** Track the last mock player created so tests can manipulate its state. */
|
|
8
|
-
let lastMockPlayer = null;
|
|
9
|
-
function makeMockPlayer() {
|
|
10
|
-
const emitter = new EventEmitter();
|
|
11
|
-
const player = {
|
|
12
|
-
state: { status: 'idle' },
|
|
13
|
-
play: vi.fn(() => {
|
|
14
|
-
const old = { ...player.state };
|
|
15
|
-
player.state = { status: 'playing' };
|
|
16
|
-
emitter.emit('stateChange', old, player.state);
|
|
17
|
-
}),
|
|
18
|
-
stop: vi.fn(() => {
|
|
19
|
-
if (player.state.status !== 'idle') {
|
|
20
|
-
const old = { ...player.state };
|
|
21
|
-
player.state = { status: 'idle' };
|
|
22
|
-
emitter.emit('stateChange', old, player.state);
|
|
23
|
-
}
|
|
24
|
-
}),
|
|
25
|
-
on: vi.fn((event, listener) => {
|
|
26
|
-
emitter.on(event, listener);
|
|
27
|
-
return player;
|
|
28
|
-
}),
|
|
29
|
-
removeListener: vi.fn((event, listener) => {
|
|
30
|
-
emitter.removeListener(event, listener);
|
|
31
|
-
return player;
|
|
32
|
-
}),
|
|
33
|
-
};
|
|
34
|
-
return player;
|
|
35
|
-
}
|
|
36
|
-
vi.mock('@discordjs/voice', () => ({
|
|
37
|
-
VoiceConnectionStatus: {
|
|
38
|
-
Signalling: 'signalling',
|
|
39
|
-
Connecting: 'connecting',
|
|
40
|
-
Ready: 'ready',
|
|
41
|
-
Disconnected: 'disconnected',
|
|
42
|
-
Destroyed: 'destroyed',
|
|
43
|
-
},
|
|
44
|
-
EndBehaviorType: { Manual: 0, AfterSilence: 1, AfterInactivity: 2 },
|
|
45
|
-
AudioPlayerStatus: {
|
|
46
|
-
Idle: 'idle',
|
|
47
|
-
Playing: 'playing',
|
|
48
|
-
Buffering: 'buffering',
|
|
49
|
-
Paused: 'paused',
|
|
50
|
-
AutoPaused: 'autopaused',
|
|
51
|
-
},
|
|
52
|
-
StreamType: { Raw: 'raw' },
|
|
53
|
-
createAudioPlayer: vi.fn(() => {
|
|
54
|
-
lastMockPlayer = makeMockPlayer();
|
|
55
|
-
return lastMockPlayer;
|
|
56
|
-
}),
|
|
57
|
-
createAudioResource: vi.fn(() => ({ type: 'mock-resource' })),
|
|
58
|
-
}));
|
|
59
|
-
// We don't want real stt-factory or audio-receiver internals — the pipeline
|
|
60
|
-
// injects a createStt override and AudioReceiver is tested separately.
|
|
61
|
-
// However we do import AudioReceiver for real so the wiring is exercised.
|
|
62
|
-
// ---------------------------------------------------------------------------
|
|
63
|
-
// Helpers
|
|
64
|
-
// ---------------------------------------------------------------------------
|
|
65
|
-
function createLogger() {
|
|
66
|
-
return { info: vi.fn(), warn: vi.fn(), error: vi.fn() };
|
|
67
|
-
}
|
|
68
|
-
function createMockStt() {
|
|
69
|
-
const stt = {
|
|
70
|
-
transcriptionCb: null,
|
|
71
|
-
start: vi.fn(async () => { }),
|
|
72
|
-
feedAudio: vi.fn((_frame) => { }),
|
|
73
|
-
onTranscription: vi.fn((cb) => {
|
|
74
|
-
stt.transcriptionCb = cb;
|
|
75
|
-
}),
|
|
76
|
-
stop: vi.fn(async () => { }),
|
|
77
|
-
};
|
|
78
|
-
return stt;
|
|
79
|
-
}
|
|
80
|
-
function createMockDecoder() {
|
|
81
|
-
return {
|
|
82
|
-
decode: vi.fn((_packet) => Buffer.alloc(960 * 2 * 2)),
|
|
83
|
-
destroy: vi.fn(),
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
function createMockConnection() {
|
|
87
|
-
const stateListeners = [];
|
|
88
|
-
const speakingEmitter = new EventEmitter();
|
|
89
|
-
const subscriptions = new Map();
|
|
90
|
-
const streams = new Map();
|
|
91
|
-
const conn = {
|
|
92
|
-
state: { status: 'signalling' },
|
|
93
|
-
/** Top-level subscribe (used by VoiceResponder to attach AudioPlayer). */
|
|
94
|
-
subscribe: vi.fn(),
|
|
95
|
-
receiver: {
|
|
96
|
-
speaking: speakingEmitter,
|
|
97
|
-
subscriptions,
|
|
98
|
-
subscribe: vi.fn((userId) => {
|
|
99
|
-
const stream = new EventEmitter();
|
|
100
|
-
streams.set(userId, stream);
|
|
101
|
-
subscriptions.set(userId, stream);
|
|
102
|
-
return stream;
|
|
103
|
-
}),
|
|
104
|
-
},
|
|
105
|
-
on: vi.fn((event, listener) => {
|
|
106
|
-
if (event === 'stateChange')
|
|
107
|
-
stateListeners.push(listener);
|
|
108
|
-
return conn;
|
|
109
|
-
}),
|
|
110
|
-
_transition(status) {
|
|
111
|
-
const old = { ...conn.state };
|
|
112
|
-
conn.state = { status };
|
|
113
|
-
for (const l of stateListeners)
|
|
114
|
-
l(old, conn.state);
|
|
115
|
-
},
|
|
116
|
-
};
|
|
117
|
-
return {
|
|
118
|
-
connection: conn,
|
|
119
|
-
_transition: conn._transition.bind(conn),
|
|
120
|
-
speakingEmitter,
|
|
121
|
-
subscriptions,
|
|
122
|
-
streams,
|
|
123
|
-
};
|
|
124
|
-
}
|
|
125
|
-
function baseVoiceConfig(overrides = {}) {
|
|
126
|
-
return {
|
|
127
|
-
enabled: true,
|
|
128
|
-
sttProvider: 'deepgram',
|
|
129
|
-
ttsProvider: 'cartesia',
|
|
130
|
-
deepgramApiKey: 'test-key',
|
|
131
|
-
...overrides,
|
|
132
|
-
};
|
|
133
|
-
}
|
|
134
|
-
function createPipelineOpts(overrides = {}) {
|
|
135
|
-
const mockStt = createMockStt();
|
|
136
|
-
return {
|
|
137
|
-
mockStt,
|
|
138
|
-
log: createLogger(),
|
|
139
|
-
voiceConfig: baseVoiceConfig(),
|
|
140
|
-
allowedUserIds: new Set(['111', '222']),
|
|
141
|
-
createDecoder: () => createMockDecoder(),
|
|
142
|
-
createStt: () => mockStt,
|
|
143
|
-
...overrides,
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
// ---------------------------------------------------------------------------
|
|
147
|
-
// Tests
|
|
148
|
-
// ---------------------------------------------------------------------------
|
|
149
|
-
beforeEach(() => {
|
|
150
|
-
vi.clearAllMocks();
|
|
151
|
-
lastMockPlayer = null;
|
|
152
|
-
});
|
|
153
|
-
describe('AudioPipelineManager', () => {
|
|
154
|
-
describe('startPipeline / stopPipeline', () => {
|
|
155
|
-
it('starts STT and receiver for a guild', async () => {
|
|
156
|
-
const opts = createPipelineOpts();
|
|
157
|
-
const mgr = new AudioPipelineManager(opts);
|
|
158
|
-
const { connection } = createMockConnection();
|
|
159
|
-
await mgr.startPipeline('g1', connection);
|
|
160
|
-
expect(opts.mockStt.start).toHaveBeenCalled();
|
|
161
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
162
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
163
|
-
});
|
|
164
|
-
it('stopPipeline stops receiver and STT', async () => {
|
|
165
|
-
const opts = createPipelineOpts();
|
|
166
|
-
const mgr = new AudioPipelineManager(opts);
|
|
167
|
-
const { connection } = createMockConnection();
|
|
168
|
-
await mgr.startPipeline('g1', connection);
|
|
169
|
-
await mgr.stopPipeline('g1');
|
|
170
|
-
expect(opts.mockStt.stop).toHaveBeenCalled();
|
|
171
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
172
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
173
|
-
});
|
|
174
|
-
it('stopPipeline is a no-op for unknown guild', async () => {
|
|
175
|
-
const opts = createPipelineOpts();
|
|
176
|
-
const mgr = new AudioPipelineManager(opts);
|
|
177
|
-
// Should not throw
|
|
178
|
-
await mgr.stopPipeline('unknown');
|
|
179
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
180
|
-
});
|
|
181
|
-
it('startPipeline stops existing pipeline before restarting', async () => {
|
|
182
|
-
const stts = [];
|
|
183
|
-
const opts = createPipelineOpts({
|
|
184
|
-
createStt: () => {
|
|
185
|
-
const stt = createMockStt();
|
|
186
|
-
stts.push(stt);
|
|
187
|
-
return stt;
|
|
188
|
-
},
|
|
189
|
-
});
|
|
190
|
-
const mgr = new AudioPipelineManager(opts);
|
|
191
|
-
const { connection } = createMockConnection();
|
|
192
|
-
await mgr.startPipeline('g1', connection);
|
|
193
|
-
await mgr.startPipeline('g1', connection);
|
|
194
|
-
// First STT should have been stopped
|
|
195
|
-
expect(stts[0].stop).toHaveBeenCalled();
|
|
196
|
-
// Second STT should be started
|
|
197
|
-
expect(stts[1].start).toHaveBeenCalled();
|
|
198
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
199
|
-
});
|
|
200
|
-
it('logs error and does not add pipeline if STT start fails', async () => {
|
|
201
|
-
const log = createLogger();
|
|
202
|
-
const failingStt = createMockStt();
|
|
203
|
-
failingStt.start.mockRejectedValue(new Error('stt connect failed'));
|
|
204
|
-
const mgr = new AudioPipelineManager({
|
|
205
|
-
log,
|
|
206
|
-
voiceConfig: baseVoiceConfig(),
|
|
207
|
-
allowedUserIds: new Set(['111']),
|
|
208
|
-
createDecoder: () => createMockDecoder(),
|
|
209
|
-
createStt: () => failingStt,
|
|
210
|
-
});
|
|
211
|
-
const { connection } = createMockConnection();
|
|
212
|
-
await mgr.startPipeline('g1', connection);
|
|
213
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
214
|
-
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'failed to start audio pipeline');
|
|
215
|
-
});
|
|
216
|
-
it('logs error if STT stop throws but still removes pipeline', async () => {
|
|
217
|
-
const log = createLogger();
|
|
218
|
-
const stt = createMockStt();
|
|
219
|
-
stt.stop.mockRejectedValue(new Error('stop failed'));
|
|
220
|
-
const mgr = new AudioPipelineManager({
|
|
221
|
-
log,
|
|
222
|
-
voiceConfig: baseVoiceConfig(),
|
|
223
|
-
allowedUserIds: new Set(['111']),
|
|
224
|
-
createDecoder: () => createMockDecoder(),
|
|
225
|
-
createStt: () => stt,
|
|
226
|
-
});
|
|
227
|
-
const { connection } = createMockConnection();
|
|
228
|
-
await mgr.startPipeline('g1', connection);
|
|
229
|
-
await mgr.stopPipeline('g1');
|
|
230
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
231
|
-
expect(log.error).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'error stopping STT provider');
|
|
232
|
-
});
|
|
233
|
-
});
|
|
234
|
-
describe('attach', () => {
|
|
235
|
-
it('starts pipeline when connection transitions to Ready', async () => {
|
|
236
|
-
const opts = createPipelineOpts();
|
|
237
|
-
const mgr = new AudioPipelineManager(opts);
|
|
238
|
-
const { connection, _transition } = createMockConnection();
|
|
239
|
-
mgr.attach('g1', connection);
|
|
240
|
-
_transition('ready');
|
|
241
|
-
// Allow async handler to settle
|
|
242
|
-
await vi.waitFor(() => {
|
|
243
|
-
expect(opts.mockStt.start).toHaveBeenCalled();
|
|
244
|
-
});
|
|
245
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
246
|
-
});
|
|
247
|
-
it('stops pipeline when connection transitions to Destroyed', async () => {
|
|
248
|
-
const opts = createPipelineOpts();
|
|
249
|
-
const mgr = new AudioPipelineManager(opts);
|
|
250
|
-
const { connection, _transition } = createMockConnection();
|
|
251
|
-
mgr.attach('g1', connection);
|
|
252
|
-
_transition('ready');
|
|
253
|
-
await vi.waitFor(() => {
|
|
254
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
255
|
-
});
|
|
256
|
-
_transition('destroyed');
|
|
257
|
-
await vi.waitFor(() => {
|
|
258
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
259
|
-
});
|
|
260
|
-
expect(opts.mockStt.stop).toHaveBeenCalled();
|
|
261
|
-
});
|
|
262
|
-
it('ignores non-Ready/Destroyed transitions', async () => {
|
|
263
|
-
const opts = createPipelineOpts();
|
|
264
|
-
const mgr = new AudioPipelineManager(opts);
|
|
265
|
-
const { connection, _transition } = createMockConnection();
|
|
266
|
-
mgr.attach('g1', connection);
|
|
267
|
-
_transition('connecting');
|
|
268
|
-
// Give it a tick to settle
|
|
269
|
-
await new Promise((r) => setTimeout(r, 10));
|
|
270
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
271
|
-
expect(opts.mockStt.start).not.toHaveBeenCalled();
|
|
272
|
-
});
|
|
273
|
-
});
|
|
274
|
-
describe('stopAll', () => {
|
|
275
|
-
it('stops all active pipelines', async () => {
|
|
276
|
-
const stts = [];
|
|
277
|
-
const mgr = new AudioPipelineManager({
|
|
278
|
-
log: createLogger(),
|
|
279
|
-
voiceConfig: baseVoiceConfig(),
|
|
280
|
-
allowedUserIds: new Set(['111']),
|
|
281
|
-
createDecoder: () => createMockDecoder(),
|
|
282
|
-
createStt: () => {
|
|
283
|
-
const stt = createMockStt();
|
|
284
|
-
stts.push(stt);
|
|
285
|
-
return stt;
|
|
286
|
-
},
|
|
287
|
-
});
|
|
288
|
-
const { connection: conn1 } = createMockConnection();
|
|
289
|
-
const { connection: conn2 } = createMockConnection();
|
|
290
|
-
await mgr.startPipeline('g1', conn1);
|
|
291
|
-
await mgr.startPipeline('g2', conn2);
|
|
292
|
-
expect(mgr.activePipelineCount).toBe(2);
|
|
293
|
-
await mgr.stopAll();
|
|
294
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
295
|
-
expect(stts[0].stop).toHaveBeenCalled();
|
|
296
|
-
expect(stts[1].stop).toHaveBeenCalled();
|
|
297
|
-
});
|
|
298
|
-
it('is a no-op when no pipelines are active', async () => {
|
|
299
|
-
const mgr = new AudioPipelineManager(createPipelineOpts());
|
|
300
|
-
await mgr.stopAll(); // should not throw
|
|
301
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
302
|
-
});
|
|
303
|
-
});
|
|
304
|
-
describe('onTranscription callback', () => {
|
|
305
|
-
it('forwards transcription results with guildId', async () => {
|
|
306
|
-
const transcriptions = [];
|
|
307
|
-
const stt = createMockStt();
|
|
308
|
-
const mgr = new AudioPipelineManager({
|
|
309
|
-
log: createLogger(),
|
|
310
|
-
voiceConfig: baseVoiceConfig(),
|
|
311
|
-
allowedUserIds: new Set(['111']),
|
|
312
|
-
createDecoder: () => createMockDecoder(),
|
|
313
|
-
createStt: () => stt,
|
|
314
|
-
onTranscription: (guildId, result) => {
|
|
315
|
-
transcriptions.push({ guildId, result });
|
|
316
|
-
},
|
|
317
|
-
});
|
|
318
|
-
const { connection } = createMockConnection();
|
|
319
|
-
await mgr.startPipeline('g1', connection);
|
|
320
|
-
// STT onTranscription should have been wired up
|
|
321
|
-
expect(stt.onTranscription).toHaveBeenCalled();
|
|
322
|
-
// Simulate a transcription from the STT provider
|
|
323
|
-
const result = {
|
|
324
|
-
text: 'hello world',
|
|
325
|
-
isFinal: true,
|
|
326
|
-
confidence: 0.95,
|
|
327
|
-
};
|
|
328
|
-
stt.transcriptionCb(result);
|
|
329
|
-
expect(transcriptions).toHaveLength(1);
|
|
330
|
-
expect(transcriptions[0]).toEqual({ guildId: 'g1', result });
|
|
331
|
-
});
|
|
332
|
-
it('does not wire onTranscription when no callback is provided', async () => {
|
|
333
|
-
const stt = createMockStt();
|
|
334
|
-
const mgr = new AudioPipelineManager({
|
|
335
|
-
log: createLogger(),
|
|
336
|
-
voiceConfig: baseVoiceConfig(),
|
|
337
|
-
allowedUserIds: new Set(['111']),
|
|
338
|
-
createDecoder: () => createMockDecoder(),
|
|
339
|
-
createStt: () => stt,
|
|
340
|
-
});
|
|
341
|
-
const { connection } = createMockConnection();
|
|
342
|
-
await mgr.startPipeline('g1', connection);
|
|
343
|
-
expect(stt.onTranscription).not.toHaveBeenCalled();
|
|
344
|
-
});
|
|
345
|
-
});
|
|
346
|
-
describe('re-entrancy guard', () => {
|
|
347
|
-
it('prevents infinite recursion when startPipeline is re-entered', async () => {
|
|
348
|
-
const stt = createMockStt();
|
|
349
|
-
let startCount = 0;
|
|
350
|
-
const mgr = new AudioPipelineManager({
|
|
351
|
-
log: createLogger(),
|
|
352
|
-
voiceConfig: baseVoiceConfig(),
|
|
353
|
-
allowedUserIds: new Set(['111']),
|
|
354
|
-
createDecoder: () => createMockDecoder(),
|
|
355
|
-
createStt: () => {
|
|
356
|
-
startCount++;
|
|
357
|
-
return stt;
|
|
358
|
-
},
|
|
359
|
-
});
|
|
360
|
-
const { connection } = createMockConnection();
|
|
361
|
-
// Simulate what @discordjs/voice does: VoiceConnection.subscribe()
|
|
362
|
-
// synchronously fires stateChange→Ready, which would re-invoke
|
|
363
|
-
// startPipeline. We mock this by calling startPipeline again inside
|
|
364
|
-
// the first invocation via the STT start hook.
|
|
365
|
-
let reEntryAttempted = false;
|
|
366
|
-
stt.start.mockImplementation(async () => {
|
|
367
|
-
// Simulate re-entrant call (as if subscribe triggered onReady)
|
|
368
|
-
reEntryAttempted = true;
|
|
369
|
-
await mgr.startPipeline('g1', connection);
|
|
370
|
-
});
|
|
371
|
-
await mgr.startPipeline('g1', connection);
|
|
372
|
-
expect(reEntryAttempted).toBe(true);
|
|
373
|
-
// Should only have created one STT (the re-entrant call was blocked)
|
|
374
|
-
expect(startCount).toBe(1);
|
|
375
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
376
|
-
});
|
|
377
|
-
});
|
|
378
|
-
describe('hasPipeline / activePipelineCount', () => {
|
|
379
|
-
it('returns false and 0 when empty', () => {
|
|
380
|
-
const mgr = new AudioPipelineManager(createPipelineOpts());
|
|
381
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
382
|
-
expect(mgr.activePipelineCount).toBe(0);
|
|
383
|
-
});
|
|
384
|
-
it('reflects active pipelines', async () => {
|
|
385
|
-
const stts = [];
|
|
386
|
-
const mgr = new AudioPipelineManager({
|
|
387
|
-
log: createLogger(),
|
|
388
|
-
voiceConfig: baseVoiceConfig(),
|
|
389
|
-
allowedUserIds: new Set(['111']),
|
|
390
|
-
createDecoder: () => createMockDecoder(),
|
|
391
|
-
createStt: () => {
|
|
392
|
-
const stt = createMockStt();
|
|
393
|
-
stts.push(stt);
|
|
394
|
-
return stt;
|
|
395
|
-
},
|
|
396
|
-
});
|
|
397
|
-
const { connection: conn1 } = createMockConnection();
|
|
398
|
-
const { connection: conn2 } = createMockConnection();
|
|
399
|
-
await mgr.startPipeline('g1', conn1);
|
|
400
|
-
expect(mgr.hasPipeline('g1')).toBe(true);
|
|
401
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
402
|
-
await mgr.startPipeline('g2', conn2);
|
|
403
|
-
expect(mgr.hasPipeline('g2')).toBe(true);
|
|
404
|
-
expect(mgr.activePipelineCount).toBe(2);
|
|
405
|
-
await mgr.stopPipeline('g1');
|
|
406
|
-
expect(mgr.hasPipeline('g1')).toBe(false);
|
|
407
|
-
expect(mgr.activePipelineCount).toBe(1);
|
|
408
|
-
});
|
|
409
|
-
});
|
|
410
|
-
describe('transcript mirror integration', () => {
|
|
411
|
-
function createMockMirror() {
|
|
412
|
-
return {
|
|
413
|
-
postUserTranscription: vi.fn(async () => { }),
|
|
414
|
-
postBotResponse: vi.fn(async () => { }),
|
|
415
|
-
};
|
|
416
|
-
}
|
|
417
|
-
function createMirrorTts() {
|
|
418
|
-
return {
|
|
419
|
-
synthesize: vi.fn(async function* (_text) {
|
|
420
|
-
yield { buffer: Buffer.alloc(480, 0x42), sampleRate: 24000, channels: 1 };
|
|
421
|
-
}),
|
|
422
|
-
};
|
|
423
|
-
}
|
|
424
|
-
it('calls postUserTranscription for final transcriptions', async () => {
|
|
425
|
-
const stt = createMockStt();
|
|
426
|
-
const mirror = createMockMirror();
|
|
427
|
-
const mgr = new AudioPipelineManager({
|
|
428
|
-
log: createLogger(),
|
|
429
|
-
voiceConfig: baseVoiceConfig(),
|
|
430
|
-
allowedUserIds: new Set(['111']),
|
|
431
|
-
createDecoder: () => createMockDecoder(),
|
|
432
|
-
createStt: () => stt,
|
|
433
|
-
transcriptMirror: mirror,
|
|
434
|
-
});
|
|
435
|
-
const { connection } = createMockConnection();
|
|
436
|
-
await mgr.startPipeline('g1', connection);
|
|
437
|
-
stt.transcriptionCb({ text: 'hello world', isFinal: true, confidence: 0.95 });
|
|
438
|
-
expect(mirror.postUserTranscription).toHaveBeenCalledWith('User', 'hello world');
|
|
439
|
-
});
|
|
440
|
-
it('does not call postUserTranscription for non-final transcriptions', async () => {
|
|
441
|
-
const stt = createMockStt();
|
|
442
|
-
const mirror = createMockMirror();
|
|
443
|
-
const mgr = new AudioPipelineManager({
|
|
444
|
-
log: createLogger(),
|
|
445
|
-
voiceConfig: baseVoiceConfig(),
|
|
446
|
-
allowedUserIds: new Set(['111']),
|
|
447
|
-
createDecoder: () => createMockDecoder(),
|
|
448
|
-
createStt: () => stt,
|
|
449
|
-
transcriptMirror: mirror,
|
|
450
|
-
});
|
|
451
|
-
const { connection } = createMockConnection();
|
|
452
|
-
await mgr.startPipeline('g1', connection);
|
|
453
|
-
stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.5 });
|
|
454
|
-
expect(mirror.postUserTranscription).not.toHaveBeenCalled();
|
|
455
|
-
});
|
|
456
|
-
it('calls postBotResponse when responder gets an AI response', async () => {
|
|
457
|
-
const stt = createMockStt();
|
|
458
|
-
const mirror = createMockMirror();
|
|
459
|
-
const mgr = new AudioPipelineManager({
|
|
460
|
-
log: createLogger(),
|
|
461
|
-
voiceConfig: baseVoiceConfig(),
|
|
462
|
-
allowedUserIds: new Set(['111']),
|
|
463
|
-
createDecoder: () => createMockDecoder(),
|
|
464
|
-
createStt: () => stt,
|
|
465
|
-
invokeAi: async () => 'AI response text',
|
|
466
|
-
createTts: () => createMirrorTts(),
|
|
467
|
-
transcriptMirror: mirror,
|
|
468
|
-
botDisplayName: 'TestBot',
|
|
469
|
-
});
|
|
470
|
-
const { connection } = createMockConnection();
|
|
471
|
-
await mgr.startPipeline('g1', connection);
|
|
472
|
-
stt.transcriptionCb({ text: 'hello bot', isFinal: true, confidence: 0.95 });
|
|
473
|
-
await vi.waitFor(() => {
|
|
474
|
-
expect(mirror.postBotResponse).toHaveBeenCalledWith('TestBot', 'AI response text');
|
|
475
|
-
});
|
|
476
|
-
});
|
|
477
|
-
it('causes no errors when transcript mirror is omitted', async () => {
|
|
478
|
-
const stt = createMockStt();
|
|
479
|
-
const log = createLogger();
|
|
480
|
-
const mgr = new AudioPipelineManager({
|
|
481
|
-
log,
|
|
482
|
-
voiceConfig: baseVoiceConfig(),
|
|
483
|
-
allowedUserIds: new Set(['111']),
|
|
484
|
-
createDecoder: () => createMockDecoder(),
|
|
485
|
-
createStt: () => stt,
|
|
486
|
-
invokeAi: async () => 'response',
|
|
487
|
-
createTts: () => createMirrorTts(),
|
|
488
|
-
// No transcriptMirror
|
|
489
|
-
});
|
|
490
|
-
const { connection } = createMockConnection();
|
|
491
|
-
await mgr.startPipeline('g1', connection);
|
|
492
|
-
stt.transcriptionCb({ text: 'hello', isFinal: true, confidence: 0.95 });
|
|
493
|
-
// Allow async pipeline to settle
|
|
494
|
-
await new Promise((r) => setTimeout(r, 50));
|
|
495
|
-
// No transcript-mirror errors should have been logged
|
|
496
|
-
for (const call of log.warn.mock.calls) {
|
|
497
|
-
expect(call[1]).not.toContain('transcript-mirror');
|
|
498
|
-
}
|
|
499
|
-
for (const call of log.error.mock.calls) {
|
|
500
|
-
expect(call[1]).not.toContain('transcript-mirror');
|
|
501
|
-
}
|
|
502
|
-
});
|
|
503
|
-
});
|
|
504
|
-
describe('barge-in', () => {
|
|
505
|
-
function createMockTts() {
|
|
506
|
-
return {
|
|
507
|
-
synthesize: vi.fn(async function* (_text) {
|
|
508
|
-
yield { buffer: Buffer.alloc(480, 0x42), sampleRate: 24000, channels: 1 };
|
|
509
|
-
}),
|
|
510
|
-
};
|
|
511
|
-
}
|
|
512
|
-
it('calls responder.stop() on non-empty transcription while playing', async () => {
|
|
513
|
-
const stt = createMockStt();
|
|
514
|
-
const log = createLogger();
|
|
515
|
-
const { connection } = createMockConnection();
|
|
516
|
-
const mgr = new AudioPipelineManager({
|
|
517
|
-
log,
|
|
518
|
-
voiceConfig: baseVoiceConfig(),
|
|
519
|
-
allowedUserIds: new Set(['111']),
|
|
520
|
-
createDecoder: () => createMockDecoder(),
|
|
521
|
-
createStt: () => stt,
|
|
522
|
-
invokeAi: async () => 'response',
|
|
523
|
-
createTts: () => createMockTts(),
|
|
524
|
-
});
|
|
525
|
-
await mgr.startPipeline('g1', connection);
|
|
526
|
-
// The pipeline created a VoiceResponder which created a mock player
|
|
527
|
-
const player = lastMockPlayer;
|
|
528
|
-
expect(player).toBeTruthy();
|
|
529
|
-
// Simulate the player being in "playing" state (mid-playback)
|
|
530
|
-
player.state = { status: 'playing' };
|
|
531
|
-
// Non-empty transcription arrives — should trigger barge-in
|
|
532
|
-
stt.transcriptionCb({ text: 'stop that', isFinal: false, confidence: 0.9 });
|
|
533
|
-
expect(player.stop).toHaveBeenCalled();
|
|
534
|
-
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
535
|
-
});
|
|
536
|
-
it('does not trigger barge-in for empty transcription while playing (echo case)', async () => {
|
|
537
|
-
const stt = createMockStt();
|
|
538
|
-
const log = createLogger();
|
|
539
|
-
const { connection } = createMockConnection();
|
|
540
|
-
const mgr = new AudioPipelineManager({
|
|
541
|
-
log,
|
|
542
|
-
voiceConfig: baseVoiceConfig(),
|
|
543
|
-
allowedUserIds: new Set(['111']),
|
|
544
|
-
createDecoder: () => createMockDecoder(),
|
|
545
|
-
createStt: () => stt,
|
|
546
|
-
invokeAi: async () => 'response',
|
|
547
|
-
createTts: () => createMockTts(),
|
|
548
|
-
});
|
|
549
|
-
await mgr.startPipeline('g1', connection);
|
|
550
|
-
const player = lastMockPlayer;
|
|
551
|
-
player.state = { status: 'playing' };
|
|
552
|
-
player.stop.mockClear();
|
|
553
|
-
// Empty transcription (echo) — must not trigger barge-in
|
|
554
|
-
stt.transcriptionCb({ text: '', isFinal: false, confidence: 0.0 });
|
|
555
|
-
expect(player.stop).not.toHaveBeenCalled();
|
|
556
|
-
expect(log.info).not.toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
557
|
-
});
|
|
558
|
-
it('triggers barge-in on interim (non-final) non-empty transcription while playing', async () => {
|
|
559
|
-
const stt = createMockStt();
|
|
560
|
-
const log = createLogger();
|
|
561
|
-
const { connection } = createMockConnection();
|
|
562
|
-
const mgr = new AudioPipelineManager({
|
|
563
|
-
log,
|
|
564
|
-
voiceConfig: baseVoiceConfig(),
|
|
565
|
-
allowedUserIds: new Set(['111']),
|
|
566
|
-
createDecoder: () => createMockDecoder(),
|
|
567
|
-
createStt: () => stt,
|
|
568
|
-
invokeAi: async () => 'response',
|
|
569
|
-
createTts: () => createMockTts(),
|
|
570
|
-
});
|
|
571
|
-
await mgr.startPipeline('g1', connection);
|
|
572
|
-
const player = lastMockPlayer;
|
|
573
|
-
player.state = { status: 'playing' };
|
|
574
|
-
// Interim transcription with non-empty text — should trigger barge-in
|
|
575
|
-
stt.transcriptionCb({ text: 'hey wait', isFinal: false, confidence: 0.8 });
|
|
576
|
-
expect(player.stop).toHaveBeenCalled();
|
|
577
|
-
expect(log.info).toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
578
|
-
});
|
|
579
|
-
it('does not interrupt when player is idle', async () => {
|
|
580
|
-
const stt = createMockStt();
|
|
581
|
-
const log = createLogger();
|
|
582
|
-
const { connection } = createMockConnection();
|
|
583
|
-
const mgr = new AudioPipelineManager({
|
|
584
|
-
log,
|
|
585
|
-
voiceConfig: baseVoiceConfig(),
|
|
586
|
-
allowedUserIds: new Set(['111']),
|
|
587
|
-
createDecoder: () => createMockDecoder(),
|
|
588
|
-
createStt: () => stt,
|
|
589
|
-
invokeAi: async () => 'response',
|
|
590
|
-
createTts: () => createMockTts(),
|
|
591
|
-
});
|
|
592
|
-
await mgr.startPipeline('g1', connection);
|
|
593
|
-
const player = lastMockPlayer;
|
|
594
|
-
// Player is idle (default state)
|
|
595
|
-
expect(player.state.status).toBe('idle');
|
|
596
|
-
player.stop.mockClear();
|
|
597
|
-
// Non-empty transcription while idle — no barge-in needed
|
|
598
|
-
stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 });
|
|
599
|
-
expect(player.stop).not.toHaveBeenCalled();
|
|
600
|
-
expect(log.info).not.toHaveBeenCalledWith(expect.objectContaining({ guildId: 'g1' }), 'barge-in detected');
|
|
601
|
-
});
|
|
602
|
-
it('works without a responder (no invokeAi configured)', async () => {
|
|
603
|
-
const stt = createMockStt();
|
|
604
|
-
const { connection } = createMockConnection();
|
|
605
|
-
const mgr = new AudioPipelineManager({
|
|
606
|
-
log: createLogger(),
|
|
607
|
-
voiceConfig: baseVoiceConfig(),
|
|
608
|
-
allowedUserIds: new Set(['111']),
|
|
609
|
-
createDecoder: () => createMockDecoder(),
|
|
610
|
-
createStt: () => stt,
|
|
611
|
-
onTranscription: () => { },
|
|
612
|
-
// No invokeAi — no responder created
|
|
613
|
-
});
|
|
614
|
-
await mgr.startPipeline('g1', connection);
|
|
615
|
-
// Non-empty transcription — should not throw even though there's no responder
|
|
616
|
-
expect(() => stt.transcriptionCb({ text: 'hello', isFinal: false, confidence: 0.9 })).not.toThrow();
|
|
617
|
-
});
|
|
618
|
-
});
|
|
619
|
-
});
|