discoclaw 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -6
- package/.env.example.full +13 -32
- package/README.md +1 -1
- package/dist/cli/dashboard.test.js +0 -4
- package/dist/cli/init-wizard.js +4 -8
- package/dist/cli/init-wizard.test.js +4 -10
- package/dist/config.js +2 -42
- package/dist/config.test.js +8 -72
- package/dist/dashboard/server.js +1 -5
- package/dist/dashboard/server.test.js +3 -6
- package/dist/discord/actions.js +112 -6
- package/dist/discord/actions.test.js +117 -1
- package/dist/discord/help-command.js +1 -1
- package/dist/discord/message-coordinator.js +3 -8
- package/dist/discord/models-command.js +1 -1
- package/dist/discord/reaction-handler.js +2 -2
- package/dist/discord/reaction-handler.test.js +55 -0
- package/dist/discord/verify-push.js +31 -36
- package/dist/discord/verify-push.test.js +34 -6
- package/dist/discord/voice-command.js +1 -31
- package/dist/discord/voice-command.test.js +21 -259
- package/dist/discord/voice-status-command.js +3 -22
- package/dist/discord/voice-status-command.test.js +16 -124
- package/dist/discord-followup.test.js +133 -0
- package/dist/health/config-doctor.js +5 -27
- package/dist/health/config-doctor.test.js +1 -4
- package/dist/index.js +1 -28
- package/dist/runtime-overrides.js +2 -3
- package/dist/runtime-overrides.test.js +27 -193
- package/dist/tasks/store.js +10 -6
- package/dist/tasks/store.test.js +44 -0
- package/dist/tasks/task-action-executor.test.js +162 -50
- package/dist/tasks/task-action-mutations.js +22 -2
- package/dist/tasks/task-action-read-ops.js +7 -1
- package/dist/tasks/task-action-runner-types.js +19 -1
- package/dist/voice/audio-pipeline.js +145 -298
- package/docs/configuration.md +4 -9
- package/docs/official-docs.md +6 -9
- package/docs/runtime-switching.md +1 -1
- package/package.json +1 -1
- package/dist/voice/audio-pipeline.test.js +0 -1100
- package/dist/voice/stt-deepgram.js +0 -154
- package/dist/voice/stt-deepgram.test.js +0 -275
- package/dist/voice/stt-factory.js +0 -42
- package/dist/voice/stt-factory.test.js +0 -45
- package/dist/voice/stt-openai.js +0 -156
- package/dist/voice/stt-openai.test.js +0 -281
- package/dist/voice/tts-cartesia.js +0 -169
- package/dist/voice/tts-cartesia.test.js +0 -228
- package/dist/voice/tts-deepgram.js +0 -84
- package/dist/voice/tts-deepgram.test.js +0 -220
- package/dist/voice/tts-factory.js +0 -52
- package/dist/voice/tts-factory.test.js +0 -53
- package/dist/voice/tts-openai.js +0 -70
- package/dist/voice/tts-openai.test.js +0 -138
- package/dist/voice/types.test.js +0 -90
|
@@ -1,154 +0,0 @@
|
|
|
1
|
-
import WebSocket from 'ws';
|
|
2
|
-
const DEEPGRAM_STREAMING_URL = 'wss://api.deepgram.com/v1/listen';
const MAX_RETRIES = 3;
const BASE_BACKOFF_MS = 500;
export const KEEPALIVE_INTERVAL_MS = 5_000;
/**
 * Streaming speech-to-text client for the Deepgram `/v1/listen` WebSocket
 * endpoint.
 *
 * Lifecycle: `start()` opens the socket, `feedAudio()` forwards raw frames,
 * `onTranscription()` registers the result callback and `stop()` closes the
 * stream. An unexpected close is retried up to MAX_RETRIES times with
 * exponential backoff (BASE_BACKOFF_MS * 2^(attempt-1)); the retry counter is
 * only reset by `start()`.
 *
 * State values used below: 'idle' | 'starting' | 'open' | 'stopped'.
 */
export class DeepgramSttProvider {
    apiKey;
    sampleRate;
    model;
    log;
    wsFactory;
    ws = null;
    callback = null;
    state = 'idle';
    retryCount = 0;
    feedCount = 0;
    keepAliveTimer = null;
    // Pending reconnect timeout, tracked so stop() can cancel it.
    reconnectTimer = null;
    /**
     * @param {object} opts
     * @param {string} opts.apiKey - sent as `Authorization: Token <apiKey>`.
     * @param {number} opts.sampleRate - forwarded as the `sample_rate` query parameter.
     * @param {string} [opts.model] - defaults to 'nova-3-general'.
     * @param {object} opts.log - logger exposing info/warn/error.
     * @param {Function} [opts.wsFactory] - `(url, headers) => socket`; defaults to a `ws` WebSocket.
     */
    constructor(opts) {
        this.apiKey = opts.apiKey;
        this.sampleRate = opts.sampleRate;
        this.model = opts.model ?? 'nova-3-general';
        this.log = opts.log;
        this.wsFactory =
            opts.wsFactory ?? ((url, headers) => new WebSocket(url, { headers }));
    }
    /**
     * Open the connection. No-op when already open or connecting.
     * Rejects when the socket closes before it opens; resolves without
     * opening when stop() is called while the connect is still in flight.
     */
    async start() {
        if (this.state === 'open' || this.state === 'starting')
            return;
        this.state = 'starting';
        this.retryCount = 0;
        await this.connect();
    }
    /**
     * Send one audio frame (`frame.buffer`) to Deepgram as a binary message.
     * @throws {Error} when the provider is not in the 'open' state.
     */
    feedAudio(frame) {
        if (this.state !== 'open') {
            throw new Error('Cannot feedAudio before start() or after stop()');
        }
        this.feedCount++;
        // Log the first frame and then every 100th to keep the log readable.
        if (this.feedCount === 1 || this.feedCount % 100 === 0) {
            this.log.info({ feedCount: this.feedCount, bufferSize: frame.buffer.length }, 'stt:feedAudio');
        }
        this.ws.send(frame.buffer);
    }
    /** Register the callback that receives `{ text, confidence, isFinal }` results. */
    onTranscription(callback) {
        this.callback = callback;
    }
    /**
     * Stop the stream: cancel timers, send `CloseStream` when the socket is
     * open, and close the socket. Safe to call repeatedly.
     */
    async stop() {
        this.clearKeepAlive();
        this.clearReconnectTimer();
        if (this.state === 'stopped' || this.state === 'idle')
            return;
        this.state = 'stopped';
        const ws = this.ws;
        // Detach first so late events from this socket are recognised as stale.
        this.ws = null;
        if (!ws)
            return;
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({ type: 'CloseStream' }));
        }
        // Also close sockets that are still connecting; otherwise they would
        // open later and stay connected after stop().
        ws.close();
    }
    /** Build the streaming URL with model, encoding and sample-rate query parameters. */
    buildUrl() {
        const params = new URLSearchParams({
            model: this.model,
            encoding: 'linear16',
            sample_rate: String(this.sampleRate),
        });
        return `${DEEPGRAM_STREAMING_URL}?${params.toString()}`;
    }
    /**
     * Create a socket and wire its handlers. The returned promise resolves on
     * 'open' (or when the attempt was superseded by stop()/a newer socket) and
     * rejects when the socket closes while still in the 'starting' state.
     */
    connect() {
        this.clearKeepAlive();
        return new Promise((resolve, reject) => {
            const url = this.buildUrl();
            const ws = this.wsFactory(url, {
                Authorization: `Token ${this.apiKey}`,
            });
            this.ws = ws;
            ws.on('open', () => {
                // stop() (or a newer connect) won the race: do not resurrect the
                // provider or start a keep-alive timer for this socket.
                if (this.ws !== ws || this.state === 'stopped') {
                    ws.close();
                    resolve();
                    return;
                }
                this.state = 'open';
                this.log.info({ url: DEEPGRAM_STREAMING_URL }, 'Deepgram STT connected');
                this.clearKeepAlive();
                this.keepAliveTimer = setInterval(() => {
                    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
                        this.ws.send(JSON.stringify({ type: 'KeepAlive' }));
                    }
                }, KEEPALIVE_INTERVAL_MS);
                resolve();
            });
            ws.on('message', (data) => {
                this.handleMessage(data);
            });
            ws.on('error', (err) => {
                this.log.error({ err: err.message }, 'Deepgram STT WebSocket error');
            });
            ws.on('close', (code, reason) => {
                // Closed on purpose, or a superseded socket: settle a still-pending
                // connect (no-op when already settled) and never reconnect.
                if (this.ws !== ws || this.state === 'stopped') {
                    resolve();
                    return;
                }
                // If we were still in the initial connect, reject
                if (this.state === 'starting') {
                    reject(new Error(`WebSocket closed during connect: code=${code} reason=${reason.toString()}`));
                    return;
                }
                this.handleUnexpectedClose();
            });
        });
    }
    /**
     * Parse one Deepgram message, log a summary, and forward the first
     * alternative to the registered callback. `isFinal` is true only when both
     * `is_final` and `speech_final` are truthy. Parse failures are logged.
     */
    handleMessage(data) {
        try {
            const parsed = JSON.parse(String(data));
            // Log all Deepgram messages for debugging
            const alt = parsed?.channel?.alternatives?.[0];
            const transcript = alt?.transcript ?? '';
            this.log.info({
                type: parsed.type,
                isFinal: parsed.is_final,
                speechFinal: parsed.speech_final,
                transcript: transcript.slice(0, 80),
            }, 'stt:deepgram message');
            if (!this.callback)
                return;
            if (!alt)
                return;
            const result = {
                text: transcript,
                confidence: alt.confidence,
                isFinal: Boolean(parsed.is_final && parsed.speech_final),
            };
            this.callback(result);
        }
        catch (err) {
            this.log.error({ err }, 'Failed to parse Deepgram STT message');
        }
    }
    /** Cancel the keep-alive interval, if any. */
    clearKeepAlive() {
        if (this.keepAliveTimer !== null) {
            clearInterval(this.keepAliveTimer);
            this.keepAliveTimer = null;
        }
    }
    /** Cancel a scheduled reconnect attempt, if any. */
    clearReconnectTimer() {
        if (this.reconnectTimer !== null) {
            clearTimeout(this.reconnectTimer);
            this.reconnectTimer = null;
        }
    }
    /**
     * Schedule a reconnect with exponential backoff, or give up (state becomes
     * 'stopped') once MAX_RETRIES attempts have been used.
     */
    handleUnexpectedClose() {
        this.clearKeepAlive();
        if (this.retryCount >= MAX_RETRIES) {
            this.log.error({ retries: this.retryCount }, 'Deepgram STT exhausted reconnect retries');
            this.state = 'stopped';
            return;
        }
        this.retryCount++;
        const delay = BASE_BACKOFF_MS * 2 ** (this.retryCount - 1);
        this.log.warn({ attempt: this.retryCount, maxRetries: MAX_RETRIES, delayMs: delay }, 'Deepgram STT reconnecting after unexpected close');
        this.clearReconnectTimer();
        this.reconnectTimer = setTimeout(() => {
            this.reconnectTimer = null;
            if (this.state === 'stopped')
                return;
            this.state = 'starting';
            this.connect().catch((err) => {
                this.log.error({ err }, 'Deepgram STT reconnect failed');
                this.handleUnexpectedClose();
            });
        }, delay);
    }
}
|
|
@@ -1,275 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
-
import { EventEmitter } from 'node:events';
|
|
3
|
-
import { DeepgramSttProvider, KEEPALIVE_INTERVAL_MS } from './stt-deepgram.js';
|
|
4
|
-
// ---------------------------------------------------------------------------
|
|
5
|
-
// Mock WebSocket (ws-library style: EventEmitter with readyState)
|
|
6
|
-
// ---------------------------------------------------------------------------
|
|
7
|
-
/**
 * In-memory stand-in for a `ws`-style socket: an EventEmitter that records
 * everything passed to send() and reports itself as OPEN from construction.
 */
class MockWebSocket extends EventEmitter {
    static OPEN = 1;
    static CLOSED = 3;
    constructor(url, headers) {
        super();
        this.url = url;
        this.headers = headers;
        this.readyState = MockWebSocket.OPEN;
        this.sent = [];
        // Emit 'open' on the next microtask so the caller has time to attach listeners.
        queueMicrotask(() => {
            this.emit('open');
        });
    }
    /** Record an outgoing message instead of transmitting it. */
    send(data) {
        this.sent.push(data);
    }
    /** Mark the socket closed; no 'close' event is emitted. */
    close() {
        this.readyState = MockWebSocket.CLOSED;
    }
    // Test helpers
    /** Deliver `data` to 'message' listeners as a JSON string. */
    _receiveMessage(data) {
        const payload = JSON.stringify(data);
        this.emit('message', payload);
    }
    /** Simulate a close event with the given code and an empty reason. */
    _triggerClose(code = 1006) {
        this.readyState = MockWebSocket.CLOSED;
        const emptyReason = Buffer.from('');
        this.emit('close', code, emptyReason);
    }
    /** Emit an 'error' event carrying `new Error(msg)`. */
    _triggerError(msg = 'test error') {
        const failure = new Error(msg);
        this.emit('error', failure);
    }
}
|
|
39
|
-
// ---------------------------------------------------------------------------
|
|
40
|
-
// Helpers
|
|
41
|
-
// ---------------------------------------------------------------------------
|
|
42
|
-
/** Build a logger whose info/warn/error methods are fresh vitest mocks. */
function createLogger() {
    const logger = {
        info: vi.fn(),
        warn: vi.fn(),
        error: vi.fn(),
    };
    return logger;
}
|
|
45
|
-
// Most recent socket handed out by mockWsFactory; reset before every test.
let lastCreatedWs = null;
/** Socket factory for the provider under test; remembers the socket it creates. */
function mockWsFactory(url, headers) {
    lastCreatedWs = new MockWebSocket(url, headers);
    return lastCreatedWs;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const typedWsFactory = mockWsFactory;
|
|
53
|
-
/**
 * Create a DeepgramSttProvider wired to the mock socket factory.
 * `overrides` may supply apiKey, sampleRate and log; anything else is ignored.
 */
function makeProvider(overrides = {}) {
    const apiKey = overrides.apiKey ?? 'test-key';
    const sampleRate = overrides.sampleRate ?? 16000;
    const log = overrides.log ?? createLogger();
    return new DeepgramSttProvider({ apiKey, sampleRate, log, wsFactory: typedWsFactory });
}
|
|
61
|
-
/** Build an audio-frame fixture: a Buffer of `data` labelled as 16 kHz mono. */
function makeFrame(data = [0, 1, 2, 3]) {
    const buffer = Buffer.from(data);
    return {
        buffer,
        sampleRate: 16000,
        channels: 1,
    };
}
|
|
64
|
-
// Local copy of the backoff base used by the retry tests' backoff calculation
|
|
65
|
-
const BASE_BACKOFF_MS = 500;
|
|
66
|
-
// ---------------------------------------------------------------------------
|
|
67
|
-
// Tests
|
|
68
|
-
// ---------------------------------------------------------------------------
|
|
69
|
-
beforeEach(() => {
    // Tests that call vi.useFakeTimers() restore real timers only on their last
    // line; if one fails earlier, fake timers would stay installed for every
    // later test. Restoring here keeps each test isolated.
    vi.useRealTimers();
    vi.clearAllMocks();
    lastCreatedWs = null;
});
|
|
73
|
-
describe('DeepgramSttProvider', () => {
    // Text frames of the given Deepgram control type recorded by a mock socket.
    const controlFrames = (ws, type) => ws.sent.filter((m) => typeof m === 'string' && JSON.parse(m).type === type);
    const keepAliveFrames = (ws) => controlFrames(ws, 'KeepAlive');
    // Start a provider whose transcription results are collected in `results`.
    const startCapturing = async () => {
        const provider = makeProvider();
        const results = [];
        provider.onTranscription((r) => results.push(r));
        await provider.start();
        return { provider, results };
    };
    // Deepgram-shaped message with a single alternative.
    const deepgramMessage = (isFinal, speechFinal, transcript, confidence) => ({
        is_final: isFinal,
        speech_final: speechFinal,
        channel: { alternatives: [{ transcript, confidence }] },
    });
    it('start opens connection with correct URL and auth header', async () => {
        const provider = makeProvider({ apiKey: 'my-key', sampleRate: 48000 });
        await provider.start();
        expect(lastCreatedWs).not.toBeNull();
        const { protocol, hostname, pathname, searchParams } = new URL(lastCreatedWs.url);
        expect(protocol).toBe('wss:');
        expect(hostname).toBe('api.deepgram.com');
        expect(pathname).toBe('/v1/listen');
        expect(searchParams.get('model')).toBe('nova-3-general');
        expect(searchParams.get('encoding')).toBe('linear16');
        expect(searchParams.get('sample_rate')).toBe('48000');
        // The key travels in the Authorization header, never in the query string
        expect(searchParams.get('token')).toBeNull();
        expect(lastCreatedWs.headers.Authorization).toBe('Token my-key');
    });
    it('feedAudio sends binary data', async () => {
        const provider = makeProvider();
        await provider.start();
        const frame = makeFrame([10, 20, 30]);
        provider.feedAudio(frame);
        const { sent } = lastCreatedWs;
        expect(sent).toHaveLength(1);
        expect(sent[0]).toEqual(frame.buffer);
    });
    it('parses Deepgram JSON into TranscriptionResult for interim results', async () => {
        const { results } = await startCapturing();
        lastCreatedWs._receiveMessage(deepgramMessage(false, false, 'hello', 0.85));
        expect(results).toHaveLength(1);
        expect(results[0]).toEqual({ text: 'hello', confidence: 0.85, isFinal: false });
    });
    it('parses Deepgram JSON into TranscriptionResult for final results', async () => {
        const { results } = await startCapturing();
        lastCreatedWs._receiveMessage(deepgramMessage(true, true, 'hello world', 0.97));
        expect(results).toHaveLength(1);
        expect(results[0]).toEqual({ text: 'hello world', confidence: 0.97, isFinal: true });
    });
    it('isFinal requires both is_final and speech_final', async () => {
        const { results } = await startCapturing();
        // is_final alone is not enough: speech_final is still false
        lastCreatedWs._receiveMessage(deepgramMessage(true, false, 'partial', 0.9));
        expect(results[0].isFinal).toBe(false);
    });
    it('stop sends CloseStream message', async () => {
        const provider = makeProvider();
        await provider.start();
        const ws = lastCreatedWs;
        await provider.stop();
        const [closeMsg] = controlFrames(ws, 'CloseStream');
        expect(closeMsg).toBeDefined();
    });
    it('double stop is idempotent', async () => {
        const provider = makeProvider();
        await provider.start();
        await provider.stop();
        // The second call must resolve without throwing
        await provider.stop();
    });
    it('feedAudio before start throws', () => {
        const provider = makeProvider();
        const feed = () => provider.feedAudio(makeFrame());
        expect(feed).toThrow('Cannot feedAudio before start() or after stop()');
    });
    it('reconnect fires on unexpected close up to retry limit', async () => {
        vi.useFakeTimers();
        const log = createLogger();
        const provider = makeProvider({ log });
        await provider.start();
        // First unexpected close schedules a reconnect
        lastCreatedWs._triggerClose(1006);
        expect(log.warn).toHaveBeenCalledTimes(1);
        const [, firstWarnMessage] = vi.mocked(log.warn).mock.calls[0];
        expect(firstWarnMessage).toContain('reconnecting');
        // First backoff: 500ms
        await vi.advanceTimersByTimeAsync(500);
        expect(lastCreatedWs).not.toBeNull();
        // Second unexpected close
        lastCreatedWs._triggerClose(1006);
        expect(log.warn).toHaveBeenCalledTimes(2);
        // Second backoff: 1000ms
        await vi.advanceTimersByTimeAsync(1000);
        // Third unexpected close
        lastCreatedWs._triggerClose(1006);
        expect(log.warn).toHaveBeenCalledTimes(3);
        // Third backoff: 2000ms
        await vi.advanceTimersByTimeAsync(2000);
        // Fourth close: no retries left
        lastCreatedWs._triggerClose(1006);
        expect(log.error).toHaveBeenCalled();
        const sawExhausted = vi
            .mocked(log.error)
            .mock.calls.some((c) => typeof c[1] === 'string' && c[1].includes('exhausted'));
        expect(sawExhausted).toBe(true);
        vi.useRealTimers();
    });
    it('error is logged after retries exhausted', async () => {
        vi.useFakeTimers();
        const log = createLogger();
        const provider = makeProvider({ log });
        await provider.start();
        // Use up all 3 retries
        for (const attempt of [0, 1, 2]) {
            lastCreatedWs._triggerClose(1006);
            await vi.advanceTimersByTimeAsync(BASE_BACKOFF_MS * 2 ** attempt);
        }
        // One more close once the retries are spent
        lastCreatedWs._triggerClose(1006);
        const exhaustedCall = vi
            .mocked(log.error)
            .mock.calls.find((c) => typeof c[1] === 'string' && c[1].includes('exhausted reconnect retries'));
        expect(exhaustedCall).toBeDefined();
        vi.useRealTimers();
    });
    // -------------------------------------------------------------------------
    // KeepAlive tests
    // -------------------------------------------------------------------------
    it('sends KeepAlive text frames on the interval after start', async () => {
        vi.useFakeTimers();
        const provider = makeProvider();
        await provider.start();
        const ws = lastCreatedWs;
        // Freshly connected: nothing sent yet
        expect(keepAliveFrames(ws)).toHaveLength(0);
        // One interval elapsed: one KeepAlive
        await vi.advanceTimersByTimeAsync(KEEPALIVE_INTERVAL_MS);
        expect(keepAliveFrames(ws)).toHaveLength(1);
        // Two intervals elapsed: two KeepAlives
        await vi.advanceTimersByTimeAsync(KEEPALIVE_INTERVAL_MS);
        expect(keepAliveFrames(ws)).toHaveLength(2);
        await provider.stop();
        vi.useRealTimers();
    });
    it('stops sending KeepAlive after stop()', async () => {
        vi.useFakeTimers();
        const provider = makeProvider();
        await provider.start();
        const ws = lastCreatedWs;
        await provider.stop();
        // Several intervals later there must still be no KeepAlive
        await vi.advanceTimersByTimeAsync(KEEPALIVE_INTERVAL_MS * 3);
        expect(keepAliveFrames(ws)).toHaveLength(0);
        vi.useRealTimers();
    });
    it('clears old keepalive timer and starts new one on reconnect', async () => {
        vi.useFakeTimers();
        const provider = makeProvider();
        await provider.start();
        const ws1 = lastCreatedWs;
        // One KeepAlive on the first connection
        await vi.advanceTimersByTimeAsync(KEEPALIVE_INTERVAL_MS);
        expect(keepAliveFrames(ws1)).toHaveLength(1);
        // Unexpected close, then wait out the first retry backoff
        ws1._triggerClose(1006);
        await vi.advanceTimersByTimeAsync(BASE_BACKOFF_MS);
        const ws2 = lastCreatedWs;
        expect(ws2).not.toBe(ws1);
        // The old timer is gone: ws1 keeps only its original KeepAlive
        await vi.advanceTimersByTimeAsync(KEEPALIVE_INTERVAL_MS);
        expect(keepAliveFrames(ws1)).toHaveLength(1);
        // The new timer fires on ws2
        expect(keepAliveFrames(ws2)).toHaveLength(1);
        await provider.stop();
        vi.useRealTimers();
    });
    it('KeepAlive messages are JSON text strings, not Buffers', async () => {
        vi.useFakeTimers();
        const provider = makeProvider();
        await provider.start();
        const ws = lastCreatedWs;
        await vi.advanceTimersByTimeAsync(KEEPALIVE_INTERVAL_MS);
        const keepAlives = keepAliveFrames(ws);
        expect(keepAlives).toHaveLength(1);
        // A text frame is a string; a binary frame would be a Buffer
        const [frame] = keepAlives;
        expect(typeof frame).toBe('string');
        expect(frame).not.toBeInstanceOf(Buffer);
        expect(JSON.parse(frame)).toEqual({ type: 'KeepAlive' });
        await provider.stop();
        vi.useRealTimers();
    });
});
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import { DeepgramSttProvider } from './stt-deepgram.js';
|
|
2
|
-
import { OpenaiSttProvider } from './stt-openai.js';
|
|
3
|
-
/**
 * Create an STT provider based on the voice config.
 *
 * Maintainers: start with `docs/official-docs.md` before changing provider
 * wiring, model defaults, endpoint assumptions, or request parameters here.
 *
 * Currently supported: `deepgram` (Nova-3 General streaming via WebSocket),
 * `openai` (Whisper API via REST).
 * Planned: `whisper` (local Whisper model, Phase 2b).
 *
 * Requires `DISCOCLAW_VOICE_ENABLED=1` and a provider-specific API key
 * (e.g. `DEEPGRAM_API_KEY`, `OPENAI_API_KEY`). See docs/voice.md for setup.
 *
 * @param {object} config - voice config; reads `sttProvider`, `deepgramApiKey`,
 *   `deepgramSttModel` and `openaiApiKey`.
 * @param {object} log - logger handed to the created provider.
 * @returns {DeepgramSttProvider | OpenaiSttProvider} provider fed 16 kHz audio.
 * @throws {Error} when the selected provider's API key is missing, when
 *   `sttProvider` is `whisper` (not implemented), or when `sttProvider` is not
 *   one of the known values.
 */
export function createSttProvider(config, log) {
    switch (config.sttProvider) {
        case 'deepgram': {
            if (!config.deepgramApiKey) {
                throw new Error('deepgramApiKey is required when sttProvider is "deepgram"');
            }
            return new DeepgramSttProvider({
                apiKey: config.deepgramApiKey,
                sampleRate: 16000,
                model: config.deepgramSttModel,
                log,
            });
        }
        case 'openai': {
            if (!config.openaiApiKey) {
                throw new Error('openaiApiKey is required when sttProvider is "openai"');
            }
            return new OpenaiSttProvider({
                apiKey: config.openaiApiKey,
                sampleRate: 16000,
                log,
            });
        }
        case 'whisper':
            throw new Error('Whisper STT adapter is not yet implemented (Phase 2b)');
        default:
            // Fail loudly instead of returning undefined, which would only
            // surface later as a TypeError at the first use of the provider.
            throw new Error(`Unsupported STT provider: ${String(config.sttProvider)}`);
    }
}
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi } from 'vitest';
|
|
2
|
-
import { createSttProvider } from './stt-factory.js';
|
|
3
|
-
import { DeepgramSttProvider } from './stt-deepgram.js';
|
|
4
|
-
import { OpenaiSttProvider } from './stt-openai.js';
|
|
5
|
-
// Minimal global WebSocket replacement so no real socket is opened while
// providers are constructed in these tests.
class StubWebSocket {
    onopen = null;
    constructor() {
        // Report "open" on the next microtask, if a handler has been attached.
        queueMicrotask(() => {
            const handler = this.onopen;
            if (handler != null) {
                handler.call(this, { type: 'open' });
            }
        });
    }
    send() { }
    close() { }
}
globalThis.WebSocket = StubWebSocket;
|
|
15
|
-
/** Build a logger whose info/warn/error methods are fresh vitest mocks. */
function createLogger() {
    const logger = {
        info: vi.fn(),
        warn: vi.fn(),
        error: vi.fn(),
    };
    return logger;
}
|
|
18
|
-
/** Voice-config fixture: Deepgram STT defaults, with `overrides` applied last. */
function baseConfig(overrides = {}) {
    const defaults = {
        enabled: true,
        sttProvider: 'deepgram',
        ttsProvider: 'cartesia',
        deepgramApiKey: 'test-key',
    };
    return { ...defaults, ...overrides };
}
|
|
27
|
-
describe('createSttProvider', () => {
    // Run the factory against the base config with the given overrides.
    const build = (overrides) => createSttProvider(baseConfig(overrides), createLogger());
    it('returns a DeepgramSttProvider for deepgram config', () => {
        const provider = build();
        expect(provider).toBeInstanceOf(DeepgramSttProvider);
    });
    it('throws when deepgramApiKey is missing for deepgram provider', () => {
        const attempt = () => build({ deepgramApiKey: undefined });
        expect(attempt).toThrow('deepgramApiKey is required');
    });
    it('returns an OpenaiSttProvider for openai config', () => {
        const provider = build({ sttProvider: 'openai', openaiApiKey: 'sk-test' });
        expect(provider).toBeInstanceOf(OpenaiSttProvider);
    });
    it('throws when openaiApiKey is missing for openai provider', () => {
        const attempt = () => build({ sttProvider: 'openai' });
        expect(attempt).toThrow('openaiApiKey is required');
    });
    it('throws not-implemented for whisper provider', () => {
        const attempt = () => build({ sttProvider: 'whisper' });
        expect(attempt).toThrow('not yet implemented');
    });
});
|
package/dist/voice/stt-openai.js
DELETED
|
@@ -1,156 +0,0 @@
|
|
|
1
|
-
const OPENAI_TRANSCRIPTIONS_URL = 'https://api.openai.com/v1/audio/transcriptions';
const DEFAULT_SILENCE_THRESHOLD_MS = 1500;
const WHISPER_MODEL = 'whisper-1';
/**
 * Speech-to-text adapter for the OpenAI Whisper transcription endpoint.
 *
 * The endpoint is request/response only, so incoming PCM frames are
 * accumulated and a transcription is requested once no frame has arrived for
 * `silenceThresholdMs`, or when stop() is called with audio still buffered.
 * Each request wraps the buffered PCM in a WAV container, POSTs it as
 * multipart form data and reports non-empty text through the
 * `onTranscription` callback with `isFinal: true`. Keep the multipart request
 * shape aligned with the official OpenAI audio transcription docs.
 */
export class OpenaiSttProvider {
    apiKey;
    sampleRate;
    log;
    silenceThresholdMs;
    fetchFn;
    state = 'idle';
    callback = null;
    audioBuffers = [];
    totalBytes = 0;
    silenceTimer = null;
    constructor(opts) {
        this.apiKey = opts.apiKey;
        this.sampleRate = opts.sampleRate;
        this.log = opts.log;
        this.silenceThresholdMs = opts.silenceThresholdMs ?? DEFAULT_SILENCE_THRESHOLD_MS;
        this.fetchFn = opts.fetchFn ?? globalThis.fetch;
    }
    /** Enter the running state with an empty buffer; no-op when already running. */
    async start() {
        if (this.state === 'running') {
            return;
        }
        this.state = 'running';
        this.audioBuffers = [];
        this.totalBytes = 0;
        this.log.info('OpenAI Whisper STT started');
    }
    /**
     * Buffer one frame and restart the silence countdown.
     * @throws {Error} when the provider is not running.
     */
    feedAudio(frame) {
        if (this.state !== 'running') {
            throw new Error('Cannot feedAudio before start() or after stop()');
        }
        const chunk = frame.buffer;
        this.audioBuffers.push(chunk);
        this.totalBytes += chunk.length;
        // Every frame counts as "not silent", so the countdown starts over
        this.resetSilenceTimer();
    }
    /** Register the callback that receives `{ text, isFinal }` results. */
    onTranscription(callback) {
        this.callback = callback;
    }
    /** Stop, flushing any buffered audio through one last transcription. */
    async stop() {
        const alreadyInactive = this.state === 'stopped' || this.state === 'idle';
        if (alreadyInactive) {
            return;
        }
        this.state = 'stopped';
        this.clearSilenceTimer();
        // Flush whatever audio is still buffered
        if (this.totalBytes > 0) {
            await this.transcribeBuffer();
        }
        this.audioBuffers = [];
        this.totalBytes = 0;
    }
    /** (Re)arm the silence timer. */
    resetSilenceTimer() {
        this.clearSilenceTimer();
        this.silenceTimer = setTimeout(() => this.onSilenceDetected(), this.silenceThresholdMs);
    }
    /** Cancel the silence timer, if armed. */
    clearSilenceTimer() {
        if (this.silenceTimer === null) {
            return;
        }
        clearTimeout(this.silenceTimer);
        this.silenceTimer = null;
    }
    /** Timer callback: transcribe the buffer when running and non-empty. */
    onSilenceDetected() {
        const nothingToDo = this.state !== 'running' || this.totalBytes === 0;
        if (nothingToDo) {
            return;
        }
        this.transcribeBuffer().catch((err) => {
            this.log.error({ err }, 'OpenAI Whisper transcription failed');
        });
    }
    /**
     * Drain the buffer, send it as a WAV upload and deliver the text.
     * Request failures and non-OK responses are logged, not thrown.
     */
    async transcribeBuffer() {
        const pcm = Buffer.concat(this.audioBuffers);
        this.audioBuffers = [];
        this.totalBytes = 0;
        const wav = buildWav(pcm, this.sampleRate, 1);
        this.log.info({ pcmBytes: pcm.length, wavBytes: wav.length }, 'OpenAI Whisper: sending audio for transcription');
        try {
            // Copy the WAV bytes into a standalone ArrayBuffer so TypeScript accepts it as a BlobPart
            const wavBytes = new Uint8Array(wav.buffer, wav.byteOffset, wav.byteLength).slice();
            const upload = new Blob([wavBytes.buffer], { type: 'audio/wav' });
            const formData = new FormData();
            formData.append('file', upload, 'audio.wav');
            formData.append('model', WHISPER_MODEL);
            const headers = { Authorization: `Bearer ${this.apiKey}` };
            const response = await this.fetchFn(OPENAI_TRANSCRIPTIONS_URL, {
                method: 'POST',
                headers,
                body: formData,
            });
            if (!response.ok) {
                const body = await response.text();
                this.log.error({ status: response.status, body: body.slice(0, 200) }, 'OpenAI Whisper API error');
                return;
            }
            const data = (await response.json());
            const text = data.text?.trim() ?? '';
            if (text.length === 0) {
                this.log.info('OpenAI Whisper: empty transcription, skipping callback');
                return;
            }
            this.log.info({ text: text.slice(0, 80) }, 'OpenAI Whisper transcription');
            if (this.callback) {
                this.callback({ text, isFinal: true });
            }
        }
        catch (err) {
            this.log.error({ err }, 'OpenAI Whisper transcription request failed');
        }
    }
}
|
|
125
|
-
// ---------------------------------------------------------------------------
|
|
126
|
-
// WAV header construction (PCM s16le mono)
|
|
127
|
-
// ---------------------------------------------------------------------------
|
|
128
|
-
/**
 * Wrap raw PCM s16le samples in a 44-byte RIFF/WAVE header.
 *
 * @param {Buffer} pcm - raw 16-bit little-endian PCM data.
 * @param {number} sampleRate - samples per second written to the header.
 * @param {number} channels - channel count written to the header.
 * @returns {Buffer} a new buffer holding the header followed by `pcm`.
 */
export function buildWav(pcm, sampleRate, channels) {
    const BITS_PER_SAMPLE = 16;
    const HEADER_BYTES = 44;
    const bytesPerSample = BITS_PER_SAMPLE / 8;
    const payloadBytes = pcm.length;
    // [Buffer method, value, byte offset], listed in file order.
    const layout = [
        // RIFF chunk descriptor
        ['write', 'RIFF', 0],
        ['writeUInt32LE', 36 + payloadBytes, 4], // ChunkSize
        ['write', 'WAVE', 8],
        // fmt sub-chunk
        ['write', 'fmt ', 12],
        ['writeUInt32LE', 16, 16], // Subchunk1Size (PCM)
        ['writeUInt16LE', 1, 20], // AudioFormat (1 = PCM)
        ['writeUInt16LE', channels, 22],
        ['writeUInt32LE', sampleRate, 24],
        ['writeUInt32LE', sampleRate * channels * bytesPerSample, 28], // ByteRate
        ['writeUInt16LE', channels * bytesPerSample, 32], // BlockAlign
        ['writeUInt16LE', BITS_PER_SAMPLE, 34],
        // data sub-chunk
        ['write', 'data', 36],
        ['writeUInt32LE', payloadBytes, 40],
    ];
    const header = Buffer.alloc(HEADER_BYTES);
    for (const [method, value, offset] of layout) {
        header[method](value, offset);
    }
    return Buffer.concat([header, pcm]);
}
|