dvgateway-adapters 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -0
- package/dist/index.d.ts +41 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +37 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/anthropic.d.ts +62 -0
- package/dist/llm/anthropic.d.ts.map +1 -0
- package/dist/llm/anthropic.js +83 -0
- package/dist/llm/anthropic.js.map +1 -0
- package/dist/llm/index.d.ts +5 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +4 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/openai-llm.d.ts +55 -0
- package/dist/llm/openai-llm.d.ts.map +1 -0
- package/dist/llm/openai-llm.js +68 -0
- package/dist/llm/openai-llm.js.map +1 -0
- package/dist/realtime/index.d.ts +3 -0
- package/dist/realtime/index.d.ts.map +1 -0
- package/dist/realtime/index.js +3 -0
- package/dist/realtime/index.js.map +1 -0
- package/dist/realtime/openai-realtime.d.ts +132 -0
- package/dist/realtime/openai-realtime.d.ts.map +1 -0
- package/dist/realtime/openai-realtime.js +261 -0
- package/dist/realtime/openai-realtime.js.map +1 -0
- package/dist/stt/deepgram.d.ts +105 -0
- package/dist/stt/deepgram.d.ts.map +1 -0
- package/dist/stt/deepgram.js +180 -0
- package/dist/stt/deepgram.js.map +1 -0
- package/dist/stt/index.d.ts +3 -0
- package/dist/stt/index.d.ts.map +1 -0
- package/dist/stt/index.js +3 -0
- package/dist/stt/index.js.map +1 -0
- package/dist/tts/cached-tts.d.ts +131 -0
- package/dist/tts/cached-tts.d.ts.map +1 -0
- package/dist/tts/cached-tts.js +231 -0
- package/dist/tts/cached-tts.js.map +1 -0
- package/dist/tts/elevenlabs.d.ts +95 -0
- package/dist/tts/elevenlabs.d.ts.map +1 -0
- package/dist/tts/elevenlabs.js +195 -0
- package/dist/tts/elevenlabs.js.map +1 -0
- package/dist/tts/index.d.ts +7 -0
- package/dist/tts/index.d.ts.map +1 -0
- package/dist/tts/index.js +5 -0
- package/dist/tts/index.js.map +1 -0
- package/dist/tts/openai-tts.d.ts +64 -0
- package/dist/tts/openai-tts.d.ts.map +1 -0
- package/dist/tts/openai-tts.js +148 -0
- package/dist/tts/openai-tts.js.map +1 -0
- package/package.json +89 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Realtime Speech-to-Speech Adapter
|
|
3
|
+
*
|
|
4
|
+
* Implements a direct speech-to-speech pipeline using the OpenAI Realtime API.
|
|
5
|
+
* This adapter bypasses the traditional STT → LLM → TTS chain entirely —
|
|
6
|
+
* audio goes in, audio comes out with minimal latency (~300ms glass-to-glass).
|
|
7
|
+
*
|
|
8
|
+
* Architecture:
|
|
9
|
+
*
|
|
10
|
+
* DVGateway audio in (16kHz PCM)
|
|
11
|
+
* ↓ upsample to 24kHz
|
|
12
|
+
* OpenAI Realtime API (WebSocket)
|
|
13
|
+
* ↓ gpt-4o-realtime (audio 1.5) processes speech end-to-end
|
|
14
|
+
* DVGateway audio out (24kHz → 16kHz PCM)
|
|
15
|
+
* ↓
|
|
16
|
+
* TTS injector → call audio
|
|
17
|
+
*
|
|
18
|
+
* Features:
|
|
19
|
+
* - Sub-300ms end-to-end latency (vs ~500ms for cascaded pipeline)
|
|
20
|
+
* - No separate STT/LLM/TTS billing — single API call
|
|
21
|
+
* - Native voice activity detection (server-side VAD)
|
|
22
|
+
* - Automatic conversation history management
|
|
23
|
+
* - Input and output transcription
|
|
24
|
+
* - Function/tool calling support
|
|
25
|
+
* - Configurable voice, instructions, and turn detection
|
|
26
|
+
*
|
|
27
|
+
* Model Reference (2026-03):
|
|
28
|
+
* gpt-4o-realtime-preview — Latest stable realtime model
|
|
29
|
+
* gpt-4o-realtime-preview-2024-12-17 — Pinned stable version
|
|
30
|
+
* gpt-4o-mini-realtime-preview — Cost-efficient realtime (audio 1.5)
|
|
31
|
+
* gpt-4o-mini-realtime-preview-2024-12-17 — Pinned mini version
|
|
32
|
+
*
|
|
33
|
+
* Audio format:
|
|
34
|
+
* Input: PCM16 24kHz mono, base64-encoded (upsample from DVGateway 16kHz)
|
|
35
|
+
* Output: PCM16 24kHz mono, base64-encoded (downsample to DVGateway 16kHz)
|
|
36
|
+
*
|
|
37
|
+
* API Reference: https://platform.openai.com/docs/api-reference/realtime
|
|
38
|
+
* WebSocket: wss://api.openai.com/v1/realtime
|
|
39
|
+
*/
|
|
40
|
+
import WebSocket from 'ws';
|
|
41
|
+
import { resample, float32ToSlin16, slin16ToFloat32 } from 'dvgateway-sdk';
|
|
42
|
+
// ─── OpenAI Realtime Adapter ─────────────────────────────────────────────────
|
|
43
|
+
/** WebSocket endpoint of the OpenAI Realtime API (the model is appended as a query parameter). */
const REALTIME_API_URL = 'wss://api.openai.com/v1/realtime';

/** Sample rate, in Hz, of audio exchanged with OpenAI Realtime. */
const OPENAI_SAMPLE_RATE = 24_000;

/** Sample rate, in Hz, of audio exchanged with DVGateway. */
const DV_SAMPLE_RATE = 16_000;

/** Duration, in milliseconds, of each audio frame sent upstream. */
const INPUT_CHUNK_MS = 20;

/** Number of 24 kHz samples in one upstream frame (480 for 20 ms). */
const INPUT_SAMPLES_24K = Math.round((INPUT_CHUNK_MS / 1000) * OPENAI_SAMPLE_RATE);
|
|
48
|
+
/**
 * Speech-to-speech adapter that streams call audio to the OpenAI Realtime API
 * over one WebSocket per call (`linkedId`) and hands the synthesized audio,
 * transcripts and errors back through registered callbacks.
 *
 * Audio is resampled 16 kHz → 24 kHz on the way in and 24 kHz → 16 kHz on the
 * way out; both directions travel as base64-encoded PCM16.
 */
export class OpenAIRealtimeAdapter {
    /** Resolved options: caller-supplied values with defaults filled in. */
    opts;
    /** Callback `(pcm16Buffer, linkedId)` for downsampled 16 kHz output audio. */
    audioOutputHandler = null;
    /** Callback `(transcriptResult)` for final user and assistant transcripts. */
    transcriptHandler = null;
    /** Callback `(error, linkedId)` for socket, API and processing errors. */
    errorHandler = null;
    // Per-session WebSocket connections, keyed by linkedId
    sessions = new Map();
    /** Set by `stop()` without an id; makes every running session wind down. */
    stopped = false;

    /**
     * @param {object} opts
     * @param {string} opts.apiKey OpenAI API key, sent as a Bearer token.
     * @param {string} [opts.model='gpt-4o-realtime-preview']
     * @param {string} [opts.voice='alloy']
     * @param {string} [opts.instructions] System instructions for the session.
     * @param {object} [opts.turnDetection={}] `{ mode, threshold, silenceDurationMs, prefixPaddingMs }`.
     * @param {boolean} [opts.inputTranscription=true] Request transcripts of caller speech.
     * @param {number} [opts.temperature=0.8]
     * @param {number|'inf'} [opts.maxResponseTokens='inf']
     */
    constructor(opts) {
        this.opts = {
            model: opts.model ?? 'gpt-4o-realtime-preview',
            voice: opts.voice ?? 'alloy',
            instructions: opts.instructions ?? 'You are a helpful voice assistant. Keep answers concise and conversational.',
            turnDetection: opts.turnDetection ?? {},
            inputTranscription: opts.inputTranscription ?? true,
            temperature: opts.temperature ?? 0.8,
            maxResponseTokens: opts.maxResponseTokens ?? 'inf',
            apiKey: opts.apiKey,
        };
    }

    /** Registers the callback that receives output audio (16 kHz PCM16 Buffer, linkedId). */
    onAudioOutput(handler) {
        this.audioOutputHandler = handler;
    }

    /** Registers the callback that receives final transcripts for both speakers. */
    onTranscript(handler) {
        this.transcriptHandler = handler;
    }

    /** Registers the callback that receives errors as `(Error, linkedId)`. */
    onError(handler) {
        this.errorHandler = handler;
    }

    /**
     * Opens a Realtime session for one call, configures it, and streams
     * `audioIn` to it in the background.
     *
     * @param {string} linkedId Call identifier used to key the session.
     * @param {AsyncIterable<{samples: Float32Array}>} audioIn 16 kHz audio chunks.
     * @returns {Promise<void>} Resolves once the socket closes or the session is stopped.
     *   Rejects if the initial WebSocket connection fails.
     */
    async startSession(linkedId, audioIn) {
        this.stopped = false;
        const ws = await this.connectRealtime(linkedId);
        this.sessions.set(linkedId, ws);

        // Send session configuration
        this.sendEvent(ws, {
            type: 'session.update',
            session: {
                modalities: ['audio', 'text'],
                voice: this.opts.voice,
                instructions: this.opts.instructions,
                input_audio_format: 'pcm16',
                output_audio_format: 'pcm16',
                temperature: this.opts.temperature,
                max_response_output_tokens: this.opts.maxResponseTokens,
                ...(this.opts.inputTranscription && {
                    input_audio_transcription: { model: 'whisper-1' },
                }),
                turn_detection: this.buildTurnDetection(),
            },
        });

        // Pipe audio to OpenAI in the background. A failure of the input stream
        // or of resampling is routed to the error handler instead of surfacing
        // as an unhandled promise rejection.
        this.pipeAudioIn(linkedId, ws, audioIn).catch((err) => {
            this.reportError(err, linkedId);
        });

        // Wait until the socket closes or stop() is called.
        await new Promise((resolve) => {
            let poll;
            const finish = () => {
                clearInterval(poll);
                ws.off('close', finish);
                resolve();
            };
            ws.once('close', finish);
            poll = setInterval(() => {
                // Compare by identity so a newer session reusing this linkedId
                // does not keep an already-replaced session waiting.
                if (this.stopped || this.sessions.get(linkedId) !== ws) {
                    finish();
                }
            }, 100);
        });
    }

    /**
     * Stops one session, or all of them when `linkedId` is omitted.
     *
     * @param {string} [linkedId] Session to stop; falsy stops every session.
     * @returns {Promise<void>}
     */
    async stop(linkedId) {
        if (linkedId) {
            const ws = this.sessions.get(linkedId);
            if (ws && ws.readyState === WebSocket.OPEN) {
                ws.close(1000, 'session ended');
            }
            this.sessions.delete(linkedId);
            return;
        }
        // Stop all sessions
        this.stopped = true;
        for (const [id, ws] of this.sessions) {
            if (ws.readyState === WebSocket.OPEN) {
                ws.close(1000, 'adapter stopped');
            }
            this.sessions.delete(id);
        }
    }

    // ── Private helpers ─────────────────────────────────────────────────────

    /**
     * Forwards an error to the registered error handler. Non-Error values are
     * wrapped, and an exception thrown by the handler itself is contained so it
     * cannot escape from a socket listener.
     */
    reportError(err, linkedId) {
        const error = err instanceof Error ? err : new Error(String(err));
        try {
            this.errorHandler?.(error, linkedId);
        }
        catch {
            // Last resort: there is nowhere left to report a failing error handler.
        }
    }

    /**
     * Opens the WebSocket, wires the message/error/close listeners, and waits
     * for the connection to open.
     *
     * @returns {Promise<WebSocket>} The open socket. Rejects with the socket
     *   error if the connection cannot be established.
     */
    async connectRealtime(linkedId) {
        const url = `${REALTIME_API_URL}?model=${encodeURIComponent(this.opts.model)}`;
        const ws = new WebSocket(url, {
            headers: {
                Authorization: `Bearer ${this.opts.apiKey}`,
                'OpenAI-Beta': 'realtime=v1',
            },
        });

        ws.on('message', (data) => {
            let event;
            try {
                event = JSON.parse(data.toString());
            }
            catch {
                return; // Ignore malformed JSON
            }
            if (event === null || typeof event !== 'object') {
                return; // Valid JSON, but not an event object
            }
            // Dispatch separately from parsing so failures in event processing
            // or in user callbacks are reported rather than silently dropped.
            try {
                this.handleServerEvent(event, linkedId);
            }
            catch (err) {
                this.reportError(err, linkedId);
            }
        });
        ws.on('error', (err) => {
            this.reportError(err, linkedId);
        });
        ws.on('close', () => {
            // Only drop the entry if it still points at this socket.
            if (this.sessions.get(linkedId) === ws) {
                this.sessions.delete(linkedId);
            }
        });

        // Wait for connection; whichever of open/error fires first detaches the other.
        await new Promise((resolve, reject) => {
            const onOpen = () => {
                ws.off('error', onError);
                resolve();
            };
            const onError = (err) => {
                ws.off('open', onOpen);
                reject(err);
            };
            ws.once('open', onOpen);
            ws.once('error', onError);
        });
        return ws;
    }

    /**
     * Reads audio chunks, upsamples them to 24 kHz, and sends them as 20 ms
     * `input_audio_buffer.append` events until the stream ends, the adapter is
     * stopped, or the socket is no longer open.
     */
    async pipeAudioIn(linkedId, ws, audioIn) {
        for await (const chunk of audioIn) {
            if (this.stopped || ws.readyState !== WebSocket.OPEN) {
                break;
            }
            // Upsample from 16kHz (DVGateway) to 24kHz (OpenAI Realtime)
            const samples24k = resample(chunk.samples, DV_SAMPLE_RATE, OPENAI_SAMPLE_RATE);
            // Split into 20ms frames and send
            for (let offset = 0; offset < samples24k.length; offset += INPUT_SAMPLES_24K) {
                const end = Math.min(offset + INPUT_SAMPLES_24K, samples24k.length);
                const pcm16 = float32ToSlin16(samples24k.subarray(offset, end));
                const b64 = Buffer.from(pcm16.buffer, pcm16.byteOffset, pcm16.byteLength).toString('base64');
                this.sendEvent(ws, {
                    type: 'input_audio_buffer.append',
                    audio: b64,
                });
            }
        }
        // Without server VAD nothing ends the turn for us: commit the buffered
        // audio and ask for a response explicitly.
        if (ws.readyState === WebSocket.OPEN && this.opts.turnDetection.mode === 'none') {
            this.sendEvent(ws, { type: 'input_audio_buffer.commit' });
            this.sendEvent(ws, { type: 'response.create' });
        }
    }

    /**
     * Routes one parsed server event to the matching callback. Event types
     * other than audio deltas, transcripts and errors are ignored.
     */
    handleServerEvent(event, linkedId) {
        switch (event.type) {
            // ── Audio output delta ─────────────────────────────────────────────
            case 'response.audio.delta': {
                if (!event.delta) {
                    break;
                }
                // Decode base64 PCM16 at 24kHz
                const pcm24kBuf = Buffer.from(event.delta, 'base64');
                const samples24k = slin16ToFloat32(pcm24kBuf);
                // Downsample 24kHz → 16kHz for DVGateway
                const samples16k = resample(samples24k, OPENAI_SAMPLE_RATE, DV_SAMPLE_RATE);
                const pcm16k = float32ToSlin16(samples16k);
                this.audioOutputHandler?.(Buffer.from(pcm16k.buffer, pcm16k.byteOffset, pcm16k.byteLength), linkedId);
                break;
            }
            // ── Input transcription (user speech) ─────────────────────────────
            case 'conversation.item.input_audio_transcription.completed': {
                if (!event.transcript) {
                    break;
                }
                this.transcriptHandler?.({
                    linkedId,
                    text: event.transcript,
                    isFinal: true,
                    speaker: 'customer',
                    timestampMs: Date.now(),
                });
                break;
            }
            // ── Output transcription (assistant text) ─────────────────────────
            case 'response.audio_transcript.done': {
                const transcript = event.transcript ?? event.part?.transcript ?? '';
                if (!transcript) {
                    break;
                }
                this.transcriptHandler?.({
                    linkedId,
                    text: transcript,
                    isFinal: true,
                    speaker: 'agent',
                    timestampMs: Date.now(),
                });
                break;
            }
            // ── Error handling ─────────────────────────────────────────────────
            case 'error': {
                const msg = event.error?.message ?? 'Unknown realtime API error';
                const code = event.error?.code ? ` [${event.error.code}]` : '';
                this.reportError(new Error(`OpenAI Realtime${code}: ${msg}`), linkedId);
                break;
            }
            default:
                // Ignore other event types (session.created, response.created, etc.)
                break;
        }
    }

    /**
     * Builds the `turn_detection` value for `session.update`.
     *
     * @returns {object|null} Server-VAD settings, or `null` when
     *   `turnDetection.mode` is `'none'`. The Realtime API reference documents
     *   `null` as the way to turn turn detection off.
     */
    buildTurnDetection() {
        const td = this.opts.turnDetection;
        if (td.mode === 'none') {
            return null;
        }
        return {
            type: 'server_vad',
            threshold: td.threshold ?? 0.5,
            silence_duration_ms: td.silenceDurationMs ?? 200,
            prefix_padding_ms: td.prefixPaddingMs ?? 300,
            create_response: true,
        };
    }

    /** Serializes and sends one client event; silently skipped if the socket is not open. */
    sendEvent(ws, event) {
        if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify(event));
        }
    }
}
|
|
261
|
+
//# sourceMappingURL=openai-realtime.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openai-realtime.js","sourceRoot":"","sources":["../../src/realtime/openai-realtime.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAEH,OAAO,SAAS,MAAM,IAAI,CAAC;AAE3B,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAsG3E,gFAAgF;AAEhF,MAAM,gBAAgB,GAAG,kCAAkC,CAAC;AAC5D,MAAM,kBAAkB,GAAG,KAAK,CAAC,CAAC,6BAA6B;AAC/D,MAAM,cAAc,GAAG,KAAK,CAAC,CAAK,uBAAuB;AACzD,MAAM,cAAc,GAAG,EAAE,CAAC,CAAQ,oBAAoB;AACtD,MAAM,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,GAAG,cAAc,GAAG,IAAI,CAAC,CAAC,CAAC,cAAc;AAEhG,MAAM,OAAO,qBAAqB;IACf,IAAI,CAAyC;IAEtD,kBAAkB,GAAuD,IAAI,CAAC;IAC9E,iBAAiB,GAAgD,IAAI,CAAC;IACtE,YAAY,GAAqD,IAAI,CAAC;IAE9E,oCAAoC;IACnB,QAAQ,GAAG,IAAI,GAAG,EAAqB,CAAC;IACjD,OAAO,GAAG,KAAK,CAAC;IAExB,YAAY,IAAkC;QAC5C,IAAI,CAAC,IAAI,GAAG;YACV,KAAK,EAAc,IAAI,CAAC,KAAK,IAAgB,yBAAyB;YACtE,KAAK,EAAc,IAAI,CAAC,KAAK,IAAgB,OAAO;YACpD,YAAY,EAAO,IAAI,CAAC,YAAY,IAAS,6EAA6E;YAC1H,aAAa,EAAM,IAAI,CAAC,aAAa,IAAQ,EAAE;YAC/C,kBAAkB,EAAE,IAAI,CAAC,kBAAkB,IAAI,IAAI;YACnD,WAAW,EAAQ,IAAI,CAAC,WAAW,IAAU,GAAG;YAChD,iBAAiB,EAAE,IAAI,CAAC,iBAAiB,IAAI,KAAK;YAClD,MAAM,EAAa,IAAI,CAAC,MAAM;SAC/B,CAAC;IACJ,CAAC;IAED,aAAa,CAAC,OAAkD;QAC9D,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC;IACpC,CAAC;IAED,YAAY,CAAC,OAA2C;QACtD,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC;IACnC,CAAC;IAED,OAAO,CAAC,OAAgD;QACtD,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;IAC9B,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,QAAgB,EAAE,OAAkC;QACrE,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;QAErB,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;QAChD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEhC,6BAA6B;QAC7B,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE;YACjB,IAAI,EAAE,gBAAgB;YACtB,OAAO,EAAE;gBACP,UAAU,EAAI,CAAC,OAAO,EAAE,MAAM,CAAC;gBAC/B,KAAK,EAAS,IAAI,CAAC,IAAI,CAAC,KAAK;gBAC7B,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY;gBACpC,kBAAkB,EAAG,OAAO;gBAC5B,mBAAmB,EAAE,OAAO;gBAC5B,WAAW,EAAG,IAAI,CAAC,IAAI,CAAC,WAAW;gBACnC,0BAA0B,EAAE,IAAI,CAAC,IAAI,CAAC,iBAAiB;gBACvD,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,kBAAkB,IAAI;oBAClC,yBAAyB,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE;iBAClD,CAAC;gB
ACF,cAAc,EAAE,IAAI,CAAC,kBAAkB,EAAE;aAC1C;SACF,CAAC,CAAC;QAEH,qCAAqC;QACrC,KAAK,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,EAAE,EAAE,OAAO,CAAC,CAAC;QAE7C,2CAA2C;QAC3C,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;YAClC,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAC1B,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC7B,IAAI,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACjD,aAAa,CAAC,KAAK,CAAC,CAAC;oBACrB,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC,EAAE,GAAG,CAAC,CAAC;QACV,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,QAAiB;QAC1B,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,EAAE,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YACvC,IAAI,EAAE,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;gBAC3C,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,eAAe,CAAC,CAAC;YAClC,CAAC;YACD,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;aAAM,CAAC;YACN,oBAAoB;YACpB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;YACpB,KAAK,MAAM,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACrC,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;oBACrC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;gBACpC,CAAC;gBACD,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;IACH,CAAC;IAED,2EAA2E;IAEnE,KAAK,CAAC,eAAe,CAAC,QAAgB;QAC5C,MAAM,GAAG,GAAG,GAAG,gBAAgB,UAAU,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAE/E,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,GAAG,EAAE;YAC5B,OAAO,EAAE;gBACP,aAAa,EAAE,UAAU,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;gBAC3C,aAAa,EAAE,aAAa;aAC7B;SACF,CAAC,CAAC;QAEH,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAuB,EAAE,EAAE;YAC3C,IAAI,CAAC;gBACH,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAwB,CAAC;gBACjE,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAC1C,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACrB,IAAI,CAAC,YAAY,EAAE,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YAClB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,E
AAE,MAAM,EAAE,EAAE;YAC1C,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACzB,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,KAAK,CAAC,WAAW,CACvB,QAAgB,EAChB,EAAa,EACb,OAAkC;QAElC,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAClC,IAAI,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI;gBAAE,MAAM;YAE5D,6DAA6D;YAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,EAAE,cAAc,EAAE,kBAAkB,CAAC,CAAC;YAE/E,kCAAkC;YAClC,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,OAAO,MAAM,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC;gBAClC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,iBAAiB,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;gBACpE,MAAM,KAAK,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBAC/C,MAAM,GAAG,GAAG,CAAC;gBAEb,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;gBACrC,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBAE7F,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE;oBACjB,IAAI,EAAG,2BAA2B;oBAClC,KAAK,EAAE,GAAG;iBACX,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,oDAAoD;QACpD,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAChF,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,2BAA2B,EAAE,CAAC,CAAC;YAC1D,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,iBAAiB,EAAE,CAAC,CAAC;QAClD,CAAC;IACH,CAAC;IAEO,iBAAiB,CAAC,KAA0B,EAAE,QAAgB;QACpE,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,sEAAsE;YACtE,KAAK,sBAAsB,CAAC,CAAC,CAAC;gBAC5B,IAAI,CAAC,KAAK,CAAC,KAAK;oBAAE,MAAM;gBAExB,+BAA+B;gBAC/B,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;gBACrD,MAAM,UAAU,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;gBAE9C,yCAAyC;gBACzC,MAAM,UAAU,GAAG,QAAQ,CAAC,UAAU,EAAE,kBAAkB,EAAE,cAAc,CAAC,CAAC;gBAC5E,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;gBAE3C,IAAI,CAAC,kBAAkB,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC,EAAE,QAAQ,CAAC,CAAC;gBACtG,MAAM;YACR,CAAC;YAED,qEAAqE;YACrE,KAAK,uDAAuD,CAAC,CAAC,CAAC;gBAC7D,IAAI,CAAC,KAAK,CAAC,UAAU;oBAAE,MAAM;gBAC7B,IAAI,CAAC,iBAAiB,EAAE,C
AAC;oBACvB,QAAQ;oBACR,IAAI,EAAS,KAAK,CAAC,UAAU;oBAC7B,OAAO,EAAM,IAAI;oBACjB,OAAO,EAAM,UAAU;oBACvB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;iBACxB,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAED,qEAAqE;YACrE,KAAK,gCAAgC,CAAC,CAAC,CAAC;gBACtC,MAAM,UAAU,GAAI,KAAuD,CAAC,UAAU;uBACjF,CAAC,KAAK,CAAC,IAAI,EAAE,UAAU,IAAI,EAAE,CAAC,CAAC;gBACpC,IAAI,CAAC,UAAU;oBAAE,MAAM;gBACvB,IAAI,CAAC,iBAAiB,EAAE,CAAC;oBACvB,QAAQ;oBACR,IAAI,EAAS,UAAU;oBACvB,OAAO,EAAM,IAAI;oBACjB,OAAO,EAAM,OAAO;oBACpB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;iBACxB,CAAC,CAAC;gBACH,MAAM;YACR,CAAC;YAED,sEAAsE;YACtE,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,EAAE,OAAO,IAAI,4BAA4B,CAAC;gBACjE,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,KAAK,KAAK,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC/D,IAAI,CAAC,YAAY,EAAE,CAAC,IAAI,KAAK,CAAC,kBAAkB,IAAI,KAAK,GAAG,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;gBAC3E,MAAM;YACR,CAAC;YAED;gBACE,qEAAqE;gBACrE,MAAM;QACV,CAAC;IACH,CAAC;IAEO,kBAAkB;QACxB,MAAM,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC;QACnC,IAAI,EAAE,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC1B,CAAC;QACD,OAAO;YACL,IAAI,EAAiB,YAAY;YACjC,SAAS,EAAY,EAAE,CAAC,SAAS,IAAY,GAAG;YAChD,mBAAmB,EAAE,EAAE,CAAC,iBAAiB,IAAI,GAAG;YAChD,iBAAiB,EAAI,EAAE,CAAC,eAAe,IAAM,GAAG;YAChD,eAAe,EAAM,IAAI;SAC1B,CAAC;IACJ,CAAC;IAEO,SAAS,CAAC,EAAa,EAAE,KAA8B;QAC7D,IAAI,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;YACrC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deepgram STT Adapter
|
|
3
|
+
*
|
|
4
|
+
* Streams 16kHz slin16 PCM audio to Deepgram's live transcription API
|
|
5
|
+
* and fires onTranscript callbacks for both partial and final results.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Nova-3 model family (2026 default — best Korean + English accuracy)
|
|
9
|
+
* - Speaker diarization support
|
|
10
|
+
* - Utterance end detection (endpointing)
|
|
11
|
+
* - Automatic reconnection on WS failure
|
|
12
|
+
* - Keyterm prompting for domain-specific vocabulary
|
|
13
|
+
* - Sentiment analysis (positive/neutral/negative per segment)
|
|
14
|
+
*
|
|
15
|
+
* Deepgram live API format:
|
|
16
|
+
* Input: raw 16-bit PCM, 16kHz, mono (matches DVGateway slin16)
|
|
17
|
+
* Output: JSON transcript events
|
|
18
|
+
*
|
|
19
|
+
* Deepgram Model Reference (2026-03):
|
|
20
|
+
* nova-3 — Best general accuracy, multilingual (default)
|
|
21
|
+
* nova-3-general — Optimized for general conversational speech
|
|
22
|
+
* nova-3-medical — Medical terminology specialization
|
|
23
|
+
* nova-3-phonecall — Telephone call quality audio (8–16kHz)
|
|
24
|
+
* enhanced — Enhanced tier for complex audio environments
|
|
25
|
+
* base — Cost-effective baseline model
|
|
26
|
+
*
|
|
27
|
+
* API Endpoint: wss://api.deepgram.com/v1/listen
|
|
28
|
+
* Docs: https://developers.deepgram.com/docs/getting-started-with-live-streaming-audio
|
|
29
|
+
*/
|
|
30
|
+
import type { SttAdapter, AudioChunk, TranscriptResult } from 'dvgateway-sdk';
|
|
31
|
+
/** Construction options for {@link DeepgramAdapter}. Every field except `apiKey` is optional. */
export interface DeepgramAdapterOptions {
    /** Deepgram API key, sent as `Authorization: Token <apiKey>`. */
    apiKey: string;
    /** Language code, e.g. "ko", "en-US", "multi" (default: "ko") */
    language?: string;
    /**
     * Deepgram model (default: "nova-3")
     * Options:
     *   nova-3            — Best general accuracy, multilingual (default)
     *   nova-3-general    — Optimized for conversational speech
     *   nova-3-medical    — Medical terminology
     *   nova-3-phonecall  — Telephone audio optimization
     *   enhanced          — Enhanced tier
     *   base              — Cost-effective baseline
     */
    model?: string;
    /** Enable speaker diarization — identifies who is speaking (default: false) */
    diarize?: boolean;
    /**
     * Enable server-side VAD events from Deepgram (default: true).
     * Sent to the API as the `vad_events` query parameter.
     * NOTE(review): Deepgram documents `vad_events` as enabling SpeechStarted
     * messages; `speech_final` appears to be governed by `endpointing` — confirm.
     */
    vadEnabled?: boolean;
    /**
     * Endpointing — silence duration (ms) to trigger utterance end (default: 300).
     * Sent to the API as the `endpointing` query parameter. Lower values = faster
     * turn detection.
     * Optimized for clean telephony/snoop media where background noise is minimal.
     * Recommended: 200–500ms for clean media, 500–1000ms for noisy environments.
     */
    endpointingMs?: number;
    /**
     * Utterance end ms — time (ms) of silence to mark utterance as final (default: 800).
     * Sent to the API as the `utterance_end_ms` query parameter; it is a separate
     * setting from `endpointingMs`, not a client-side timer.
     * Optimized for Korean telephony streaming: 800ms balances fast turn detection
     * with avoiding premature cuts on Korean sentence-final particles.
     * NOTE(review): Deepgram's documentation recommends values of 1000ms or more
     * for `utterance_end_ms` — confirm that 800 has the intended effect.
     */
    utteranceEndMs?: number;
    /** Interim results — emit partial transcripts (default: true) */
    interimResults?: boolean;
    /** Smart formatting — Korean punctuation, numbers, dates, etc. (default: true) */
    smartFormat?: boolean;
    /**
     * Keyterm prompting — boost accuracy for domain-specific words/phrases.
     * Each entry is sent as its own `keyterm` query parameter.
     * E.g.: ["AI", "DVGateway", "Anthropic"] (nova-3 models only)
     */
    keywords?: string[];
    /**
     * Punctuation — add punctuation to transcripts (default: true)
     * Note: smartFormat implicitly enables punctuation.
     */
    punctuate?: boolean;
    /**
     * Profanity filter — redact profane words (default: false)
     */
    profanityFilter?: boolean;
    /**
     * Sentiment analysis — classify each transcript segment as positive/neutral/negative
     * with a confidence score. Requires Nova-3 or later. (default: false)
     * Docs: https://developers.deepgram.com/docs/sentiment-analysis
     */
    sentiment?: boolean;
}
|
|
92
|
+
/**
 * Streaming speech-to-text adapter backed by Deepgram's live transcription
 * WebSocket API. Holds a single socket at a time.
 */
export declare class DeepgramAdapter implements SttAdapter {
    /** Options with defaults applied by the constructor. */
    private readonly opts;
    /** Callback registered through `onTranscript`, or null. */
    private transcriptHandler;
    /** Current Deepgram socket, or null when no stream is active. */
    private ws;
    /** True once `stop()` has been called; reset by `startStream()`. */
    private stopped;
    constructor(opts: DeepgramAdapterOptions);
    /** Registers the callback that receives transcript results. */
    onTranscript(handler: (result: TranscriptResult) => void): void;
    /**
     * Connects to Deepgram and streams `audioStream` to it. The returned
     * promise settles when the socket closes or `stop()` is called.
     */
    startStream(linkedId: string, audioStream: AsyncIterable<AudioChunk>): Promise<void>;
    /** Marks the adapter stopped and closes the socket after asking Deepgram to flush. */
    stop(): Promise<void>;
    private connectDeepgram;
    private pipeAudio;
    private handleTranscript;
}
|
|
105
|
+
//# sourceMappingURL=deepgram.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"deepgram.d.ts","sourceRoot":"","sources":["../../src/stt/deepgram.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,gBAAgB,EAAmB,MAAM,eAAe,CAAC;AAG/F,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;;;;;OASG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,+EAA+E;IAC/E,OAAO,CAAC,EAAE,OAAO,CAAC;IAMlB;;;OAGG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IAKxB,iEAAiE;IACjE,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,kFAAkF;IAClF,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;OAEG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAmCD,qBAAa,eAAgB,YAAW,UAAU;IAChD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAmC;IACxD,OAAO,CAAC,iBAAiB,CAAqD;IAC9E,OAAO,CAAC,EAAE,CAA0B;IACpC,OAAO,CAAC,OAAO,CAAS;gBAEZ,IAAI,EAAE,sBAAsB;IAoBxC,YAAY,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,IAAI,GAAG,IAAI;IAIzD,WAAW,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,EAAE,aAAa,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAsBpF,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,eAAe;YAoDf,SAAS;IAUvB,OAAO,CAAC,gBAAgB;CAyCzB"}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deepgram STT Adapter
|
|
3
|
+
*
|
|
4
|
+
* Streams 16kHz slin16 PCM audio to Deepgram's live transcription API
|
|
5
|
+
* and fires onTranscript callbacks for both partial and final results.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Nova-3 model family (2026 default — best Korean + English accuracy)
|
|
9
|
+
* - Speaker diarization support
|
|
10
|
+
* - Utterance end detection (endpointing)
|
|
11
|
+
* - Automatic reconnection on WS failure
|
|
12
|
+
* - Keyterm prompting for domain-specific vocabulary
|
|
13
|
+
* - Sentiment analysis (positive/neutral/negative per segment)
|
|
14
|
+
*
|
|
15
|
+
* Deepgram live API format:
|
|
16
|
+
* Input: raw 16-bit PCM, 16kHz, mono (matches DVGateway slin16)
|
|
17
|
+
* Output: JSON transcript events
|
|
18
|
+
*
|
|
19
|
+
* Deepgram Model Reference (2026-03):
|
|
20
|
+
* nova-3 — Best general accuracy, multilingual (default)
|
|
21
|
+
* nova-3-general — Optimized for general conversational speech
|
|
22
|
+
* nova-3-medical — Medical terminology specialization
|
|
23
|
+
* nova-3-phonecall — Telephone call quality audio (8–16kHz)
|
|
24
|
+
* enhanced — Enhanced tier for complex audio environments
|
|
25
|
+
* base — Cost-effective baseline model
|
|
26
|
+
*
|
|
27
|
+
* API Endpoint: wss://api.deepgram.com/v1/listen
|
|
28
|
+
* Docs: https://developers.deepgram.com/docs/getting-started-with-live-streaming-audio
|
|
29
|
+
*/
|
|
30
|
+
import WebSocket from 'ws';
|
|
31
|
+
import { float32ToSlin16 } from 'dvgateway-sdk';
|
|
32
|
+
export class DeepgramAdapter {
|
|
33
|
+
opts;
|
|
34
|
+
transcriptHandler = null;
|
|
35
|
+
ws = null;
|
|
36
|
+
stopped = false;
|
|
37
|
+
constructor(opts) {
|
|
38
|
+
this.opts = {
|
|
39
|
+
language: opts.language ?? 'ko',
|
|
40
|
+
model: opts.model ?? 'nova-3',
|
|
41
|
+
diarize: opts.diarize ?? false,
|
|
42
|
+
// VAD: optimized for clean dedicated media (snoop/mono/16kHz)
|
|
43
|
+
vadEnabled: opts.vadEnabled ?? true,
|
|
44
|
+
endpointingMs: opts.endpointingMs ?? 300, // aggressive for clean media (was 500)
|
|
45
|
+
utteranceEndMs: opts.utteranceEndMs ?? 800, // Korean-tuned (was 1000)
|
|
46
|
+
// Korean streaming optimization
|
|
47
|
+
interimResults: opts.interimResults ?? true,
|
|
48
|
+
smartFormat: opts.smartFormat ?? true,
|
|
49
|
+
keywords: opts.keywords ?? [],
|
|
50
|
+
punctuate: opts.punctuate ?? true,
|
|
51
|
+
profanityFilter: opts.profanityFilter ?? false,
|
|
52
|
+
sentiment: opts.sentiment ?? false,
|
|
53
|
+
apiKey: opts.apiKey,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
onTranscript(handler) {
|
|
57
|
+
this.transcriptHandler = handler;
|
|
58
|
+
}
|
|
59
|
+
async startStream(linkedId, audioStream) {
|
|
60
|
+
this.stopped = false;
|
|
61
|
+
this.ws = await this.connectDeepgram();
|
|
62
|
+
// Pipe audio frames to Deepgram
|
|
63
|
+
void this.pipeAudio(linkedId, audioStream);
|
|
64
|
+
// Keep the promise alive until stream ends or stop() is called
|
|
65
|
+
await new Promise((resolve) => {
|
|
66
|
+
const onClose = () => resolve();
|
|
67
|
+
this.ws?.once('close', onClose);
|
|
68
|
+
const checkStopped = setInterval(() => {
|
|
69
|
+
if (this.stopped) {
|
|
70
|
+
clearInterval(checkStopped);
|
|
71
|
+
this.ws?.close(1000, 'stream ended');
|
|
72
|
+
resolve();
|
|
73
|
+
}
|
|
74
|
+
}, 100);
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
async stop() {
|
|
78
|
+
this.stopped = true;
|
|
79
|
+
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
|
80
|
+
// Send CloseStream message to flush pending audio
|
|
81
|
+
this.ws.send(JSON.stringify({ type: 'CloseStream' }));
|
|
82
|
+
await new Promise((resolve) => setTimeout(resolve, 200));
|
|
83
|
+
this.ws.close(1000, 'stop');
|
|
84
|
+
}
|
|
85
|
+
this.ws = null;
|
|
86
|
+
}
|
|
87
|
+
async connectDeepgram() {
|
|
88
|
+
const params = new URLSearchParams({
|
|
89
|
+
model: this.opts.model,
|
|
90
|
+
language: this.opts.language,
|
|
91
|
+
encoding: 'linear16',
|
|
92
|
+
sample_rate: '16000',
|
|
93
|
+
channels: '1',
|
|
94
|
+
interim_results: String(this.opts.interimResults),
|
|
95
|
+
smart_format: String(this.opts.smartFormat),
|
|
96
|
+
endpointing: String(this.opts.endpointingMs),
|
|
97
|
+
punctuate: String(this.opts.punctuate),
|
|
98
|
+
utterance_end_ms: String(this.opts.utteranceEndMs),
|
|
99
|
+
vad_events: String(this.opts.vadEnabled),
|
|
100
|
+
...(this.opts.diarize && { diarize: 'true' }),
|
|
101
|
+
...(this.opts.profanityFilter && { profanity_filter: 'true' }),
|
|
102
|
+
...(this.opts.sentiment && { sentiment: 'true' }),
|
|
103
|
+
});
|
|
104
|
+
// Keyterm prompting (nova-3 only) — append multiple keyterm params
|
|
105
|
+
for (const kw of this.opts.keywords) {
|
|
106
|
+
params.append('keyterm', kw);
|
|
107
|
+
}
|
|
108
|
+
const url = `wss://api.deepgram.com/v1/listen?${params.toString()}`;
|
|
109
|
+
const ws = new WebSocket(url, {
|
|
110
|
+
headers: { Authorization: `Token ${this.opts.apiKey}` },
|
|
111
|
+
});
|
|
112
|
+
ws.on('message', (data) => {
|
|
113
|
+
try {
|
|
114
|
+
const msg = JSON.parse(data.toString());
|
|
115
|
+
this.handleTranscript(msg);
|
|
116
|
+
}
|
|
117
|
+
catch {
|
|
118
|
+
// Ignore malformed JSON
|
|
119
|
+
}
|
|
120
|
+
});
|
|
121
|
+
ws.on('error', (err) => {
|
|
122
|
+
// Production: replace with shared logger
|
|
123
|
+
process.stderr.write(`[DeepgramAdapter] WebSocket error: ${err.message}\n`);
|
|
124
|
+
});
|
|
125
|
+
// Wait for connection
|
|
126
|
+
await new Promise((resolve, reject) => {
|
|
127
|
+
ws.once('open', resolve);
|
|
128
|
+
ws.once('error', reject);
|
|
129
|
+
});
|
|
130
|
+
return ws;
|
|
131
|
+
}
|
|
132
|
+
async pipeAudio(linkedId, audioStream) {
|
|
133
|
+
for await (const chunk of audioStream) {
|
|
134
|
+
if (this.stopped || !this.ws || this.ws.readyState !== WebSocket.OPEN)
|
|
135
|
+
break;
|
|
136
|
+
// DVGateway provides Float32; convert back to slin16 for Deepgram
|
|
137
|
+
const pcm = float32ToSlin16(chunk.samples);
|
|
138
|
+
this.ws.send(pcm);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
handleTranscript(msg) {
|
|
142
|
+
if (msg.type !== 'Results')
|
|
143
|
+
return;
|
|
144
|
+
const alt = msg.channel?.alternatives?.[0];
|
|
145
|
+
if (!alt || !alt.transcript)
|
|
146
|
+
return;
|
|
147
|
+
const isFinal = msg.speech_final === true || msg.is_final === true;
|
|
148
|
+
// Extract sentiment if available (when sentiment=true was requested)
|
|
149
|
+
let sentimentResult;
|
|
150
|
+
if (this.opts.sentiment && msg.sentiments) {
|
|
151
|
+
const seg = msg.sentiments.segments?.[0] ?? msg.sentiments.average;
|
|
152
|
+
if (seg) {
|
|
153
|
+
sentimentResult = {
|
|
154
|
+
sentiment: seg.sentiment,
|
|
155
|
+
sentimentScore: seg.sentiment_score,
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
const result = {
|
|
160
|
+
linkedId: '', // Set by caller context
|
|
161
|
+
text: alt.transcript,
|
|
162
|
+
isFinal,
|
|
163
|
+
confidence: alt.confidence,
|
|
164
|
+
language: this.opts.language,
|
|
165
|
+
timestampMs: Date.now(),
|
|
166
|
+
words: alt.words?.map((w) => ({
|
|
167
|
+
word: w.punctuated_word ?? w.word,
|
|
168
|
+
startMs: Math.round(w.start * 1000),
|
|
169
|
+
endMs: Math.round(w.end * 1000),
|
|
170
|
+
confidence: w.confidence,
|
|
171
|
+
})),
|
|
172
|
+
...(this.opts.diarize && alt.words?.[0]?.speaker !== undefined
|
|
173
|
+
? { speaker: `speaker_${alt.words[0].speaker}` }
|
|
174
|
+
: {}),
|
|
175
|
+
...(sentimentResult ? { sentiment: sentimentResult } : {}),
|
|
176
|
+
};
|
|
177
|
+
this.transcriptHandler?.(result);
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
//# sourceMappingURL=deepgram.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"deepgram.js","sourceRoot":"","sources":["../../src/stt/deepgram.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,SAAS,MAAM,IAAI,CAAC;AAE3B,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AA0GhD,MAAM,OAAO,eAAe;IACT,IAAI,CAAmC;IAChD,iBAAiB,GAAgD,IAAI,CAAC;IACtE,EAAE,GAAqB,IAAI,CAAC;IAC5B,OAAO,GAAG,KAAK,CAAC;IAExB,YAAY,IAA4B;QACtC,IAAI,CAAC,IAAI,GAAG;YACV,QAAQ,EAAS,IAAI,CAAC,QAAQ,IAAW,IAAI;YAC7C,KAAK,EAAY,IAAI,CAAC,KAAK,IAAc,QAAQ;YACjD,OAAO,EAAU,IAAI,CAAC,OAAO,IAAY,KAAK;YAC9C,8DAA8D;YAC9D,UAAU,EAAO,IAAI,CAAC,UAAU,IAAS,IAAI;YAC7C,aAAa,EAAI,IAAI,CAAC,aAAa,IAAM,GAAG,EAAI,uCAAuC;YACvF,cAAc,EAAG,IAAI,CAAC,cAAc,IAAK,GAAG,EAAI,0BAA0B;YAC1E,gCAAgC;YAChC,cAAc,EAAG,IAAI,CAAC,cAAc,IAAK,IAAI;YAC7C,WAAW,EAAM,IAAI,CAAC,WAAW,IAAQ,IAAI;YAC7C,QAAQ,EAAS,IAAI,CAAC,QAAQ,IAAW,EAAE;YAC3C,SAAS,EAAQ,IAAI,CAAC,SAAS,IAAU,IAAI;YAC7C,eAAe,EAAE,IAAI,CAAC,eAAe,IAAI,KAAK;YAC9C,SAAS,EAAQ,IAAI,CAAC,SAAS,IAAU,KAAK;YAC9C,MAAM,EAAW,IAAI,CAAC,MAAM;SAC7B,CAAC;IACJ,CAAC;IAED,YAAY,CAAC,OAA2C;QACtD,IAAI,CAAC,iBAAiB,GAAG,OAAO,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,QAAgB,EAAE,WAAsC;QACxE,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;QACrB,IAAI,CAAC,EAAE,GAAG,MAAM,IAAI,CAAC,eAAe,EAAE,CAAC;QAEvC,gCAAgC;QAChC,KAAK,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAE3C,+DAA+D;QAC/D,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;YAClC,MAAM,OAAO,GAAG,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC;YAChC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAEhC,MAAM,YAAY,GAAG,WAAW,CAAC,GAAG,EAAE;gBACpC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;oBACjB,aAAa,CAAC,YAAY,CAAC,CAAC;oBAC5B,IAAI,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;oBACrC,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC,EAAE,GAAG,CAAC,CAAC;QACV,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACpB,IAAI,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI,EAAE,CAAC;YACrD,kDAAkD;YAClD,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC;YACtD,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OA
AO,EAAE,GAAG,CAAC,CAAC,CAAC;YAC/D,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,EAAE,GAAG,IAAI,CAAC;IACjB,CAAC;IAEO,KAAK,CAAC,eAAe;QAC3B,MAAM,MAAM,GAAG,IAAI,eAAe,CAAC;YACjC,KAAK,EAAY,IAAI,CAAC,IAAI,CAAC,KAAK;YAChC,QAAQ,EAAS,IAAI,CAAC,IAAI,CAAC,QAAQ;YACnC,QAAQ,EAAS,UAAU;YAC3B,WAAW,EAAM,OAAO;YACxB,QAAQ,EAAS,GAAG;YACpB,eAAe,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC;YACjD,YAAY,EAAK,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC;YAC9C,WAAW,EAAM,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC;YAChD,SAAS,EAAQ,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC;YAC5C,gBAAgB,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC;YAClD,UAAU,EAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC;YAC7C,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YAC7C,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC;YAC9D,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;SAClD,CAAC,CAAC;QAEH,mEAAmE;QACnE,KAAK,MAAM,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC/B,CAAC;QAED,MAAM,GAAG,GAAG,oCAAoC,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC;QAEpE,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,GAAG,EAAE;YAC5B,OAAO,EAAE,EAAE,aAAa,EAAE,SAAS,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE;SACxD,CAAC,CAAC;QAEH,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAuB,EAAE,EAAE;YAC3C,IAAI,CAAC;gBACH,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAuB,CAAC;gBAC9D,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC;YAC7B,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAG,EAAE,EAAE;YACrB,yCAAyC;YACzC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,sCAAsC,GAAG,CAAC,OAAO,IAAI,CAAC,CAAC;QAC9E,CAAC,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1C,EAAE,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACzB,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,KAAK,CAAC,SAAS,CAAC,QAAgB,EAAE,WAAsC;QAC9E,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,WAAW,EAAE,CAAC;YACtC,IAAI
,IAAI,CAAC,OAAO,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,IAAI,CAAC,EAAE,CAAC,UAAU,KAAK,SAAS,CAAC,IAAI;gBAAE,MAAM;YAE7E,kEAAkE;YAClE,MAAM,GAAG,GAAG,eAAe,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC3C,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAEO,gBAAgB,CAAC,GAAuB;QAC9C,IAAI,GAAG,CAAC,IAAI,KAAK,SAAS;YAAE,OAAO;QAEnC,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU;YAAE,OAAO;QAEpC,MAAM,OAAO,GAAG,GAAG,CAAC,YAAY,KAAK,IAAI,IAAI,GAAG,CAAC,QAAQ,KAAK,IAAI,CAAC;QAEnE,qEAAqE;QACrE,IAAI,eAA4C,CAAC;QACjD,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;YAC1C,MAAM,GAAG,GAAG,GAAG,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC;YACnE,IAAI,GAAG,EAAE,CAAC;gBACR,eAAe,GAAG;oBAChB,SAAS,EAAE,GAAG,CAAC,SAAS;oBACxB,cAAc,EAAE,GAAG,CAAC,eAAe;iBACpC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAqB;YAC/B,QAAQ,EAAE,EAAE,EAAE,wBAAwB;YACtC,IAAI,EAAE,GAAG,CAAC,UAAU;YACpB,OAAO;YACP,UAAU,EAAE,GAAG,CAAC,UAAU;YAC1B,QAAQ,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ;YAC5B,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE;YACvB,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC5B,IAAI,EAAE,CAAC,CAAC,eAAe,IAAI,CAAC,CAAC,IAAI;gBACjC,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC;gBAC/B,UAAU,EAAE,CAAC,CAAC,UAAU;aACzB,CAAC,CAAC;YACH,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,IAAI,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,KAAK,SAAS;gBAC5D,CAAC,CAAC,EAAE,OAAO,EAAE,WAAW,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAAE;gBAChD,CAAC,CAAC,EAAE,CAAC;YACP,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SAC3D,CAAC;QAEF,IAAI,CAAC,iBAAiB,EAAE,CAAC,MAAM,CAAC,CAAC;IACnC,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/stt/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,YAAY,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/stt/index.ts"],"names":[],"mappings":"AAAA,gCAAgC;AAChC,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC"}
|