dominus-sdk-nodejs 1.2.18 → 1.2.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -1
- package/dist/namespaces/oracle/OracleSession.d.ts +91 -0
- package/dist/namespaces/oracle/OracleSession.d.ts.map +1 -0
- package/dist/namespaces/oracle/OracleSession.js +187 -0
- package/dist/namespaces/oracle/OracleSession.js.map +1 -0
- package/dist/namespaces/oracle/index.d.ts +78 -0
- package/dist/namespaces/oracle/index.d.ts.map +1 -0
- package/dist/namespaces/oracle/index.js +87 -0
- package/dist/namespaces/oracle/index.js.map +1 -0
- package/dist/namespaces/oracle/internal/AudioCapture.d.ts +42 -0
- package/dist/namespaces/oracle/internal/AudioCapture.d.ts.map +1 -0
- package/dist/namespaces/oracle/internal/AudioCapture.js +316 -0
- package/dist/namespaces/oracle/internal/AudioCapture.js.map +1 -0
- package/dist/namespaces/oracle/internal/OracleWebSocket.d.ts +81 -0
- package/dist/namespaces/oracle/internal/OracleWebSocket.d.ts.map +1 -0
- package/dist/namespaces/oracle/internal/OracleWebSocket.js +204 -0
- package/dist/namespaces/oracle/internal/OracleWebSocket.js.map +1 -0
- package/dist/namespaces/oracle/internal/VADGate.d.ts +75 -0
- package/dist/namespaces/oracle/internal/VADGate.d.ts.map +1 -0
- package/dist/namespaces/oracle/internal/VADGate.js +248 -0
- package/dist/namespaces/oracle/internal/VADGate.js.map +1 -0
- package/dist/namespaces/oracle/types.d.ts +98 -0
- package/dist/namespaces/oracle/types.d.ts.map +1 -0
- package/dist/namespaces/oracle/types.js +28 -0
- package/dist/namespaces/oracle/types.js.map +1 -0
- package/package.json +2 -1
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VADGate - Voice Activity Detection Gate (INTERNAL)
|
|
3
|
+
*
|
|
4
|
+
* Implements a 4-state machine for VAD gating:
|
|
5
|
+
* IDLE → ARMED → SPEAKING → TRAILING → IDLE
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Pre-roll buffer (320ms) to capture word onsets
|
|
9
|
+
* - Armed confirmation (80ms) to reject false triggers
|
|
10
|
+
* - Post-roll/trailing (400ms) to avoid chopped endings
|
|
11
|
+
* - Circular buffer for pre-roll storage
|
|
12
|
+
*
|
|
13
|
+
* This module is INTERNAL and should NOT be exported publicly.
|
|
14
|
+
*/
|
|
15
|
+
import type { VADState, ResolvedOracleSessionOptions } from '../types.js';
|
|
16
|
+
/**
 * Frame-by-frame gate that decides when captured audio is forwarded.
 *
 * The gate is a four-state machine over `VADState`:
 * - idle → armed: the detector reports speech
 * - armed → speaking: speech persisted for armedConfirmFrames frames
 * - armed → idle: speech stopped before it was confirmed
 * - speaking → trailing: the detector reports silence
 * - trailing → speaking: speech resumed before the post-roll expired
 * - trailing → idle: the postrollMs timeout elapsed
 */
export declare class VADGate {
    private state;
    private preRollBuffer;
    private armedFrameCount;
    private trailingTimeout;
    private config;
    private vadModel;
    /** Receives the frames that should be sent to the server; null when unset. */
    onSendAudio: ((frames: ArrayBuffer[]) => void) | null;
    /** Receives the new state each time the gate changes state; null when unset. */
    onStateChange: ((state: VADState) => void) | null;
    /**
     * @param options - Fully resolved session options (timings and thresholds).
     * @param frameDurationMs - Length of one audio frame in milliseconds (optional).
     */
    constructor(options: ResolvedOracleSessionOptions, frameDurationMs?: number);
    /**
     * Prepares the underlying VAD model. Await this before the first
     * `processFrame` call.
     */
    initialize(): Promise<void>;
    /**
     * @returns The state the gate is in right now.
     */
    getState(): VADState;
    /**
     * Feeds one audio frame to the gate.
     *
     * @param pcmFrame - 20ms PCM16 audio frame (640 bytes)
     */
    processFrame(pcmFrame: ArrayBuffer): void;
    /**
     * Puts the gate back into the idle state.
     */
    reset(): void;
    /**
     * Releases the resources held by the gate.
     */
    dispose(): void;
    /**
     * Stores a new state and notifies the state-change callback.
     */
    private setState;
    /**
     * Schedules the return to idle once postrollMs has passed.
     */
    private startTrailingTimer;
    /**
     * Aborts a scheduled return to idle.
     */
    private cancelTrailingTimer;
}
|
|
75
|
+
//# sourceMappingURL=VADGate.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"VADGate.d.ts","sourceRoot":"","sources":["../../../../src/namespaces/oracle/internal/VADGate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,4BAA4B,EAAE,MAAM,aAAa,CAAC;AAwH1E;;;;;;;;;;GAUG;AACH,qBAAa,OAAO;IAClB,OAAO,CAAC,KAAK,CAAoB;IACjC,OAAO,CAAC,aAAa,CAAqB;IAC1C,OAAO,CAAC,eAAe,CAAK;IAC5B,OAAO,CAAC,eAAe,CAA8C;IACrE,OAAO,CAAC,MAAM,CAAgB;IAC9B,OAAO,CAAC,QAAQ,CAAW;IAE3B,mDAAmD;IAC5C,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,WAAW,EAAE,KAAK,IAAI,CAAC,GAAG,IAAI,CAAQ;IAEpE,sCAAsC;IAC/B,aAAa,EAAE,CAAC,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC,GAAG,IAAI,CAAQ;gBAEpD,OAAO,EAAE,4BAA4B,EAAE,eAAe,GAAE,MAAW;IAc/E;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAMjC;;OAEG;IACH,QAAQ,IAAI,QAAQ;IAIpB;;;;OAIG;IACH,YAAY,CAAC,QAAQ,EAAE,WAAW,GAAG,IAAI;IAkDzC;;OAEG;IACH,KAAK,IAAI,IAAI;IAOb;;OAEG;IACH,OAAO,IAAI,IAAI;IAOf;;OAEG;IACH,OAAO,CAAC,QAAQ;IAOhB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAO1B;;OAEG;IACH,OAAO,CAAC,mBAAmB;CAM5B"}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VADGate - Voice Activity Detection Gate (INTERNAL)
|
|
3
|
+
*
|
|
4
|
+
* Implements a 4-state machine for VAD gating:
|
|
5
|
+
* IDLE → ARMED → SPEAKING → TRAILING → IDLE
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Pre-roll buffer (320ms) to capture word onsets
|
|
9
|
+
* - Armed confirmation (80ms) to reject false triggers
|
|
10
|
+
* - Post-roll/trailing (400ms) to avoid chopped endings
|
|
11
|
+
* - Circular buffer for pre-roll storage
|
|
12
|
+
*
|
|
13
|
+
* This module is INTERNAL and should NOT be exported publicly.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Speech detector driven purely by frame energy.
 * Serves as the fallback when Silero ONNX is unavailable.
 */
class EnergyVAD {
    energyThreshold;
    /**
     * @param energyThreshold - RMS level a frame has to exceed to count as speech.
     */
    constructor(energyThreshold) {
        this.energyThreshold = energyThreshold;
    }
    /**
     * Views the frame as 16-bit samples and checks their RMS energy against
     * the threshold supplied at construction.
     *
     * @param pcmFrame - Frame data handed to the Int16Array constructor.
     * @param _threshold - Unused; the energy threshold from the constructor applies instead.
     * @returns Whatever detectSpeechByEnergy reports for the frame.
     */
    isSpeech(pcmFrame, _threshold) {
        return detectSpeechByEnergy(new Int16Array(pcmFrame), this.energyThreshold);
    }
    /** Holds no resources, so there is nothing to release. */
    dispose() { }
}
|
|
32
|
+
/**
 * Decides whether a frame contains speech from its RMS energy.
 *
 * @param pcmFrame - Array-like collection of numeric samples.
 * @param threshold - RMS value the frame must exceed (default 500).
 * @returns true when the frame's RMS is strictly greater than the threshold.
 *   An empty frame yields NaN for the RMS and therefore false.
 */
function detectSpeechByEnergy(pcmFrame, threshold = 500) {
    const sumOfSquares = pcmFrame.reduce((total, sample) => total + sample * sample, 0);
    const rms = Math.sqrt(sumOfSquares / pcmFrame.length);
    return rms > threshold;
}
|
|
43
|
+
/**
 * Silero VAD wrapper using @ricky0123/vad-web.
 * Falls back to energy-based detection if ONNX fails.
 *
 * NOTE: speech decisions are currently always produced by the energy-based
 * fallback. The MicVAD instance is created during initialization, but frames
 * are not yet fed through the ONNX model.
 */
class SileroVAD {
    threshold;
    micVAD = null;
    isInitialized = false;
    fallback;
    useFallback = false;
    /** Initialization currently in progress, or null when none is running. */
    initPromise = null;
    /**
     * @param threshold - Speech threshold handed to MicVAD as positiveSpeechThreshold.
     * @param energyThreshold - RMS threshold for the energy-based fallback detector.
     */
    constructor(threshold, energyThreshold) {
        this.threshold = threshold;
        this.fallback = new EnergyVAD(energyThreshold);
    }
    /**
     * Initialize Silero VAD model.
     *
     * Safe to call repeatedly and concurrently: callers that overlap share one
     * in-flight load, so only a single MicVAD instance is ever created per
     * initialization. Never rejects; a load failure switches to the fallback.
     */
    async initialize() {
        if (this.isInitialized)
            return;
        if (this.initPromise === null) {
            this.initPromise = this.loadModel().finally(() => {
                this.initPromise = null;
            });
        }
        await this.initPromise;
    }
    /**
     * Loads @ricky0123/vad-web and creates the MicVAD instance.
     * Any failure is logged and recorded in useFallback instead of being thrown.
     */
    async loadModel() {
        try {
            // Dynamic import to avoid bundling issues in non-browser environments
            const vadModule = await import('@ricky0123/vad-web');
            this.micVAD = await vadModule.MicVAD.new({
                positiveSpeechThreshold: this.threshold,
                // Keep the lower bound non-negative when threshold is below 0.15.
                negativeSpeechThreshold: Math.max(0, this.threshold - 0.15),
                redemptionFrames: 3,
                preSpeechPadFrames: 1,
                minSpeechFrames: 3,
                onSpeechStart: () => { },
                onSpeechEnd: () => { },
                onVADMisfire: () => { },
            });
        }
        catch (error) {
            console.warn('[OracleSDK] Silero VAD initialization failed, using energy-based fallback:', error);
            this.useFallback = true;
        }
        this.isInitialized = true;
    }
    /**
     * Classifies a frame as speech or non-speech.
     *
     * @param pcmFrame - Audio frame to classify.
     * @param threshold - Forwarded to the fallback detector.
     * @returns The fallback detector's verdict for the frame.
     */
    isSpeech(pcmFrame, threshold) {
        // For now, use energy-based detection as Silero requires specific frame processing.
        // The full Silero integration would require feeding frames through the ONNX model,
        // so the fallback is used whether or not the MicVAD instance was created.
        return this.fallback.isSpeech(pcmFrame, threshold);
    }
    /**
     * Destroys the MicVAD instance (when it exposes destroy()) and returns the
     * wrapper to its uninitialized state so initialize() can be run again.
     */
    dispose() {
        if (this.micVAD && typeof this.micVAD.destroy === 'function') {
            this.micVAD.destroy();
        }
        this.micVAD = null;
        this.isInitialized = false;
        // A failure from an earlier load must not leak into the next initialize().
        this.useFallback = false;
    }
}
|
|
103
|
+
/**
 * VADGate - 4-state Voice Activity Detection machine.
 *
 * State transitions:
 * - IDLE → ARMED: When VAD detects speech
 * - ARMED → SPEAKING: After armedConfirmFrames of continuous speech
 * - ARMED → IDLE: If speech stops before confirmation
 * - SPEAKING → TRAILING: When VAD detects silence
 * - TRAILING → SPEAKING: If speech resumes
 * - TRAILING → IDLE: After postrollMs timeout
 *
 * While idle or armed, frames are only retained in a bounded pre-roll buffer.
 * When speech is confirmed the buffer is flushed through onSendAudio, and each
 * later frame is forwarded as it arrives until the gate returns to idle.
 */
export class VADGate {
    /** Current state of the gate. */
    state = 'idle';
    /** Most recent frames, capped at config.prerollFrames entries. */
    preRollBuffer = [];
    /** Consecutive speech frames counted since the gate was armed. */
    armedFrameCount = 0;
    /** Handle of the pending post-roll timer, or null when none is scheduled. */
    trailingTimeout = null;
    config;
    vadModel;
    /** Callback when audio should be sent to server */
    onSendAudio = null;
    /** Callback when VAD state changes */
    onStateChange = null;
    /**
     * @param options - Resolved session options; the millisecond durations are
     *   converted to frame counts by rounding up.
     * @param frameDurationMs - Duration of one frame in milliseconds (default 20).
     */
    constructor(options, frameDurationMs = 20) {
        this.config = {
            prerollFrames: Math.ceil(options.prerollMs / frameDurationMs),
            armedConfirmFrames: Math.ceil(options.armedConfirmMs / frameDurationMs),
            postrollMs: options.postrollMs,
            threshold: options.vadThreshold,
            energyThreshold: options.energyThreshold,
        };
        // Use energy-based VAD as the primary implementation
        // Silero VAD requires complex ONNX setup that may not work in all environments
        this.vadModel = new EnergyVAD(this.config.energyThreshold);
    }
    /**
     * Initialize the VAD model (call before processing frames).
     * Only a SileroVAD model has asynchronous setup; otherwise this is a no-op.
     */
    async initialize() {
        if (this.vadModel instanceof SileroVAD) {
            await this.vadModel.initialize();
        }
    }
    /**
     * Get current VAD state.
     */
    getState() {
        return this.state;
    }
    /**
     * Process an audio frame through the VAD gate.
     *
     * @param pcmFrame - 20ms PCM16 audio frame (640 bytes)
     */
    processFrame(pcmFrame) {
        // Maintain circular pre-roll buffer
        this.preRollBuffer.push(pcmFrame);
        if (this.preRollBuffer.length > this.config.prerollFrames) {
            this.preRollBuffer.shift();
        }
        const isSpeech = this.vadModel.isSpeech(pcmFrame, this.config.threshold);
        switch (this.state) {
            case 'idle':
                if (isSpeech) {
                    this.setState('armed');
                    this.armedFrameCount = 1;
                }
                break;
            case 'armed':
                if (isSpeech) {
                    this.armedFrameCount++;
                    if (this.armedFrameCount >= this.config.armedConfirmFrames) {
                        // Confirmed speech - flush pre-roll buffer (it already
                        // contains the current frame, so nothing else is sent here)
                        this.onSendAudio?.(this.preRollBuffer.slice());
                        this.setState('speaking');
                    }
                }
                else {
                    // False trigger - back to idle
                    this.setState('idle');
                    this.armedFrameCount = 0;
                }
                break;
            case 'speaking':
                this.onSendAudio?.([pcmFrame]);
                if (!isSpeech) {
                    this.setState('trailing');
                    this.startTrailingTimer();
                }
                break;
            case 'trailing':
                // Post-roll frames are still forwarded so endings are not chopped
                this.onSendAudio?.([pcmFrame]);
                if (isSpeech) {
                    this.cancelTrailingTimer();
                    this.setState('speaking');
                }
                break;
        }
    }
    /**
     * Reset VAD state to idle.
     * Cancels the post-roll timer and drops buffered frames; the state is
     * assigned directly, so onStateChange is not invoked.
     */
    reset() {
        this.cancelTrailingTimer();
        this.state = 'idle';
        this.preRollBuffer = [];
        this.armedFrameCount = 0;
    }
    /**
     * Clean up resources.
     * Cancels the post-roll timer, disposes the VAD model and detaches both callbacks.
     */
    dispose() {
        this.cancelTrailingTimer();
        this.vadModel.dispose();
        this.onSendAudio = null;
        this.onStateChange = null;
    }
    /**
     * Update VAD state and fire callback.
     * Does nothing when the state is unchanged.
     */
    setState(newState) {
        if (this.state !== newState) {
            this.state = newState;
            this.onStateChange?.(newState);
        }
    }
    /**
     * Start the trailing timeout.
     * After postrollMs, transition back to idle.
     */
    startTrailingTimer() {
        // Never leave an earlier timer running unreferenced.
        this.cancelTrailingTimer();
        this.trailingTimeout = setTimeout(() => {
            this.trailingTimeout = null;
            // Every frame buffered since speech was confirmed has already been
            // forwarded. Drop them so the next confirmed utterance does not
            // flush (and re-send) audio the server has received.
            this.preRollBuffer = [];
            this.setState('idle');
        }, this.config.postrollMs);
    }
    /**
     * Cancel the trailing timeout.
     */
    cancelTrailingTimer() {
        if (this.trailingTimeout !== null) {
            clearTimeout(this.trailingTimeout);
            this.trailingTimeout = null;
        }
    }
}
|
|
248
|
+
//# sourceMappingURL=VADGate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"VADGate.js","sourceRoot":"","sources":["../../../../src/namespaces/oracle/internal/VADGate.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AA4BH;;;GAGG;AACH,MAAM,SAAS;IACO;IAApB,YAAoB,eAAuB;QAAvB,oBAAe,GAAf,eAAe,CAAQ;IAAG,CAAC;IAE/C,QAAQ,CAAC,QAAqB,EAAE,UAAkB;QAChD,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC5C,OAAO,oBAAoB,CAAC,UAAU,EAAE,IAAI,CAAC,eAAe,CAAC,CAAC;IAChE,CAAC;IAED,OAAO;QACL,2BAA2B;IAC7B,CAAC;CACF;AAED;;;GAGG;AACH,SAAS,oBAAoB,CAAC,QAAoB,EAAE,YAAoB,GAAG;IACzE,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,GAAG,IAAI,QAAQ,CAAC,CAAC,CAAE,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAC;IACrC,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,QAAQ,CAAC,MAAM,CAAC,GAAG,SAAS,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,SAAS;IAMO;IALZ,MAAM,GAAY,IAAI,CAAC;IACvB,aAAa,GAAG,KAAK,CAAC;IACtB,QAAQ,CAAY;IACpB,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAoB,SAAiB,EAAE,eAAuB;QAA1C,cAAS,GAAT,SAAS,CAAQ;QACnC,IAAI,CAAC,QAAQ,GAAG,IAAI,SAAS,CAAC,eAAe,CAAC,CAAC;IACjD,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,aAAa;YAAE,OAAO;QAE/B,IAAI,CAAC;YACH,sEAAsE;YACtE,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAErD,0BAA0B;YAC1B,IAAI,CAAC,MAAM,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC;gBACvC,uBAAuB,EAAE,IAAI,CAAC,SAAS;gBACvC,uBAAuB,EAAE,IAAI,CAAC,SAAS,GAAG,IAAI;gBAC9C,gBAAgB,EAAE,CAAC;gBACnB,kBAAkB,EAAE,CAAC;gBACrB,eAAe,EAAE,CAAC;gBAClB,aAAa,EAAE,GAAG,EAAE,GAAE,CAAC;gBACvB,WAAW,EAAE,GAAG,EAAE,GAAE,CAAC;gBACrB,YAAY,EAAE,GAAG,EAAE,GAAE,CAAC;aACvB,CAAC,CAAC;YAEH,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;QAC5B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,4EAA4E,EAAE,KAAK,CAAC,CAAC;YAClG,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;YACxB,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,QAAQ,CAAC,QAAqB,EAAE,SAAiB;QAC/C,IAAI,IAAI,CAAC,WAAW,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACrC,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QACrD,CAAC;QAED,mFAAmF;QACnF,kFAAkF;QAClF,gFAAgF;QAChF,OAAO,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,SAAS,
CAAC,CAAC;IACrD,CAAC;IAED,OAAO;QACL,IAAI,IAAI,CAAC,MAAM,IAAI,OAAQ,IAAI,CAAC,MAAmC,CAAC,OAAO,KAAK,UAAU,EAAE,CAAC;YAC1F,IAAI,CAAC,MAAkC,CAAC,OAAO,EAAE,CAAC;QACrD,CAAC;QACD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;QACnB,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;CACF;AAED;;;;;;;;;;GAUG;AACH,MAAM,OAAO,OAAO;IACV,KAAK,GAAa,MAAM,CAAC;IACzB,aAAa,GAAkB,EAAE,CAAC;IAClC,eAAe,GAAG,CAAC,CAAC;IACpB,eAAe,GAAyC,IAAI,CAAC;IAC7D,MAAM,CAAgB;IACtB,QAAQ,CAAW;IAE3B,mDAAmD;IAC5C,WAAW,GAA6C,IAAI,CAAC;IAEpE,sCAAsC;IAC/B,aAAa,GAAuC,IAAI,CAAC;IAEhE,YAAY,OAAqC,EAAE,kBAA0B,EAAE;QAC7E,IAAI,CAAC,MAAM,GAAG;YACZ,aAAa,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,GAAG,eAAe,CAAC;YAC7D,kBAAkB,EAAE,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,cAAc,GAAG,eAAe,CAAC;YACvE,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,SAAS,EAAE,OAAO,CAAC,YAAY;YAC/B,eAAe,EAAE,OAAO,CAAC,eAAe;SACzC,CAAC;QAEF,qDAAqD;QACrD,+EAA+E;QAC/E,IAAI,CAAC,QAAQ,GAAG,IAAI,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,QAAQ,YAAY,SAAS,EAAE,CAAC;YACvC,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,CAAC;QACnC,CAAC;IACH,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED;;;;OAIG;IACH,YAAY,CAAC,QAAqB;QAChC,oCAAoC;QACpC,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAClC,IAAI,IAAI,CAAC,aAAa,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;YAC1D,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC;QAC7B,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAEzE,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YACnB,KAAK,MAAM;gBACT,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;oBACvB,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;gBAC3B,CAAC;gBACD,MAAM;YAER,KAAK,OAAO;gBACV,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,CAAC,eAAe,EAAE,CAAC;oBACvB,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,MAAM,CAAC,kBAAkB,EAAE,CAAC;wBAC3D,2CAA2C;wBAC3C,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC,CAAC;wBAC/C,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;oBAC5B,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,+BAA+B;oBAC/B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;oBACtB,IAAI,CAAC,eA
Ae,GAAG,CAAC,CAAC;gBAC3B,CAAC;gBACD,MAAM;YAER,KAAK,UAAU;gBACb,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;gBAC/B,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;oBAC1B,IAAI,CAAC,kBAAkB,EAAE,CAAC;gBAC5B,CAAC;gBACD,MAAM;YAER,KAAK,UAAU;gBACb,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;gBAC/B,IAAI,QAAQ,EAAE,CAAC;oBACb,IAAI,CAAC,mBAAmB,EAAE,CAAC;oBAC3B,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;gBAC5B,CAAC;gBACD,MAAM;QACV,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC3B,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;QACpB,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC;QACxB,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,OAAO;QACL,IAAI,CAAC,mBAAmB,EAAE,CAAC;QAC3B,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QACxB,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,QAAQ,CAAC,QAAkB;QACjC,IAAI,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC5B,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC;YACtB,IAAI,CAAC,aAAa,EAAE,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,kBAAkB;QACxB,IAAI,CAAC,eAAe,GAAG,UAAU,CAAC,GAAG,EAAE;YACrC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACtB,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;IAC7B,CAAC;IAED;;OAEG;IACK,mBAAmB;QACzB,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI,EAAE,CAAC;YAClC,YAAY,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YACnC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC9B,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Oracle Namespace Types - Public interfaces for streaming transcription.
|
|
3
|
+
*
|
|
4
|
+
* Oracle provides real-time speech-to-text via Deepgram streaming,
|
|
5
|
+
* with built-in VAD (Voice Activity Detection) for cost optimization.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * States of the VAD (Voice Activity Detection) gate.
 *
 * - `idle`: no speech detected, audio is not being sent
 * - `armed`: possible speech detected, buffering until it is confirmed
 * - `speaking`: speech confirmed, audio is streamed
 * - `trailing`: speech has ended, the post-roll window is still active
 */
export type VADState =
    | 'idle'
    | 'armed'
    | 'speaking'
    | 'trailing';
|
|
17
|
+
/**
 * Configuration options for OracleSession.
 * Every field is optional; omitted fields take the value from DEFAULT_OPTIONS.
 */
export interface OracleSessionOptions {
    /** Pre-roll buffer duration in ms (default: 320) */
    prerollMs?: number;
    /** Post-roll duration in ms (default: 400) */
    postrollMs?: number;
    /** VAD confirmation time in ms (default: 80) */
    armedConfirmMs?: number;
    /**
     * Silero VAD speech threshold 0-1 (default: 0.5).
     * The energy-based detector ignores this value and uses `energyThreshold`.
     */
    vadThreshold?: number;
    /** RMS energy threshold for the energy-based VAD (default: 200) */
    energyThreshold?: number;
    /** Ping interval in ms for keepalive (default: 10000) */
    pingIntervalMs?: number;
}
|
|
34
|
+
/**
 * Session options after defaults have been filled in: the same fields as
 * OracleSessionOptions, but every one of them is required.
 */
export interface ResolvedOracleSessionOptions {
    /** Pre-roll buffer duration in ms. */
    prerollMs: number;
    /** Post-roll duration in ms. */
    postrollMs: number;
    /** VAD confirmation time in ms. */
    armedConfirmMs: number;
    /** VAD speech threshold. */
    vadThreshold: number;
    /** Energy threshold for the energy-based VAD. */
    energyThreshold: number;
    /** Keepalive ping interval in ms. */
    pingIntervalMs: number;
}
|
|
45
|
+
/**
 * Values OracleSession falls back to for options the caller leaves out.
 */
export declare const DEFAULT_OPTIONS: ResolvedOracleSessionOptions;
|
|
49
|
+
/**
 * Fixed parameters of the audio format.
 */
export declare const AUDIO_CONFIG: {
    /** Samples per second (16kHz). */
    readonly SAMPLE_RATE: 16000;
    /** Channel count (mono). */
    readonly CHANNELS: 1;
    /** Length of one frame in ms. */
    readonly FRAME_DURATION_MS: 20;
    /** Bytes per frame: 320 samples x 2 bytes per sample. */
    readonly FRAME_SIZE_BYTES: 640;
    /** Samples per frame: 16kHz x 20ms. */
    readonly SAMPLES_PER_FRAME: 320;
};
|
|
59
|
+
/**
 * WebSocket messages sent by the server. Each shape is identified by its
 * literal `type` field.
 */
export interface ServerReadyMessage {
    type: 'ready';
}
/** Carries transcribed text together with its finality flags. */
export interface ServerTranscriptMessage {
    type: 'transcript';
    text: string;
    is_final: boolean;
    speech_final: boolean;
}
/** Message tagged `pong`; it has no payload. */
export interface ServerPongMessage {
    type: 'pong';
}
/** Error report with a human-readable message. */
export interface ServerErrorMessage {
    type: 'error';
    message: string;
}
/** Any message the server can send. */
export type ServerMessage =
    | ServerReadyMessage
    | ServerTranscriptMessage
    | ServerPongMessage
    | ServerErrorMessage;
|
|
79
|
+
/**
 * Optional event handlers a caller can attach to an OracleSession.
 */
export interface OracleSessionCallbacks {
    /** Fired once the session is ready to receive speech. */
    onReady?: () => void;
    /** Fired with interim transcript text, which may still change. */
    onInterim?: (text: string) => void;
    /** Fired when a transcript segment has been finalized. */
    onFinal?: (text: string) => void;
    /** Fired when the user finishes an utterance (trigger for Curator). */
    onUtterance?: (text: string) => void;
    /** Fired for any error. */
    onError?: (error: Error) => void;
    /** Fired when the session closes. */
    onClose?: () => void;
    /** Fired whenever the VAD state changes (for UI indicators). */
    onVADStateChange?: (state: VADState) => void;
}
|
|
98
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/namespaces/oracle/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;;;;;;GAQG;AACH,MAAM,MAAM,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,UAAU,GAAG,UAAU,CAAC;AAElE;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,gDAAgD;IAChD,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,+EAA+E;IAC/E,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,uDAAuD;IACvD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,yDAAyD;IACzD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;GAEG;AACH,MAAM,WAAW,4BAA4B;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;IACvB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAED;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,4BAO7B,CAAC;AAEF;;GAEG;AACH,eAAO,MAAM,YAAY;;;;;;CAMf,CAAC;AAEX;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,OAAO,CAAC;CACf;AAED,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,YAAY,CAAC;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,OAAO,CAAC;IAClB,YAAY,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,kBAAkB;IACjC,IAAI,EAAE,OAAO,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,MAAM,aAAa,GACrB,kBAAkB,GAClB,uBAAuB,GACvB,iBAAiB,GACjB,kBAAkB,CAAC;AAEvB;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,qDAAqD;IACrD,OAAO,CAAC,EAAE,MAAM,IAAI,CAAC;IACrB,mDAAmD;IACnD,SAAS,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACnC,oDAAoD;IACpD,OAAO,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACjC,mEAAmE;IACnE,WAAW,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,IAAI,CAAC;IACrC,0BAA0B;IAC1B,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,KAAK,IAAI,CAAC;IACjC,iCAAiC;IACjC,OAAO,CAAC,EAAE,MAAM,IAAI,CAAC;IACrB,wDAAwD;IACxD,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;CAC9C"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Oracle Namespace Types - Public interfaces for streaming transcription.
|
|
3
|
+
*
|
|
4
|
+
* Oracle provides real-time speech-to-text via Deepgram streaming,
|
|
5
|
+
* with built-in VAD (Voice Activity Detection) for cost optimization.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Values OracleSession uses for any option the caller does not supply.
 */
export const DEFAULT_OPTIONS = {
    // Pre-roll: 16 frames x 20ms
    prerollMs: 320,
    // How long the post-roll (trailing) phase lasts
    postrollMs: 400,
    // Speech is confirmed after 4 frames x 20ms
    armedConfirmMs: 80,
    // Threshold for Silero VAD
    vadThreshold: 0.5,
    // Threshold for the energy-based fallback (lowered for sensitivity)
    energyThreshold: 200,
    // Keepalive ping every 10 seconds
    pingIntervalMs: 10000,
};
|
|
18
|
+
/**
 * Fixed parameters of the audio format.
 */
export const AUDIO_CONFIG = {
    // 16kHz sample rate
    SAMPLE_RATE: 16000,
    // Mono
    CHANNELS: 1,
    // Each frame spans 20ms
    FRAME_DURATION_MS: 20,
    // 320 samples x 2 bytes per sample
    FRAME_SIZE_BYTES: 640,
    // 16kHz x 20ms
    SAMPLES_PER_FRAME: 320,
};
|
|
28
|
+
//# sourceMappingURL=types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/namespaces/oracle/types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AA2CH;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAiC;IAC3D,SAAS,EAAE,GAAG,EAAU,mBAAmB;IAC3C,UAAU,EAAE,GAAG,EAAS,8BAA8B;IACtD,cAAc,EAAE,EAAE,EAAM,oCAAoC;IAC5D,YAAY,EAAE,GAAG,EAAO,uBAAuB;IAC/C,eAAe,EAAE,GAAG,EAAI,4DAA4D;IACpF,cAAc,EAAE,KAAK,EAAG,0BAA0B;CACnD,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG;IAC1B,WAAW,EAAE,KAAK,EAAM,QAAQ;IAChC,QAAQ,EAAE,CAAC,EAAa,OAAO;IAC/B,iBAAiB,EAAE,EAAE,EAAG,cAAc;IACtC,gBAAgB,EAAE,GAAG,EAAG,mCAAmC;IAC3D,iBAAiB,EAAE,GAAG,EAAE,eAAe;CAC/B,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "dominus-sdk-nodejs",
|
|
3
|
-
"version": "1.2.
|
|
3
|
+
"version": "1.2.20",
|
|
4
4
|
"description": "Node.js SDK for the Dominus Orchestrator Platform",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"url": "https://github.com/carebridgesystems/dominus-sdk-nodejs.git"
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
|
+
"@ricky0123/vad-web": "^0.0.22",
|
|
33
34
|
"bcryptjs": "^2.4.3"
|
|
34
35
|
},
|
|
35
36
|
"devDependencies": {
|