@unboundcx/sdk 2.8.6 → 2.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ import { EventEmitter } from 'events';
2
+ import path from 'path';
3
+ import { fileURLToPath } from 'url';
4
+
5
// ES modules do not provide the CommonJS __filename/__dirname globals, so
// both are derived from this module's own URL.
const __filename = fileURLToPath(new URL(import.meta.url));
const __dirname = path.dirname(__filename);
7
+
8
/**
 * SttStream - Speech-to-Text streaming interface
 *
 * Wraps a gRPC bidirectional stream to the transcription service behind an
 * EventEmitter API.
 *
 * Events:
 * - 'ready': Stream connected and ready for audio
 * - 'transcript': Transcription result received (interim or final)
 * - 'final-transcript': Final transcription result received
 * - 'error': Error occurred
 * - 'close': Stream closed (every listener is detached right after this event)
 *
 * Methods:
 * - write(audioChunk, streamMetadata): Send audio data (Buffer or Uint8Array)
 * - end(): Close stream gracefully
 * - close(): Tear the stream down immediately and release its resources
 */
export class SttStream extends EventEmitter {
  /**
   * Creates the stream and starts connecting in the background. Attach an
   * 'error' listener right after construction: connection failures are
   * reported through that event, not thrown from the constructor.
   *
   * @param {Object} sdk - Owning SDK instance (stored on the stream, not used by this class)
   * @param {Object} session - Session descriptor; this class reads `id`, `token`, `grpcHost` and `grpcPort`
   * @param {Object} [options] - Transcription options (languageCode, engine, encoding, sampleRateHertz, ...)
   */
  constructor(sdk, session, options) {
    super();

    this.sdk = sdk;
    this.session = session;
    // A missing options object would otherwise make every write() fail with a TypeError.
    this.options = options ?? {};

    // Connection state
    this.isReady = false;
    this.isClosed = false;
    this.grpcCall = null;
    this.grpcClient = null;
    this.firstChunkSent = false;

    // Operations requested before the connection is ready, in call order.
    this.pendingOps = [];

    // Initialize connection asynchronously
    this._initialize().catch((error) => {
      this.emit('error', error);
    });
  }

  /**
   * Loads gRPC lazily, opens the bidirectional StreamTranscribe call and wires
   * its events to this emitter. Emits 'ready' on success; on failure emits
   * 'error' and closes the stream.
   *
   * @returns {Promise<void>}
   */
  async _initialize() {
    try {
      // Dynamic import of gRPC (only loads when stream is used)
      const [grpc, protoLoader] = await Promise.all([
        import('@grpc/grpc-js'),
        import('@grpc/proto-loader'),
      ]);

      // close() may have run while the imports were pending; opening a call
      // now would leak a connection and emit 'ready' on a closed stream.
      if (this.isClosed) {
        return;
      }

      // Load proto file from SDK proto directory
      const PROTO_PATH = path.join(__dirname, '../../proto/transcription.proto');

      const packageDefinition = protoLoader.loadSync(PROTO_PATH, {
        keepCase: true,
        longs: String,
        enums: String,
        defaults: true,
        oneofs: true,
      });

      const protoDescriptor = grpc.loadPackageDefinition(packageDefinition);
      const TranscriptionService =
        protoDescriptor.transcription.TranscriptionService;

      // Create gRPC client
      // NOTE(review): insecure credentials mean the session token in the first
      // chunk travels unencrypted — confirm this endpoint is only reachable
      // over a trusted network.
      const endpoint = `${this.session.grpcHost}:${this.session.grpcPort}`;
      const client = new TranscriptionService(
        endpoint,
        grpc.credentials.createInsecure(),
      );

      // Keep the client so close() can release its channel.
      this.grpcClient = client;

      // Create bidirectional stream
      this.grpcCall = client.StreamTranscribe();

      // Setup event handlers
      this.grpcCall.on('data', (response) => {
        this._handleTranscript(response);
      });

      this.grpcCall.on('error', (error) => {
        if (!this.isClosed) {
          try {
            this.emit('error', error);
          } finally {
            // Release resources even when the emit throws (no 'error'
            // listener attached, or a listener that throws).
            this.close();
          }
        }
      });

      this.grpcCall.on('end', () => {
        this.close();
      });

      // Mark as ready
      this.isReady = true;
      this.emit('ready');
    } catch (error) {
      try {
        this.emit(
          'error',
          new Error(`Failed to initialize gRPC connection: ${error.message}`, {
            cause: error,
          }),
        );
      } finally {
        this.close();
      }
    }
  }

  /**
   * Converts one service response into a transcript payload and emits it as
   * 'transcript', plus 'final-transcript' when the result is final. Responses
   * without transcript text are ignored.
   *
   * The proto is loaded with `keepCase: true`, so fields are expected under
   * their snake_case names (is_final, start_time, end_time, sip_call_id); the
   * camelCase spellings are accepted as well.
   *
   * @param {Object} response - Message received on the gRPC stream
   */
  _handleTranscript(response) {
    if (!response?.transcript) {
      return;
    }

    const transcriptData = {
      text: response.transcript,
      isFinal: response.isFinal || response.is_final || false,
      confidence: response.confidence || 0,
      languageCode: response.language,
      words: response.words || [],
      // `??` rather than `||`: a segment may legitimately start or end at 0.
      startTime: response.startTime ?? response.start_time,
      endTime: response.endTime ?? response.end_time,
      // Local receive time, not the service-side timestamp.
      timestamp: new Date(),
      // Stream identification
      sipCallId: response.sipCallId || response.sip_call_id || '',
      side: response.side || '',
      role: response.role || '',
    };

    this.emit('transcript', transcriptData);

    // If final transcript, also emit final-transcript event
    if (transcriptData.isFinal) {
      this.emit('final-transcript', transcriptData);
    }
  }

  /**
   * Queues an operation until the connection is ready.
   *
   * A single 'ready' listener drains the whole queue, so buffering many chunks
   * before the connection is up neither trips the EventEmitter max-listeners
   * warning nor reorders the chunks.
   *
   * @param {Function} operation - Zero-argument callback to run once ready
   */
  _deferUntilReady(operation) {
    this.pendingOps.push(operation);
    if (this.pendingOps.length === 1) {
      this.once('ready', () => this._flushDeferred());
    }
  }

  /**
   * Runs the queued operations in call order; stops early if one of them ends
   * up closing the stream.
   */
  _flushDeferred() {
    const operations = this.pendingOps;
    this.pendingOps = [];
    for (const operation of operations) {
      if (this.isClosed) {
        break;
      }
      operation();
    }
  }

  /**
   * Builds the fields that are only sent with the first chunk: credentials,
   * language/engine selection, audio configuration and processing flags.
   *
   * @returns {Object} Extra request fields for the first chunk
   */
  _buildFirstChunkFields() {
    const { options, session } = this;
    return {
      token: session.token,
      language: options.languageCode || 'en-US',
      engine: options.engine || 'google',
      config: {
        encoding: options.encoding || 'LINEAR16',
        sample_rate_hertz: options.sampleRateHertz || 16000,
        audio_channel_count: options.audioChannelCount || 1,
        vad_enabled: options.vadEnabled || false,
        // `??` so an explicit 0 is forwarded instead of replaced by the default.
        min_silence_duration_ms: options.minSilenceDuration ?? 500,
        speech_pad_ms: options.speechPadMs ?? 400,
      },
      playbook_id: options.playbookId || '',
      task_id: options.taskId || '',
      worker_id: options.workerId || '',
      generate_subject: options.generateSubject || false,
      generate_transcript_summary: options.generateTranscriptSummary || false,
      generate_sentiment: options.generateSentiment || false,
    };
  }

  /**
   * Write audio chunk to stream
   *
   * Chunks written before the connection is ready are buffered and sent in
   * order once it is. Writing to a closed stream returns false; an 'error'
   * event is emitted only if an 'error' listener is attached at that point.
   *
   * @param {Buffer|Uint8Array} audioChunk - Audio data
   * @param {Object} streamMetadata - Stream identification (sipCallId, side, role, isLastChunk)
   * @param {string} streamMetadata.sipCallId - SIP call identifier
   * @param {string} streamMetadata.side - 'send' or 'recv'
   * @param {string} streamMetadata.role - Optional speaker role (e.g., 'customer', 'agent', 'system')
   * @param {boolean} streamMetadata.isLastChunk - If true, marks this specific stream as complete
   * @param {string} streamMetadata.bridgeId - Optional bridge identifier, forwarded as bridge_id
   * @param {*} streamMetadata.vad_event - Optional VAD event; when set on a non-first chunk it is
   *   forwarded together with vad_timestamp, vad_energy and vad_duration
   * @returns {boolean} - True if the chunk was sent or buffered, false otherwise
   */
  write(audioChunk, streamMetadata = {}) {
    if (this.isClosed) {
      // close() detaches every listener, so an unconditional emit('error')
      // here would throw instead of letting this method return false.
      if (this.listenerCount('error') > 0) {
        this.emit('error', new Error('Stream is closed'));
      }
      return false;
    }

    if (!this.isReady) {
      // Wait for ready and then write
      this._deferUntilReady(() => this.write(audioChunk, streamMetadata));
      return true;
    }

    try {
      const metadata = streamMetadata ?? {};
      const {
        sipCallId = '',
        side = '',
        role = '',
        isLastChunk = false,
        bridgeId = '',
      } = metadata;

      const isFirstChunk = !this.firstChunkSent;

      // Audio, session ID and stream identification are sent on every chunk.
      const request = {
        audio_chunk: audioChunk,
        session_id: this.session.id,
        is_first_chunk: isFirstChunk,
        is_last_chunk: isLastChunk,
        sip_call_id: sipCallId,
        side,
        role,
        bridge_id: bridgeId,
      };

      if (isFirstChunk) {
        // First chunk includes token and configuration
        Object.assign(request, this._buildFirstChunkFields());
      } else if (metadata.vad_event) {
        // Include VAD fields if present in metadata
        request.vad_event = metadata.vad_event;
        request.vad_timestamp = metadata.vad_timestamp;

        if (metadata.vad_energy !== undefined) {
          request.vad_energy = metadata.vad_energy;
        }
        if (metadata.vad_duration !== undefined) {
          request.vad_duration = metadata.vad_duration;
        }
      }

      this.grpcCall.write(request);

      // Only flip the flag after a successful write so that a failed first
      // chunk is retried with the token and configuration attached.
      if (isFirstChunk) {
        this.firstChunkSent = true;
      }

      return true;
    } catch (error) {
      this.emit(
        'error',
        new Error(`Failed to write audio chunk: ${error.message}`, {
          cause: error,
        }),
      );
      return false;
    }
  }

  /**
   * End the stream gracefully
   *
   * Sends an empty last-chunk marker (when audio has been sent) and half-closes
   * the call. When called before the connection is ready, the request is
   * queued behind any buffered writes instead of being dropped.
   */
  end() {
    if (this.isClosed) {
      return;
    }

    if (!this.grpcCall) {
      // Still connecting: run after the buffered chunks have been sent.
      this._deferUntilReady(() => this.end());
      return;
    }

    try {
      // Send final chunk marker
      if (this.firstChunkSent) {
        this.grpcCall.write({
          audio_chunk: Buffer.alloc(0),
          session_id: this.session.id,
          is_first_chunk: false,
          is_last_chunk: true,
        });
      }
      this.grpcCall.end();
    } catch {
      // Best effort: the stream is being shut down, so failures here are
      // deliberately not reported.
    }
  }

  /**
   * Close the stream and cleanup resources
   *
   * Idempotent. Drops any operations still waiting for the connection, ends
   * the gRPC call, closes the client, emits 'close' and then removes every
   * listener from this emitter.
   */
  close() {
    if (this.isClosed) return;

    this.isClosed = true;
    this.isReady = false;
    this.pendingOps = [];

    if (this.grpcCall) {
      try {
        this.grpcCall.end();
      } catch {
        // Ignore errors during cleanup
      }
      this.grpcCall = null;
    }

    if (this.grpcClient) {
      try {
        // Release the client's channel; without this it is never closed.
        this.grpcClient.close();
      } catch {
        // Ignore errors during cleanup
      }
      this.grpcClient = null;
    }

    this.emit('close');
    this.removeAllListeners();
  }

  /**
   * Check if stream is ready for audio
   * @returns {boolean}
   */
  get ready() {
    return this.isReady && !this.isClosed;
  }

  /**
   * Get the session ID
   * @returns {string}
   */
  get sessionId() {
    return this.session.id;
  }
}