@mastra/voice-aws-nova-sonic 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1535 @@
1
+ import { PassThrough } from 'stream';
2
+ import { randomUUID } from 'crypto';
3
+ import { MastraVoice } from '@mastra/core/voice';
4
+ import { BedrockRuntimeClient, InvokeModelWithBidirectionalStreamCommand } from '@aws-sdk/client-bedrock-runtime';
5
+ import { NodeHttp2Handler } from '@smithy/node-http-handler';
6
+ import { defaultProvider } from '@aws-sdk/credential-provider-node';
7
+
8
+ // src/index.ts
9
+
10
+ // src/types.ts
11
+ var NovaSonicErrorCode = /* @__PURE__ */ ((NovaSonicErrorCode2) => {
12
+ NovaSonicErrorCode2["CONNECTION_FAILED"] = "connection_failed";
13
+ NovaSonicErrorCode2["CONNECTION_NOT_ESTABLISHED"] = "connection_not_established";
14
+ NovaSonicErrorCode2["AUTHENTICATION_FAILED"] = "authentication_failed";
15
+ NovaSonicErrorCode2["CREDENTIALS_MISSING"] = "credentials_missing";
16
+ NovaSonicErrorCode2["REGION_INVALID"] = "region_invalid";
17
+ NovaSonicErrorCode2["WEBSOCKET_ERROR"] = "websocket_error";
18
+ NovaSonicErrorCode2["AUDIO_PROCESSING_ERROR"] = "audio_processing_error";
19
+ NovaSonicErrorCode2["AUDIO_STREAM_ERROR"] = "audio_stream_error";
20
+ NovaSonicErrorCode2["SPEAKER_STREAM_ERROR"] = "speaker_stream_error";
21
+ NovaSonicErrorCode2["TRANSCRIPTION_TIMEOUT"] = "transcription_timeout";
22
+ NovaSonicErrorCode2["TRANSCRIPTION_FAILED"] = "transcription_failed";
23
+ NovaSonicErrorCode2["TOOL_EXECUTION_ERROR"] = "tool_execution_error";
24
+ NovaSonicErrorCode2["TOOL_NOT_FOUND"] = "tool_not_found";
25
+ NovaSonicErrorCode2["SESSION_CONFIG_UPDATE_FAILED"] = "session_config_update_failed";
26
+ NovaSonicErrorCode2["INVALID_AUDIO_FORMAT"] = "invalid_audio_format";
27
+ NovaSonicErrorCode2["NOT_CONNECTED"] = "not_connected";
28
+ NovaSonicErrorCode2["INVALID_STATE"] = "invalid_state";
29
+ NovaSonicErrorCode2["VALIDATION_ERROR"] = "validation_error";
30
+ NovaSonicErrorCode2["UNKNOWN_ERROR"] = "unknown_error";
31
+ return NovaSonicErrorCode2;
32
+ })(NovaSonicErrorCode || {});
33
+
34
+ // src/utils/errors.ts
35
+ var NovaSonicError = class extends Error {
36
+ code;
37
+ details;
38
+ timestamp;
39
+ constructor(code, message, details) {
40
+ super(message);
41
+ this.name = "NovaSonicError";
42
+ this.code = code;
43
+ this.details = details;
44
+ this.timestamp = Date.now();
45
+ }
46
+ toEventData() {
47
+ return {
48
+ message: this.message,
49
+ code: this.code,
50
+ details: this.details,
51
+ timestamp: this.timestamp
52
+ };
53
+ }
54
+ };
55
/**
 * Resolve the AWS credentials to use for the Bedrock client.
 *
 * Explicit credentials, when given, are returned as-is. Otherwise the AWS SDK
 * default provider chain is invoked.
 *
 * @param explicitCredentials - Credentials supplied in the voice config, if any
 * @param debug - When truthy, progress messages are written with console.log
 * @returns The explicit credentials, or whatever the default provider resolves
 * @throws {NovaSonicError} `authentication_failed` when the default provider
 *   chain rejects; a NovaSonicError raised inside the chain is rethrown unchanged
 */
async function getAwsCredentials(explicitCredentials, debug) {
  // Small local helper so the debug gate is written once.
  const trace = (text) => {
    if (debug) {
      console.log(text);
    }
  };

  if (explicitCredentials) {
    trace("[getAwsCredentials] Using explicit credentials provided in config");
    return explicitCredentials;
  }

  try {
    trace("[getAwsCredentials] Using default credential provider chain");
    const resolved = await defaultProvider()();
    trace("[getAwsCredentials] Credentials retrieved successfully");
    return resolved;
  } catch (error) {
    if (error instanceof NovaSonicError) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : "Unknown error";
    throw new NovaSonicError(
      "authentication_failed" /* AUTHENTICATION_FAILED */,
      `Failed to load AWS credentials: ${reason}`,
      error
    );
  }
}
82
+
83
+ // src/index.ts
84
+ var DEFAULT_MODEL = "amazon.nova-2-sonic-v1:0"; // Model ID used when the config does not provide one.
85
+ var DEFAULT_REGION = "us-east-1"; // AWS region used when the config does not provide one.
86
+ var NovaSonicVoice = class extends MastraVoice {
87
+ client;
88
+ stream;
89
+ inputStream;
90
+ // Input stream for sending events to AWS
91
+ _eventQueue;
92
+ _signalQueue;
93
+ _closeSignal;
94
+ _promptName;
95
+ state = "disconnected";
96
+ events;
97
+ instructions;
98
+ tools;
99
+ requestContext;
100
+ debug;
101
+ region;
102
+ model;
103
+ credentials;
104
+ speakerStreams;
105
+ currentResponseId;
106
+ processingStream = false;
107
+ streamRestartAttempted = false;
108
+ // Prevent multiple restart attempts
109
+ sessionConfig;
110
+ promptStarted = false;
111
+ // Track if promptStart was sent (now sent during connection)
112
+ audioContentName;
113
+ audioContentStarted = false;
114
+ hasSentContentEnd = false;
115
+ // Track if contentEnd has been sent for current turn
116
+ turnCompleted = false;
117
+ // Track if turn has been completed (to prevent sending contentEnd after turn completion)
118
+ turnCompleteTimeout;
119
+ // Timeout for fallback turn completion
120
+ isReceivingAssistantAudio = false;
121
+ // Track if we're currently receiving assistant audio output
122
+ currentTextGenerationStage;
123
+ // Track generationStage (SPECULATIVE|FINAL) for current text content block
124
+ /**
125
+ * Creates a new instance of NovaSonicVoice.
126
+ *
127
+ * @param config - Configuration options for the voice instance
128
+ * @param config.region - AWS region (defaults to us-east-1)
129
+ * @param config.model - The model ID to use (defaults to amazon.nova-2-sonic-v1:0)
130
+ * @param config.credentials - AWS credentials (optional, uses default credential chain)
131
+ * @param config.speaker - Voice name/identifier
132
+ * @param config.languageCode - Language code for the voice
133
+ * @param config.debug - Enable debug mode
134
+ *
135
+ * @example
136
+ * ```typescript
137
+ * const voice = new NovaSonicVoice({
138
+ * region: 'us-east-1',
139
+ * model: 'amazon.nova-2-sonic-v1:0',
140
+ * speaker: 'default',
141
+ * });
142
+ * ```
143
+ */
144
+ constructor(config = {}) {
145
+ let normalizedConfig;
146
+ if ("realtimeConfig" in config || "speechModel" in config || "listeningModel" in config) {
147
+ normalizedConfig = config;
148
+ } else {
149
+ const configOptions = config;
150
+ normalizedConfig = {
151
+ realtimeConfig: {
152
+ model: configOptions.model || DEFAULT_MODEL,
153
+ apiKey: void 0,
154
+ // AWS doesn't use API keys
155
+ options: configOptions
156
+ },
157
+ speaker: typeof configOptions.speaker === "string" ? configOptions.speaker : "matthew"
158
+ };
159
+ }
160
+ super(normalizedConfig);
161
+ const options = normalizedConfig.realtimeConfig?.options || config;
162
+ this.region = options.region || DEFAULT_REGION;
163
+ this.model = options.model || DEFAULT_MODEL;
164
+ this.credentials = options.credentials;
165
+ this.debug = options.debug || false;
166
+ this.sessionConfig = options.sessionConfig;
167
+ this.events = {};
168
+ this.speakerStreams = /* @__PURE__ */ new Map();
169
+ const validRegions = ["us-east-1", "us-west-2", "ap-northeast-1"];
170
+ if (!validRegions.includes(this.region)) {
171
+ throw new NovaSonicError(
172
+ "region_invalid" /* REGION_INVALID */,
173
+ `Invalid region: ${this.region}. Supported regions: ${validRegions.join(", ")}`
174
+ );
175
+ }
176
+ }
177
+ /**
178
+ * Returns a list of available voice speakers.
179
+ *
180
+ * Nova 2 Sonic provides expressive voices across multiple languages.
181
+ * Tiffany (en-US, feminine) and Matthew (en-US, masculine) are polyglot
182
+ * voices that can speak all supported languages.
183
+ *
184
+ * @returns Promise resolving to an array of voice objects
185
+ */
186
+ async getSpeakers() {
187
+ return Promise.resolve([
188
+ // English (US) - Polyglot voices
189
+ { voiceId: "tiffany", name: "Tiffany", language: "English", locale: "en-US", gender: "feminine", polyglot: true },
190
+ { voiceId: "matthew", name: "Matthew", language: "English", locale: "en-US", gender: "masculine", polyglot: true },
191
+ // English (UK)
192
+ { voiceId: "amy", name: "Amy", language: "English", locale: "en-GB", gender: "feminine", polyglot: false },
193
+ // English (Australia)
194
+ { voiceId: "olivia", name: "Olivia", language: "English", locale: "en-AU", gender: "feminine", polyglot: false },
195
+ // English (Indian)
196
+ { voiceId: "kiara", name: "Kiara", language: "English", locale: "en-IN", gender: "feminine", polyglot: false },
197
+ { voiceId: "arjun", name: "Arjun", language: "English", locale: "en-IN", gender: "masculine", polyglot: false },
198
+ // French
199
+ { voiceId: "ambre", name: "Ambre", language: "French", locale: "fr-FR", gender: "feminine", polyglot: false },
200
+ { voiceId: "florian", name: "Florian", language: "French", locale: "fr-FR", gender: "masculine", polyglot: false },
201
+ // Italian
202
+ { voiceId: "beatrice", name: "Beatrice", language: "Italian", locale: "it-IT", gender: "feminine", polyglot: false },
203
+ { voiceId: "lorenzo", name: "Lorenzo", language: "Italian", locale: "it-IT", gender: "masculine", polyglot: false },
204
+ // German
205
+ { voiceId: "tina", name: "Tina", language: "German", locale: "de-DE", gender: "feminine", polyglot: false },
206
+ { voiceId: "lennart", name: "Lennart", language: "German", locale: "de-DE", gender: "masculine", polyglot: false },
207
+ // Spanish (US)
208
+ { voiceId: "lupe", name: "Lupe", language: "Spanish", locale: "es-US", gender: "feminine", polyglot: false },
209
+ { voiceId: "carlos", name: "Carlos", language: "Spanish", locale: "es-US", gender: "masculine", polyglot: false },
210
+ // Portuguese
211
+ { voiceId: "carolina", name: "Carolina", language: "Portuguese", locale: "pt-BR", gender: "feminine", polyglot: false },
212
+ { voiceId: "leo", name: "Leo", language: "Portuguese", locale: "pt-BR", gender: "masculine", polyglot: false },
213
+ // Hindi
214
+ { voiceId: "kiara", name: "Kiara", language: "Hindi", locale: "hi-IN", gender: "feminine", polyglot: false },
215
+ { voiceId: "arjun", name: "Arjun", language: "Hindi", locale: "hi-IN", gender: "masculine", polyglot: false }
216
+ ]);
217
+ }
218
+ /**
219
+ * Establishes a connection to the AWS Bedrock bidirectional streaming service.
220
+ * Must be called before using speak, listen, or send functions.
221
+ *
222
+ * @throws {NovaSonicError} If connection fails or credentials are missing
223
+ *
224
+ * @example
225
+ * ```typescript
226
+ * await voice.connect();
227
+ * // Now ready for voice interactions
228
+ * ```
229
+ */
230
+ async connect({ requestContext } = {}) {
231
+ if (this.state === "connected" || this.state === "connecting") {
232
+ this.log("Already connected or connecting");
233
+ return;
234
+ }
235
+ this.state = "connecting";
236
+ this.requestContext = requestContext;
237
+ this.streamRestartAttempted = false;
238
+ try {
239
+ await this.createBedrockClient();
240
+ const asyncIterable = this.createEventQueue();
241
+ this.enqueueInitialSessionEvents();
242
+ await this.sendInitialConnectCommand(asyncIterable);
243
+ this.processStream().catch((error) => {
244
+ this.log("Error in stream processing:", error);
245
+ this.emit("error", {
246
+ message: error instanceof Error ? error.message : "Stream processing error",
247
+ code: "STREAM_PROCESSING_ERROR",
248
+ details: error
249
+ });
250
+ });
251
+ this.log("Connected to AWS Bedrock Nova 2 Sonic");
252
+ } catch (error) {
253
+ this.state = "disconnected";
254
+ if (this.client) {
255
+ if (typeof this.client.destroy === "function") {
256
+ this.client.destroy();
257
+ }
258
+ this.client = void 0;
259
+ }
260
+ this.log("Connection error:", error);
261
+ const errorMessage = error instanceof Error ? error.message : "Unknown error during connection";
262
+ throw new NovaSonicError(
263
+ "connection_failed" /* CONNECTION_FAILED */,
264
+ `Failed to connect to AWS Bedrock: ${errorMessage}`,
265
+ error
266
+ );
267
+ }
268
+ }
269
+ /**
270
+ * Resolve credentials and initialize the Bedrock Runtime client over HTTP/2.
271
+ */
272
+ async createBedrockClient() {
273
+ this.log("Getting AWS credentials...");
274
+ const credentials = await getAwsCredentials(this.credentials, this.debug);
275
+ if (!credentials) {
276
+ throw new NovaSonicError(
277
+ "credentials_missing" /* CREDENTIALS_MISSING */,
278
+ "AWS credentials are required. Please configure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables or provide credentials in the config."
279
+ );
280
+ }
281
+ this.log("Credentials retrieved:", {
282
+ hasAccessKeyId: !!credentials.accessKeyId,
283
+ hasSecretAccessKey: !!credentials.secretAccessKey,
284
+ hasSessionToken: !!credentials.sessionToken,
285
+ accessKeyIdPrefix: credentials.accessKeyId ? `${credentials.accessKeyId.substring(0, 6)}...` : "missing",
286
+ expiration: credentials.expiration ? credentials.expiration.toISOString() : "no expiration"
287
+ });
288
+ this.log(`Initializing Bedrock Runtime client for region: ${this.region}, model: ${this.model}`);
289
+ const nodeHttp2Handler = new NodeHttp2Handler({
290
+ requestTimeout: 3e5,
291
+ // 5 minutes
292
+ sessionTimeout: 3e5,
293
+ // 5 minutes
294
+ disableConcurrentStreams: false,
295
+ maxConcurrentStreams: 20
296
+ });
297
+ this.client = new BedrockRuntimeClient({
298
+ region: this.region,
299
+ credentials,
300
+ requestHandler: nodeHttp2Handler
301
+ });
302
+ }
303
+ /**
304
+ * Build the async-iterable event queue used as the request body for the
305
+ * bidirectional stream. Returns the iterable and wires up internal queue
306
+ * helpers (_eventQueue, _signalQueue, _closeSignal) used by sendClientEvent.
307
+ */
308
+ createEventQueue() {
309
+ this.log("Creating bidirectional stream command...");
310
+ const voiceInstance = this;
311
+ const eventQueue = [];
312
+ const pendingResolvers = [];
313
+ let closeSignal = false;
314
+ const signalQueue = () => {
315
+ if (pendingResolvers.length > 0) {
316
+ voiceInstance.log(`[AsyncIterable] Signaling queue - resolving ${pendingResolvers.length} pending Promise(s)`);
317
+ const resolvers = [...pendingResolvers];
318
+ pendingResolvers.length = 0;
319
+ resolvers.forEach((resolve) => resolve());
320
+ } else {
321
+ voiceInstance.log("[AsyncIterable] signalQueue called but no pending Promise");
322
+ }
323
+ };
324
+ const asyncIterable = {
325
+ [Symbol.asyncIterator]: () => {
326
+ voiceInstance.log("[AsyncIterable] Iterator requested");
327
+ return {
328
+ next: async () => {
329
+ try {
330
+ if (closeSignal || voiceInstance.state === "disconnected") {
331
+ voiceInstance.log(`[AsyncIterable] Stream closed (state: ${voiceInstance.state}), done = true`);
332
+ return { value: void 0, done: true };
333
+ }
334
+ if (eventQueue.length === 0) {
335
+ try {
336
+ voiceInstance.log("[AsyncIterable] Queue empty, waiting for signal...");
337
+ await new Promise((resolve) => {
338
+ pendingResolvers.push(resolve);
339
+ voiceInstance.log(`[AsyncIterable] Promise created, waiting for signal (${pendingResolvers.length} pending)...`);
340
+ setImmediate(() => {
341
+ if (eventQueue.length > 0) {
342
+ voiceInstance.log("[AsyncIterable] Data arrived before wait, resolving immediately");
343
+ const index = pendingResolvers.indexOf(resolve);
344
+ if (index !== -1) {
345
+ pendingResolvers.splice(index, 1);
346
+ resolve();
347
+ }
348
+ return;
349
+ }
350
+ if (closeSignal || voiceInstance.state === "disconnected") {
351
+ voiceInstance.log("[AsyncIterable] Closed before wait, resolving");
352
+ const index = pendingResolvers.indexOf(resolve);
353
+ if (index !== -1) {
354
+ pendingResolvers.splice(index, 1);
355
+ resolve();
356
+ }
357
+ return;
358
+ }
359
+ });
360
+ });
361
+ voiceInstance.log("[AsyncIterable] Promise resolved, checking queue...");
362
+ } catch (error) {
363
+ if (error instanceof Error && error.message === "Stream closed") {
364
+ voiceInstance.log("[AsyncIterable] Stream closed during wait");
365
+ return { value: void 0, done: true };
366
+ }
367
+ voiceInstance.log("[AsyncIterable] Error during wait:", error);
368
+ }
369
+ }
370
+ if (closeSignal) {
371
+ voiceInstance.log("[AsyncIterable] Stream closed (closeSignal)");
372
+ return { value: void 0, done: true };
373
+ }
374
+ if (voiceInstance.state === "disconnected") {
375
+ voiceInstance.log("[AsyncIterable] Stream closed (disconnected state)");
376
+ return { value: void 0, done: true };
377
+ }
378
+ while (eventQueue.length === 0 && !closeSignal) {
379
+ if (voiceInstance.state === "disconnected") {
380
+ voiceInstance.log("[AsyncIterable] Stream closed before wait loop");
381
+ return { value: void 0, done: true };
382
+ }
383
+ voiceInstance.log("[AsyncIterable] Queue still empty, waiting again...");
384
+ await new Promise((resolve) => {
385
+ pendingResolvers.push(resolve);
386
+ setImmediate(() => {
387
+ if (eventQueue.length > 0 || closeSignal || voiceInstance.state === "disconnected") {
388
+ const index = pendingResolvers.indexOf(resolve);
389
+ if (index !== -1) {
390
+ pendingResolvers.splice(index, 1);
391
+ resolve();
392
+ }
393
+ }
394
+ });
395
+ });
396
+ if (closeSignal || voiceInstance.state === "disconnected") {
397
+ voiceInstance.log("[AsyncIterable] Stream closed during wait loop");
398
+ return { value: void 0, done: true };
399
+ }
400
+ }
401
+ const nextEvent = eventQueue.shift();
402
+ const eventJson = JSON.stringify(nextEvent);
403
+ const eventBytes = Buffer.from(eventJson, "utf-8");
404
+ voiceInstance.log(`[AsyncIterable] Yielding event of size: ${eventBytes.length}`);
405
+ return {
406
+ value: {
407
+ chunk: {
408
+ bytes: eventBytes
409
+ }
410
+ },
411
+ done: false
412
+ };
413
+ } catch (error) {
414
+ voiceInstance.log("[AsyncIterable] Error in iterator:", error);
415
+ closeSignal = true;
416
+ return { value: void 0, done: true };
417
+ }
418
+ },
419
+ return: async () => {
420
+ voiceInstance.log("[AsyncIterable] Iterator return() called");
421
+ closeSignal = true;
422
+ signalQueue();
423
+ return { value: void 0, done: true };
424
+ },
425
+ throw: async (error) => {
426
+ voiceInstance.log("[AsyncIterable] Iterator throw() called:", error);
427
+ closeSignal = true;
428
+ signalQueue();
429
+ throw error;
430
+ }
431
+ };
432
+ }
433
+ };
434
+ this._eventQueue = eventQueue;
435
+ this._signalQueue = signalQueue;
436
+ this._closeSignal = () => {
437
+ closeSignal = true;
438
+ signalQueue();
439
+ };
440
+ return asyncIterable;
441
+ }
442
+ /**
443
+ * Pre-populate the event queue with the AWS Nova Sonic connection
444
+ * handshake events: sessionStart, promptStart, then a SYSTEM text content
445
+ * block carrying the configured instructions. AUDIO contentStart is NOT
446
+ * sent here; it is deferred to the first send() call.
447
+ */
448
+ enqueueInitialSessionEvents() {
449
+ const eventQueue = this._eventQueue;
450
+ if (!eventQueue) {
451
+ throw new NovaSonicError(
452
+ "connection_failed" /* CONNECTION_FAILED */,
453
+ "Event queue must be initialized before enqueueing session events"
454
+ );
455
+ }
456
+ this.log("Pre-populating queue with sessionStart and promptStart events...");
457
+ const promptName = randomUUID();
458
+ this._promptName = promptName;
459
+ const sessionStartEvent = {};
460
+ if (this.sessionConfig) {
461
+ if (this.sessionConfig.inferenceConfiguration) {
462
+ sessionStartEvent.inferenceConfiguration = {
463
+ maxTokens: this.sessionConfig.inferenceConfiguration.maxTokens || 4096,
464
+ topP: this.sessionConfig.inferenceConfiguration.topP || 0.9,
465
+ temperature: this.sessionConfig.inferenceConfiguration.temperature || 0.7,
466
+ ...this.sessionConfig.inferenceConfiguration.topK !== void 0 && { topK: this.sessionConfig.inferenceConfiguration.topK },
467
+ ...this.sessionConfig.inferenceConfiguration.stopSequences && { stopSequences: this.sessionConfig.inferenceConfiguration.stopSequences }
468
+ };
469
+ } else {
470
+ sessionStartEvent.inferenceConfiguration = {
471
+ maxTokens: 4096,
472
+ topP: 0.9,
473
+ temperature: 0.7
474
+ };
475
+ }
476
+ if (this.sessionConfig.turnDetectionConfiguration) {
477
+ sessionStartEvent.turnDetectionConfiguration = {
478
+ ...this.sessionConfig.turnDetectionConfiguration.endpointingSensitivity && {
479
+ endpointingSensitivity: this.sessionConfig.turnDetectionConfiguration.endpointingSensitivity
480
+ }
481
+ };
482
+ }
483
+ } else {
484
+ sessionStartEvent.inferenceConfiguration = {
485
+ maxTokens: 4096,
486
+ topP: 0.9,
487
+ temperature: 0.7
488
+ };
489
+ }
490
+ eventQueue.push({
491
+ event: {
492
+ sessionStart: sessionStartEvent
493
+ }
494
+ });
495
+ let voiceId = "matthew";
496
+ if (this.sessionConfig?.voice) {
497
+ if (typeof this.sessionConfig.voice === "string") {
498
+ voiceId = this.sessionConfig.voice;
499
+ } else if (this.sessionConfig.voice.name) {
500
+ voiceId = this.sessionConfig.voice.name;
501
+ }
502
+ } else if (this.speaker && this.speaker !== "default") {
503
+ if (typeof this.speaker === "string") {
504
+ voiceId = this.speaker;
505
+ } else {
506
+ const speakerObj = this.speaker;
507
+ if (speakerObj && typeof speakerObj === "object" && speakerObj.name) {
508
+ voiceId = speakerObj.name;
509
+ }
510
+ }
511
+ }
512
+ const promptStartEvent = {
513
+ promptName,
514
+ textOutputConfiguration: {
515
+ mediaType: "text/plain"
516
+ },
517
+ // AWS REQUIRES this - cannot be omitted
518
+ audioOutputConfiguration: {
519
+ mediaType: "audio/lpcm",
520
+ sampleRateHertz: 24e3,
521
+ sampleSizeBits: 16,
522
+ channelCount: 1,
523
+ voiceId,
524
+ encoding: "base64",
525
+ audioType: "SPEECH"
526
+ }
527
+ };
528
+ if (this.sessionConfig?.tools && this.sessionConfig.tools.length > 0) {
529
+ promptStartEvent.toolConfiguration = {
530
+ tools: this.sessionConfig.tools.map((tool) => {
531
+ let inputSchemaJson;
532
+ if (typeof tool.inputSchema === "string") {
533
+ inputSchemaJson = tool.inputSchema;
534
+ } else {
535
+ inputSchemaJson = JSON.stringify(tool.inputSchema);
536
+ }
537
+ return {
538
+ toolSpec: {
539
+ name: tool.name,
540
+ description: tool.description,
541
+ inputSchema: {
542
+ json: inputSchemaJson
543
+ }
544
+ }
545
+ };
546
+ }),
547
+ // toolChoice goes inside toolConfiguration for Nova 2 Sonic
548
+ ...this.sessionConfig?.toolChoice && { toolChoice: this.sessionConfig.toolChoice }
549
+ };
550
+ } else if (this.sessionConfig?.toolChoice) {
551
+ promptStartEvent.toolConfiguration = {
552
+ toolChoice: this.sessionConfig.toolChoice
553
+ };
554
+ }
555
+ eventQueue.push({
556
+ event: {
557
+ promptStart: promptStartEvent
558
+ }
559
+ });
560
+ this.promptStarted = true;
561
+ const systemContentName = randomUUID();
562
+ eventQueue.push({
563
+ event: {
564
+ contentStart: {
565
+ promptName,
566
+ contentName: systemContentName,
567
+ type: "TEXT",
568
+ interactive: false,
569
+ role: "SYSTEM",
570
+ textInputConfiguration: {
571
+ mediaType: "text/plain"
572
+ }
573
+ }
574
+ }
575
+ });
576
+ eventQueue.push({
577
+ event: {
578
+ textInput: {
579
+ promptName,
580
+ contentName: systemContentName,
581
+ content: this.instructions || ""
582
+ }
583
+ }
584
+ });
585
+ eventQueue.push({
586
+ event: {
587
+ contentEnd: {
588
+ promptName,
589
+ contentName: systemContentName
590
+ }
591
+ }
592
+ });
593
+ this.audioContentStarted = false;
594
+ this.log(`Queue pre-populated with ${eventQueue.length} event(s)`);
595
+ }
596
+ /**
597
+ * Issue the InvokeModelWithBidirectionalStreamCommand to AWS Bedrock with
598
+ * a 5-second abort timeout that tears down the client on hang to avoid
599
+ * leaked HTTP/2 sessions. On success the response stream is stored and the
600
+ * voice transitions to 'connected'.
601
+ */
602
+ async sendInitialConnectCommand(asyncIterable) {
603
+ if (!this.client) {
604
+ throw new NovaSonicError(
605
+ "connection_failed" /* CONNECTION_FAILED */,
606
+ "Bedrock client must be created before sending the initial command"
607
+ );
608
+ }
609
+ const command = new InvokeModelWithBidirectionalStreamCommand({
610
+ modelId: this.model,
611
+ body: asyncIterable
612
+ // Type assertion needed as SDK types may be strict
613
+ });
614
+ const sendStartTime = Date.now();
615
+ const abortController = new AbortController();
616
+ const timeoutId = setTimeout(() => {
617
+ this.log("[DEBUG] client.send() timeout after 5 seconds - aborting request");
618
+ abortController.abort();
619
+ }, 5e3);
620
+ let response;
621
+ try {
622
+ response = await this.client.send(command, { abortSignal: abortController.signal });
623
+ } catch (error) {
624
+ const sendDuration2 = Date.now() - sendStartTime;
625
+ if (abortController.signal.aborted) {
626
+ this.log(`[DEBUG] client.send() aborted after ${sendDuration2}ms`);
627
+ this._closeSignal?.();
628
+ this.client.destroy();
629
+ throw new Error("client.send() timeout");
630
+ }
631
+ this.log(`[DEBUG] client.send() error after ${sendDuration2}ms:`, error);
632
+ throw error;
633
+ } finally {
634
+ clearTimeout(timeoutId);
635
+ }
636
+ const sendDuration = Date.now() - sendStartTime;
637
+ this.log(`[DEBUG] client.send() completed in ${sendDuration}ms`);
638
+ this.log("Received response from AWS Bedrock");
639
+ this.stream = response.body;
640
+ this.log(`[DEBUG] Response stream is async iterable: ${this.stream && typeof this.stream[Symbol.asyncIterator] === "function"}`);
641
+ this.state = "connected";
642
+ this.log(`[STATE] State set to 'connected'`);
643
+ }
644
+ /**
645
+ * Process the bidirectional stream from AWS Bedrock
646
+ */
647
+ async processStream() {
648
+ if (!this.stream) {
649
+ this.log("[Stream] No stream available, cannot process");
650
+ return;
651
+ }
652
+ if (this.processingStream) {
653
+ this.log("[Stream] Already processing stream, skipping");
654
+ return;
655
+ }
656
+ this.processingStream = true;
657
+ this.log("[Stream] Starting stream processing");
658
+ let eventCount = 0;
659
+ let lastEventTime = Date.now();
660
+ try {
661
+ for await (const chunk of this.stream) {
662
+ if (chunk.chunk) {
663
+ const textResponse = Buffer.from(chunk.chunk.bytes || []).toString("utf-8");
664
+ eventCount++;
665
+ const now = Date.now();
666
+ const timeSinceLastEvent = now - lastEventTime;
667
+ lastEventTime = now;
668
+ this.log(`[Stream] Received chunk #${eventCount}, length: ${textResponse.length}, time since last: ${timeSinceLastEvent}ms`);
669
+ try {
670
+ const jsonResponse = JSON.parse(textResponse);
671
+ this.log(`[Stream] ========================================`);
672
+ this.log(`[Stream] Parsed JSON response, keys: ${Object.keys(jsonResponse).join(", ")}`);
673
+ if (jsonResponse.event) {
674
+ const eventKeys = Object.keys(jsonResponse.event);
675
+ this.log(`[Stream] Event keys: ${eventKeys.join(", ")}`);
676
+ if (jsonResponse.event.contentStart) {
677
+ this.log(`[Stream] \u2192 Handling contentStart`);
678
+ this.handleServerEvent({ contentStart: jsonResponse.event.contentStart });
679
+ } else if (jsonResponse.event.textOutput) {
680
+ this.log(`[Stream] \u2192 Handling textOutput, content length: ${jsonResponse.event.textOutput?.content?.length ?? 0}`);
681
+ this.handleServerEvent({ textOutput: jsonResponse.event.textOutput });
682
+ } else if (jsonResponse.event.audioOutput) {
683
+ this.handleServerEvent({ audioOutput: jsonResponse.event.audioOutput });
684
+ } else if (jsonResponse.event.toolUse) {
685
+ this.handleServerEvent({ toolUse: jsonResponse.event.toolUse });
686
+ } else if (jsonResponse.event.contentEnd && jsonResponse.event.contentEnd.type === "TOOL") {
687
+ this.handleServerEvent({ contentEnd: jsonResponse.event.contentEnd });
688
+ } else if (jsonResponse.event.contentEnd) {
689
+ this.log(`[Stream] Found contentEnd, type: ${jsonResponse.event.contentEnd.type}, stopReason: ${jsonResponse.event.contentEnd.stopReason}`);
690
+ this.handleServerEvent({ contentEnd: jsonResponse.event.contentEnd });
691
+ } else if (jsonResponse.event.completionStart) {
692
+ this.log("[Stream] Found completionStart inside event object:", JSON.stringify(jsonResponse.event.completionStart, null, 2));
693
+ this.emit("completionStart", jsonResponse.event.completionStart);
694
+ } else if (jsonResponse.event.completionEnd) {
695
+ this.log("[Stream] Found completionEnd inside event object:", JSON.stringify(jsonResponse.event.completionEnd, null, 2));
696
+ this.handleServerEvent({ completionEnd: jsonResponse.event.completionEnd });
697
+ } else {
698
+ const eventKeys2 = Object.keys(jsonResponse.event || {});
699
+ this.log(`[Stream] Event keys for other events: ${eventKeys2.join(", ")}`);
700
+ if (eventKeys2.length > 0) {
701
+ if (eventKeys2.includes("completionEnd")) {
702
+ this.log("[Stream] Found completionEnd in other events, handling explicitly");
703
+ this.handleServerEvent({ completionEnd: jsonResponse.event.completionEnd });
704
+ } else {
705
+ const eventKey = eventKeys2[0];
706
+ this.log(`[Stream] Dispatching other event: ${eventKey}`);
707
+ const eventValue = jsonResponse.event[eventKey];
708
+ if (eventValue !== void 0) {
709
+ if (eventKey === "completionEnd") {
710
+ this.handleServerEvent({ completionEnd: eventValue });
711
+ } else {
712
+ this.handleServerEvent({ [eventKey]: eventValue });
713
+ }
714
+ }
715
+ }
716
+ } else if (Object.keys(jsonResponse).length > 0) {
717
+ this.log(`[Stream] Unknown event structure, keys:`, Object.keys(jsonResponse).join(", "));
718
+ }
719
+ }
720
+ } else {
721
+ if (this.debug) {
722
+ this.log('[Stream] Received event without "event" wrapper, keys:', Object.keys(jsonResponse).join(", "));
723
+ }
724
+ if (jsonResponse.usageEvent) {
725
+ this.emit("usage", {
726
+ inputTokens: jsonResponse.usageEvent.totalInputTokens || 0,
727
+ outputTokens: jsonResponse.usageEvent.totalOutputTokens || 0,
728
+ totalTokens: jsonResponse.usageEvent.totalTokens || 0
729
+ });
730
+ }
731
+ if (jsonResponse.completionEnd) {
732
+ this.log("[Stream] Found completionEnd at top level:", JSON.stringify(jsonResponse.completionEnd, null, 2));
733
+ this.handleServerEvent({ completionEnd: jsonResponse.completionEnd });
734
+ }
735
+ if (!jsonResponse.event && !jsonResponse.completionEnd && !jsonResponse.usageEvent) {
736
+ this.log("[Stream] Received response without event wrapper, keys:", Object.keys(jsonResponse).join(", "));
737
+ }
738
+ if (jsonResponse.completionStart || jsonResponse.event?.completionStart) {
739
+ const completionStart = jsonResponse.completionStart || jsonResponse.event.completionStart;
740
+ this.log("[Stream] Found completionStart:", JSON.stringify(completionStart, null, 2));
741
+ this.emit("completionStart", completionStart);
742
+ }
743
+ }
744
+ } catch (parseError) {
745
+ this.log("[Stream] Failed to parse JSON response:", textResponse.substring(0, 200));
746
+ this.emit("error", {
747
+ message: "Failed to parse stream response",
748
+ code: "PARSE_ERROR",
749
+ details: parseError
750
+ });
751
+ }
752
+ } else if (chunk.internalServerException) {
753
+ this.emit("error", {
754
+ message: "Internal server error",
755
+ code: "INTERNAL_SERVER_ERROR",
756
+ details: chunk.internalServerException
757
+ });
758
+ } else if (chunk.modelStreamErrorException) {
759
+ this.emit("error", {
760
+ message: "Model stream error",
761
+ code: "MODEL_STREAM_ERROR",
762
+ details: chunk.modelStreamErrorException
763
+ });
764
+ } else if (chunk.modelTimeoutException) {
765
+ this.emit("error", {
766
+ message: "Model timeout",
767
+ code: "MODEL_TIMEOUT",
768
+ details: chunk.modelTimeoutException
769
+ });
770
+ } else if (chunk.serviceUnavailableException) {
771
+ this.emit("error", {
772
+ message: "Service unavailable",
773
+ code: "SERVICE_UNAVAILABLE",
774
+ details: chunk.serviceUnavailableException
775
+ });
776
+ } else if (chunk.throttlingException) {
777
+ this.emit("error", {
778
+ message: "Request throttled",
779
+ code: "THROTTLING",
780
+ details: chunk.throttlingException
781
+ });
782
+ } else if (chunk.validationException) {
783
+ this.emit("error", {
784
+ message: "Validation error",
785
+ code: "VALIDATION_ERROR",
786
+ details: chunk.validationException
787
+ });
788
+ }
789
+ }
790
+ } catch (streamError) {
791
+ this.log("[Stream] Error in processStream:", streamError);
792
+ this.emit("error", {
793
+ message: "Stream processing error",
794
+ code: "STREAM_ERROR",
795
+ details: streamError instanceof Error ? streamError.message : String(streamError)
796
+ });
797
+ } finally {
798
+ this.processingStream = false;
799
+ this.log(`[Stream] processStream finished, processingStream set to false. Total events received: ${eventCount || 0}`);
800
+ this.log(`[Stream] Stream state: state=${this.state}, stream exists=${!!this.stream}`);
801
+ if (!this.turnCompleted && this.audioContentStarted) {
802
+ this.log("[Stream] Stream ended but turn not completed - signaling turn completion as fallback");
803
+ this.log(`[Stream] State: turnCompleted=${this.turnCompleted}, audioContentStarted=${this.audioContentStarted}, hasSentContentEnd=${this.hasSentContentEnd}`);
804
+ this.turnCompleted = true;
805
+ this.emit("turnComplete", { timestamp: Date.now() });
806
+ if (this.currentResponseId) {
807
+ const stream = this.speakerStreams.get(this.currentResponseId);
808
+ if (stream) {
809
+ stream.end();
810
+ }
811
+ this.speakerStreams.delete(this.currentResponseId);
812
+ this.currentResponseId = void 0;
813
+ }
814
+ this.hasSentContentEnd = false;
815
+ this.log("[Stream] Turn completion signaled, ready for next turn");
816
+ } else if (this.turnCompleted) {
817
+ this.log("[Stream] Stream ended and turn was already completed");
818
+ } else {
819
+ this.log(`[Stream] Stream ended but turn not completed - audioContentStarted=${this.audioContentStarted}, turnCompleted=${this.turnCompleted}`);
820
+ }
821
+ if (this.stream && this.state === "connected" && !this.processingStream && !this.streamRestartAttempted) {
822
+ this.log("[Stream] Stream still open but processing stopped - will restart stream processing");
823
+ this.streamRestartAttempted = true;
824
+ setImmediate(() => {
825
+ if (this.stream && this.state === "connected" && !this.processingStream) {
826
+ this.log("[Stream] Restarting stream processing for subsequent turns");
827
+ this.processStream().catch((error) => {
828
+ this.log("[Stream] Error restarting stream processing:", error);
829
+ this.streamRestartAttempted = false;
830
+ });
831
+ } else {
832
+ this.streamRestartAttempted = false;
833
+ }
834
+ });
835
+ } else {
836
+ if (this.streamRestartAttempted) {
837
+ this.log("[Stream] Stream restart already attempted, skipping");
838
+ }
839
+ }
840
+ }
841
+ }
842
+ /**
843
+ * Handle server events from AWS Bedrock
844
+ */
845
+ handleServerEvent(event) {
846
+ if (this.debug) {
847
+ this.log("Received event, keys:", Object.keys(event).join(", "));
848
+ }
849
+ if (event.contentStart) {
850
+ this.handleContentStart(event.contentStart);
851
+ }
852
+ if (event.textOutput) {
853
+ this.handleTextOutput(event.textOutput);
854
+ }
855
+ if (event.audioOutput?.content) {
856
+ this.handleAudioOutput(event.audioOutput);
857
+ }
858
+ if (event.toolUse) {
859
+ this.handleToolUse(event.toolUse);
860
+ }
861
+ if (event.contentEnd) {
862
+ this.handleContentEnd(event.contentEnd);
863
+ }
864
+ if (event.completionEnd) {
865
+ this.handleCompletionEnd(event.completionEnd);
866
+ }
867
+ if (event.error) {
868
+ this.emit("error", {
869
+ message: event.error.message || "Unknown error",
870
+ code: event.error.code || "UNKNOWN_ERROR",
871
+ details: event.error
872
+ });
873
+ }
874
+ }
875
+ /**
876
+ * Handle a contentStart event. Tracks generationStage for text content
877
+ * blocks so the corresponding 'writing' events can be tagged
878
+ * SPECULATIVE/FINAL for the client.
879
+ */
880
+ handleContentStart(contentStart) {
881
+ const role = contentStart.role?.toLowerCase();
882
+ const contentType = contentStart.type;
883
+ this.log(`[Event] contentStart: type=${contentType || "unknown"}, role=${role}`);
884
+ this.emit("contentStart", contentStart);
885
+ if (contentType === "TEXT" && contentStart.additionalModelFields) {
886
+ try {
887
+ const additionalFields = JSON.parse(contentStart.additionalModelFields);
888
+ this.currentTextGenerationStage = additionalFields.generationStage;
889
+ this.log(`[Event] Text content generationStage: ${this.currentTextGenerationStage}`);
890
+ } catch {
891
+ this.currentTextGenerationStage = void 0;
892
+ }
893
+ } else if (contentType === "TEXT") {
894
+ this.currentTextGenerationStage = void 0;
895
+ }
896
+ }
897
+ /**
898
+ * Handle a textOutput event. Detects interruption (barge-in) markers in
899
+ * the payload, otherwise emits a 'writing' event with the text and
900
+ * current generationStage.
901
+ */
902
+ handleTextOutput(textOutput) {
903
+ const text = textOutput.content || "";
904
+ const role = textOutput.role?.toLowerCase() || "assistant";
905
+ this.log(`[Event] textOutput received: role=${role}, text length=${text.length}`);
906
+ let isInterrupted = false;
907
+ try {
908
+ const parsed = JSON.parse(text);
909
+ if (parsed && parsed.interrupted === true) {
910
+ isInterrupted = true;
911
+ }
912
+ } catch {
913
+ if (/interrupted/i.test(text)) {
914
+ isInterrupted = true;
915
+ }
916
+ }
917
+ if (isInterrupted) {
918
+ this.log(`[Event] Interrupt detected, emitting interrupt event`);
919
+ this.emit("interrupt", { type: "user", timestamp: Date.now() });
920
+ return;
921
+ }
922
+ const generationStage = this.currentTextGenerationStage;
923
+ this.log(`[Event] Emitting 'writing': role=${role}, generationStage=${generationStage}, length=${text.length}`);
924
+ this.emit("writing", { text, role, generationStage });
925
+ }
926
+ /**
927
+ * Handle an audioOutput event. Decodes the base64 LPCM payload, emits
928
+ * 'speaking' with both the base64 string and an Int16Array view, and
929
+ * forwards bytes to any active speaker stream.
930
+ */
931
+ handleAudioOutput(audioOutput) {
932
+ try {
933
+ const content = audioOutput.content;
934
+ const audioBytes = Buffer.from(content, "base64");
935
+ this.log(`[Event] Audio output: ${audioBytes.length} bytes`);
936
+ this.isReceivingAssistantAudio = true;
937
+ const audioData = new Int16Array(audioBytes.buffer, audioBytes.byteOffset, audioBytes.byteLength / 2);
938
+ this.emit("speaking", {
939
+ audio: content,
940
+ audioData,
941
+ response_id: this.currentResponseId
942
+ });
943
+ if (this.currentResponseId) {
944
+ const stream = this.speakerStreams.get(this.currentResponseId);
945
+ if (stream) {
946
+ stream.write(audioBytes);
947
+ }
948
+ }
949
+ } catch (error) {
950
+ this.log("[Event] Error decoding audio:", error);
951
+ this.emit("error", {
952
+ message: "Failed to decode audio",
953
+ code: "AUDIO_DECODE_ERROR",
954
+ details: error
955
+ });
956
+ }
957
+ }
958
+ /**
959
+ * Handle a toolUse event. Emits 'toolCall' and dispatches to the
960
+ * configured tool's execute() function via handleToolCall().
961
+ */
962
+ handleToolUse(toolUse) {
963
+ const toolUseId = toolUse.toolUseId || "";
964
+ const toolName = toolUse.toolName || "";
965
+ const toolInput = toolUse.input || {};
966
+ this.emit("toolCall", {
967
+ name: toolName,
968
+ args: toolInput,
969
+ id: toolUseId
970
+ });
971
+ if (this.tools && toolName in this.tools) {
972
+ this.handleToolCall(toolName, toolInput, toolUseId);
973
+ }
974
+ }
975
+ /**
976
+ * Handle a contentEnd event. Forwards it to clients, then routes by
977
+ * stopReason / type:
978
+ * - INTERRUPTED: emit 'interrupt' and tear down the active speaker stream
979
+ * - TOOL: end the active speaker stream
980
+ * - AUDIO with END_TURN: signal turnComplete (assistant audio finished)
981
+ * - AUDIO with PARTIAL_TURN while receiving assistant audio: schedule
982
+ * fallback turnComplete in case completionEnd never arrives
983
+ * - AUDIO otherwise: user input ended, reset turn flags
984
+ */
985
+ handleContentEnd(contentEnd) {
986
+ this.log(`[Event] contentEnd received: type=${contentEnd.type}, stopReason=${contentEnd.stopReason}`);
987
+ this.emit("contentEnd", contentEnd);
988
+ if (contentEnd.stopReason === "INTERRUPTED") {
989
+ this.log("[Event] Content interrupted by user (barge-in)");
990
+ this.emit("interrupt", { type: "user", timestamp: Date.now() });
991
+ if (this.currentResponseId) {
992
+ const stream = this.speakerStreams.get(this.currentResponseId);
993
+ if (stream) {
994
+ stream.destroy();
995
+ }
996
+ this.speakerStreams.delete(this.currentResponseId);
997
+ }
998
+ this.currentResponseId = void 0;
999
+ this.log("[Event] After interruption, keeping audioContentStarted=true for continued streaming");
1000
+ } else if (contentEnd.type === "TOOL" && this.currentResponseId) {
1001
+ const stream = this.speakerStreams.get(this.currentResponseId);
1002
+ if (stream) {
1003
+ stream.end();
1004
+ }
1005
+ } else if (contentEnd.type === "AUDIO") {
1006
+ if (contentEnd.stopReason === "END_TURN") {
1007
+ this.log(`[Event] contentEnd (AUDIO) with stopReason END_TURN - signaling turn complete`);
1008
+ if (this.currentResponseId) {
1009
+ const stream = this.speakerStreams.get(this.currentResponseId);
1010
+ if (stream) {
1011
+ stream.end();
1012
+ }
1013
+ this.speakerStreams.delete(this.currentResponseId);
1014
+ this.currentResponseId = void 0;
1015
+ }
1016
+ if (!this.turnCompleted) {
1017
+ this.turnCompleted = true;
1018
+ this.emit("turnComplete", { timestamp: Date.now() });
1019
+ this.hasSentContentEnd = false;
1020
+ this.log(`[Event] Turn complete (from contentEnd AUDIO with END_TURN), ready for next turn. audioContentStarted: ${this.audioContentStarted}, audioContentName: ${this.audioContentName}`);
1021
+ } else {
1022
+ this.log(`[Event] contentEnd (AUDIO) with END_TURN received but turn already completed - skipping duplicate turnComplete emission`);
1023
+ }
1024
+ if (!this.turnCompleteTimeout) {
1025
+ this.turnCompleteTimeout = setTimeout(() => {
1026
+ this.log(`[Event] Timeout: completionEnd not received, but turn already completed from contentEnd`);
1027
+ this.turnCompleteTimeout = void 0;
1028
+ }, 1e3);
1029
+ }
1030
+ } else {
1031
+ if (this.isReceivingAssistantAudio && contentEnd.stopReason === "PARTIAL_TURN") {
1032
+ this.isReceivingAssistantAudio = false;
1033
+ if (!this.turnCompleteTimeout && !this.turnCompleted) {
1034
+ this.log(`[Event] contentEnd (AUDIO) with PARTIAL_TURN for assistant output - waiting for completionEnd, setting fallback timeout`);
1035
+ this.turnCompleteTimeout = setTimeout(() => {
1036
+ if (!this.turnCompleted) {
1037
+ this.log(`[Event] Fallback: completionEnd not received after contentEnd (AUDIO) with PARTIAL_TURN, signaling turn complete`);
1038
+ this.turnCompleted = true;
1039
+ this.emit("turnComplete", { timestamp: Date.now() });
1040
+ if (this.currentResponseId) {
1041
+ const stream = this.speakerStreams.get(this.currentResponseId);
1042
+ if (stream) {
1043
+ stream.end();
1044
+ }
1045
+ this.speakerStreams.delete(this.currentResponseId);
1046
+ this.currentResponseId = void 0;
1047
+ }
1048
+ this.hasSentContentEnd = false;
1049
+ this.turnCompleteTimeout = void 0;
1050
+ }
1051
+ }, 2e3);
1052
+ }
1053
+ } else {
1054
+ this.hasSentContentEnd = false;
1055
+ this.turnCompleted = false;
1056
+ this.log(`[Event] contentEnd (AUDIO) - user input ended, stopReason: ${contentEnd.stopReason}. Keeping audioContentStarted=true for next turn. Reset hasSentContentEnd=false, turnCompleted=false.`);
1057
+ }
1058
+ }
1059
+ } else if (contentEnd.type === "TEXT") {
1060
+ this.currentTextGenerationStage = void 0;
1061
+ this.log(`[Event] contentEnd (TEXT) received, stopReason: ${contentEnd.stopReason}. Turn completion handled by completionEnd/contentEnd(AUDIO).`);
1062
+ if (contentEnd.stopReason === "END_TURN") {
1063
+ this.hasSentContentEnd = false;
1064
+ }
1065
+ }
1066
+ }
1067
+ /**
1068
+ * Handle a completionEnd event. AWS uses this as the definitive signal
1069
+ * that a turn (and all audio output) has finished. Tears down the active
1070
+ * speaker stream, clears any fallback timer, emits 'turnComplete' once,
1071
+ * and forwards token usage if reported.
1072
+ */
1073
+ handleCompletionEnd(completionEnd) {
1074
+ this.log(`[Event] completionEnd received, stopReason: ${completionEnd.stopReason}`);
1075
+ if (this.turnCompleteTimeout) {
1076
+ clearTimeout(this.turnCompleteTimeout);
1077
+ this.turnCompleteTimeout = void 0;
1078
+ }
1079
+ if (this.currentResponseId) {
1080
+ const stream = this.speakerStreams.get(this.currentResponseId);
1081
+ if (stream) {
1082
+ stream.end();
1083
+ }
1084
+ this.speakerStreams.delete(this.currentResponseId);
1085
+ this.currentResponseId = void 0;
1086
+ }
1087
+ this.isReceivingAssistantAudio = false;
1088
+ if (!this.turnCompleted) {
1089
+ this.log(`[Event] completionEnd - signaling turn complete (stopReason: ${completionEnd.stopReason || "undefined"})`);
1090
+ this.turnCompleted = true;
1091
+ this.emit("turnComplete", { timestamp: Date.now() });
1092
+ this.hasSentContentEnd = false;
1093
+ } else {
1094
+ this.log(`[Event] completionEnd received but turn already completed - skipping duplicate turnComplete emission`);
1095
+ }
1096
+ if (completionEnd.usage) {
1097
+ this.emit("usage", {
1098
+ inputTokens: completionEnd.usage.inputTokens || 0,
1099
+ outputTokens: completionEnd.usage.outputTokens || 0,
1100
+ totalTokens: (completionEnd.usage.inputTokens || 0) + (completionEnd.usage.outputTokens || 0)
1101
+ });
1102
+ }
1103
+ }
1104
+ /**
1105
+ * Handle tool execution
1106
+ */
1107
+ async handleToolCall(toolName, args, toolUseId) {
1108
+ const tool = this.tools?.[toolName];
1109
+ if (!tool || !tool.execute) {
1110
+ this.emit("error", {
1111
+ message: `Tool ${toolName} not found or has no execute function`,
1112
+ code: "TOOL_NOT_FOUND"
1113
+ });
1114
+ return;
1115
+ }
1116
+ try {
1117
+ const result = await tool.execute(
1118
+ { context: args, requestContext: this.requestContext },
1119
+ {
1120
+ toolCallId: toolUseId,
1121
+ messages: []
1122
+ }
1123
+ );
1124
+ await this.sendClientEvent({
1125
+ toolResult: {
1126
+ toolUseId,
1127
+ content: [
1128
+ {
1129
+ json: typeof result === "object" ? result : { result }
1130
+ }
1131
+ ]
1132
+ }
1133
+ });
1134
+ } catch (error) {
1135
+ this.emit("error", {
1136
+ message: `Error executing tool ${toolName}: ${error instanceof Error ? error.message : "Unknown error"}`,
1137
+ code: "TOOL_EXECUTION_ERROR",
1138
+ details: error
1139
+ });
1140
+ await this.sendClientEvent({
1141
+ toolResult: {
1142
+ toolUseId,
1143
+ content: [
1144
+ {
1145
+ text: `Error: ${error instanceof Error ? error.message : "Unknown error"}`
1146
+ }
1147
+ ]
1148
+ }
1149
+ });
1150
+ }
1151
+ }
1152
+ /**
1153
+ * Send a client event to AWS Bedrock
1154
+ * Events are sent through the input stream that was passed to the bidirectional stream command
1155
+ */
1156
+ async sendClientEvent(event) {
1157
+ if (this.state !== "connected") {
1158
+ throw new NovaSonicError(
1159
+ "not_connected" /* NOT_CONNECTED */,
1160
+ "Not connected to AWS Bedrock. Call connect() first."
1161
+ );
1162
+ }
1163
+ try {
1164
+ const eventQueue = this._eventQueue;
1165
+ const signalQueue = this._signalQueue;
1166
+ if (!eventQueue || !signalQueue) {
1167
+ throw new NovaSonicError(
1168
+ "not_connected" /* NOT_CONNECTED */,
1169
+ "Event queue not initialized. Connection may not be fully established."
1170
+ );
1171
+ }
1172
+ this.log(`[sendClientEvent] Adding event to queue (queue size: ${eventQueue.length})`);
1173
+ eventQueue.push({ event });
1174
+ this.log(`[sendClientEvent] Event added, queue size now: ${eventQueue.length}, signaling...`);
1175
+ signalQueue();
1176
+ this.log(`[sendClientEvent] Signal sent`);
1177
+ if (this.debug) {
1178
+ this.log("Sent client event, keys:", Object.keys(event).join(", "));
1179
+ }
1180
+ } catch (error) {
1181
+ throw new NovaSonicError(
1182
+ "websocket_error" /* WEBSOCKET_ERROR */,
1183
+ `Failed to send client event: ${error instanceof Error ? error.message : "Unknown error"}`,
1184
+ error
1185
+ );
1186
+ }
1187
+ }
1188
+ /**
1189
+ * Disconnects from the AWS Bedrock session and cleans up resources.
1190
+ *
1191
+ * Pushes a `sessionEnd` event to the queue before signalling close,
1192
+ * then schedules client destruction on the next tick so the async
1193
+ * iterator has a chance to yield the event to the SDK.
1194
+ */
1195
+ close() {
1196
+ if (this.state === "disconnected") {
1197
+ return;
1198
+ }
1199
+ this.state = "disconnected";
1200
+ this.processingStream = false;
1201
+ if (this.turnCompleteTimeout) {
1202
+ clearTimeout(this.turnCompleteTimeout);
1203
+ this.turnCompleteTimeout = void 0;
1204
+ }
1205
+ const eventQueue = this._eventQueue;
1206
+ const signalQueue = this._signalQueue;
1207
+ if (eventQueue && signalQueue) {
1208
+ eventQueue.push({ event: { sessionEnd: {} } });
1209
+ signalQueue();
1210
+ }
1211
+ const closeSignal = this._closeSignal;
1212
+ if (closeSignal) {
1213
+ closeSignal();
1214
+ }
1215
+ if (this.inputStream) {
1216
+ this.inputStream.end();
1217
+ this.inputStream = void 0;
1218
+ }
1219
+ for (const stream of this.speakerStreams.values()) {
1220
+ stream.end();
1221
+ }
1222
+ this.speakerStreams.clear();
1223
+ const client = this.client;
1224
+ this.client = void 0;
1225
+ this.stream = void 0;
1226
+ if (client) {
1227
+ setImmediate(() => {
1228
+ if (typeof client.destroy === "function") {
1229
+ client.destroy();
1230
+ }
1231
+ });
1232
+ }
1233
+ this.log("Disconnected from AWS Bedrock Nova 2 Sonic");
1234
+ }
1235
+ /**
1236
+ * Equips the voice instance with a set of instructions.
1237
+ */
1238
+ addInstructions(instructions) {
1239
+ this.instructions = instructions;
1240
+ }
1241
+ /**
1242
+ * Equips the voice instance with a set of tools.
1243
+ */
1244
+ addTools(tools) {
1245
+ this.tools = tools || {};
1246
+ }
1247
+ /**
1248
+ * Convert text to speech
1249
+ */
1250
+ async speak(input, options) {
1251
+ if (this.state !== "connected") {
1252
+ throw new NovaSonicError(
1253
+ "not_connected" /* NOT_CONNECTED */,
1254
+ "Not connected. Call connect() first."
1255
+ );
1256
+ }
1257
+ let text = "";
1258
+ if (typeof input !== "string") {
1259
+ const chunks = [];
1260
+ for await (const chunk of input) {
1261
+ chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
1262
+ }
1263
+ text = Buffer.concat(chunks).toString("utf-8");
1264
+ } else {
1265
+ text = input;
1266
+ }
1267
+ if (text.trim().length === 0) {
1268
+ throw new NovaSonicError("validation_error" /* VALIDATION_ERROR */, "Input text is empty");
1269
+ }
1270
+ this.currentResponseId = `response-${Date.now()}`;
1271
+ const speakerStream = new PassThrough();
1272
+ speakerStream.id = this.currentResponseId;
1273
+ this.speakerStreams.set(this.currentResponseId, speakerStream);
1274
+ this.emit("speaker", speakerStream);
1275
+ const promptName = this._promptName;
1276
+ if (!promptName) {
1277
+ throw new NovaSonicError(
1278
+ "not_connected" /* NOT_CONNECTED */,
1279
+ "Prompt name not initialized. Connection may not be fully established."
1280
+ );
1281
+ }
1282
+ if (!this.promptStarted) {
1283
+ throw new NovaSonicError(
1284
+ "invalid_state" /* INVALID_STATE */,
1285
+ "Prompt not started. This should not happen - prompt should be started during connection."
1286
+ );
1287
+ }
1288
+ const contentName = randomUUID();
1289
+ await this.sendClientEvent({
1290
+ contentStart: {
1291
+ promptName,
1292
+ contentName,
1293
+ type: "TEXT",
1294
+ interactive: true,
1295
+ role: "USER",
1296
+ textInputConfiguration: {
1297
+ mediaType: "text/plain"
1298
+ }
1299
+ }
1300
+ });
1301
+ await this.sendClientEvent({
1302
+ textInput: {
1303
+ promptName,
1304
+ contentName,
1305
+ content: text
1306
+ }
1307
+ });
1308
+ await this.sendClientEvent({
1309
+ contentEnd: {
1310
+ promptName,
1311
+ contentName
1312
+ }
1313
+ });
1314
+ }
1315
+ /**
1316
+ * Convert speech to text (transcription)
1317
+ * For Nova Sonic, this is the same as send() - both stream audio input
1318
+ */
1319
+ async listen(audioStream, options) {
1320
+ if (audioStream && typeof audioStream === "object" && "read" in audioStream) {
1321
+ await this.send(audioStream);
1322
+ } else {
1323
+ throw new NovaSonicError(
1324
+ "invalid_audio_format" /* INVALID_AUDIO_FORMAT */,
1325
+ "Unsupported audio stream format for listen()"
1326
+ );
1327
+ }
1328
+ }
1329
+ /**
1330
+ * Streams audio data in real-time to the AWS Bedrock service.
1331
+ * Following AWS Nova 2 Sonic event sequence:
1332
+ * 1. contentStart (AUDIO, USER) - if not already sent
1333
+ * 2. audioInput events (one per chunk)
1334
+ * 3. contentEnd - when audio stream ends (handled separately via endAudioInput)
1335
+ */
1336
+ async send(audioData) {
1337
+ this.log(`[send] Current state: ${this.state}`);
1338
+ if (this.state !== "connected") {
1339
+ this.log(`[send] ERROR: State is '${this.state}', expected 'connected'`);
1340
+ throw new NovaSonicError(
1341
+ "not_connected" /* NOT_CONNECTED */,
1342
+ `Not connected. Current state: ${this.state}. Call connect() first.`
1343
+ );
1344
+ }
1345
+ this.log(`[send] State check passed, proceeding with send`);
1346
+ if (!(audioData instanceof Int16Array) && !(audioData && typeof audioData === "object" && "read" in audioData)) {
1347
+ throw new NovaSonicError(
1348
+ "invalid_audio_format" /* INVALID_AUDIO_FORMAT */,
1349
+ "Unsupported audio data format"
1350
+ );
1351
+ }
1352
+ if (this.turnCompleted || this.hasSentContentEnd) {
1353
+ this.log(`[send] Starting new turn - resetting flags. turnCompleted=${this.turnCompleted}, hasSentContentEnd=${this.hasSentContentEnd}.`);
1354
+ const needNewContent = this.hasSentContentEnd;
1355
+ this.turnCompleted = false;
1356
+ this.hasSentContentEnd = false;
1357
+ this.streamRestartAttempted = false;
1358
+ if (needNewContent) {
1359
+ this.audioContentStarted = false;
1360
+ this.log(`[send] contentEnd was previously sent - will create new audio content container`);
1361
+ }
1362
+ this.log(`[send] State reset: turnCompleted=false, hasSentContentEnd=false, audioContentStarted=${this.audioContentStarted}`);
1363
+ }
1364
+ if (!this.promptStarted) {
1365
+ this.promptStarted = true;
1366
+ }
1367
+ const promptName = this._promptName;
1368
+ if (!promptName) {
1369
+ throw new NovaSonicError(
1370
+ "not_connected" /* NOT_CONNECTED */,
1371
+ "Prompt name not initialized. Connection may not be fully established."
1372
+ );
1373
+ }
1374
+ if (!this.audioContentStarted) {
1375
+ const audioContentId = randomUUID();
1376
+ this.audioContentName = audioContentId;
1377
+ this.log(`[send] First audio send - sending AUDIO contentStart with contentName: ${audioContentId}`);
1378
+ await this.sendClientEvent({
1379
+ contentStart: {
1380
+ promptName,
1381
+ contentName: audioContentId,
1382
+ type: "AUDIO",
1383
+ interactive: true,
1384
+ role: "USER",
1385
+ audioInputConfiguration: {
1386
+ mediaType: "audio/lpcm",
1387
+ sampleRateHertz: 16e3,
1388
+ sampleSizeBits: 16,
1389
+ channelCount: 1,
1390
+ encoding: "base64",
1391
+ audioType: "SPEECH"
1392
+ }
1393
+ }
1394
+ });
1395
+ this.audioContentStarted = true;
1396
+ this.log(`[send] AUDIO contentStart sent, ready to stream audio`);
1397
+ } else {
1398
+ this.log(`[send] AUDIO contentStart already sent, sending audioInput chunks directly`);
1399
+ }
1400
+ if (!this.audioContentName) {
1401
+ throw new NovaSonicError(
1402
+ "invalid_state" /* INVALID_STATE */,
1403
+ "Audio content name not initialized. This should not happen."
1404
+ );
1405
+ }
1406
+ const contentName = this.audioContentName;
1407
+ if (audioData instanceof Int16Array) {
1408
+ const buffer = Buffer.from(audioData.buffer, audioData.byteOffset, audioData.byteLength);
1409
+ const base64Audio = buffer.toString("base64");
1410
+ this.log(`[send] Sending audioInput chunk, size: ${buffer.length} bytes, contentName: ${contentName}, turnCompleted: ${this.turnCompleted}, hasSentContentEnd: ${this.hasSentContentEnd}, audioContentStarted: ${this.audioContentStarted}, state: ${this.state}`);
1411
+ if (this.state !== "connected") {
1412
+ this.log(`[send] ERROR: State changed to '${this.state}' during send!`);
1413
+ throw new NovaSonicError(
1414
+ "not_connected" /* NOT_CONNECTED */,
1415
+ `Connection lost during send. State: ${this.state}`
1416
+ );
1417
+ }
1418
+ await this.sendClientEvent({
1419
+ audioInput: {
1420
+ promptName,
1421
+ contentName,
1422
+ content: base64Audio
1423
+ }
1424
+ });
1425
+ this.log(`[send] audioInput chunk sent successfully`);
1426
+ } else if (audioData && typeof audioData === "object" && "read" in audioData) {
1427
+ const stream = audioData;
1428
+ for await (const chunk of stream) {
1429
+ const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
1430
+ const base64Audio = buffer.toString("base64");
1431
+ this.log(`[send] Sending audioInput chunk from stream, size: ${buffer.length} bytes, contentName: ${contentName}, turnCompleted: ${this.turnCompleted}, hasSentContentEnd: ${this.hasSentContentEnd}`);
1432
+ await this.sendClientEvent({
1433
+ audioInput: {
1434
+ promptName,
1435
+ contentName,
1436
+ content: base64Audio
1437
+ }
1438
+ });
1439
+ }
1440
+ } else {
1441
+ throw new NovaSonicError(
1442
+ "invalid_audio_format" /* INVALID_AUDIO_FORMAT */,
1443
+ "Unsupported audio data format"
1444
+ );
1445
+ }
1446
+ }
1447
+ /**
1448
+ * End audio input stream (sends contentEnd for audio)
1449
+ * Call this when done sending audio chunks
1450
+ */
1451
+ async endAudioInput() {
1452
+ if (this.hasSentContentEnd) {
1453
+ this.log("[endAudioInput] contentEnd already sent for this turn, skipping");
1454
+ return;
1455
+ }
1456
+ if (this.turnCompleted) {
1457
+ this.log("[endAudioInput] Turn already completed by AWS, skipping contentEnd. Resetting turnCompleted flag for next turn.");
1458
+ this.turnCompleted = false;
1459
+ this.hasSentContentEnd = false;
1460
+ return;
1461
+ }
1462
+ if (this.audioContentStarted && this.audioContentName && this._promptName) {
1463
+ const promptName = this._promptName;
1464
+ this.log("[endAudioInput] Sending contentEnd for audio input");
1465
+ await this.sendClientEvent({
1466
+ contentEnd: {
1467
+ promptName,
1468
+ contentName: this.audioContentName
1469
+ }
1470
+ });
1471
+ this.hasSentContentEnd = true;
1472
+ } else {
1473
+ this.log("[endAudioInput] Cannot send contentEnd: audioContentStarted=" + this.audioContentStarted + ", audioContentName=" + this.audioContentName);
1474
+ }
1475
+ }
1476
+ /**
1477
+ * Register an event listener
1478
+ */
1479
+ on(event, callback) {
1480
+ if (!this.events[event]) {
1481
+ this.events[event] = [];
1482
+ }
1483
+ this.events[event].push(callback);
1484
+ }
1485
+ /**
1486
+ * Remove an event listener
1487
+ */
1488
+ off(event, callback) {
1489
+ if (!this.events[event]) {
1490
+ return;
1491
+ }
1492
+ const index = this.events[event].indexOf(callback);
1493
+ if (index !== -1) {
1494
+ this.events[event].splice(index, 1);
1495
+ }
1496
+ }
1497
+ /**
1498
+ * Emit an event with arguments
1499
+ */
1500
+ emit(event, data) {
1501
+ if (!this.events[event]) {
1502
+ this.log(`[NovaSonic] emit('${event}'): No listeners registered for this event`);
1503
+ return;
1504
+ }
1505
+ const listenerCount = this.events[event].length;
1506
+ this.log(`[NovaSonic] emit('${event}'): Calling ${listenerCount} listener(s)`);
1507
+ for (const callback of this.events[event]) {
1508
+ try {
1509
+ callback(data);
1510
+ this.log(`[NovaSonic] emit('${event}'): Successfully called one listener`);
1511
+ } catch (error) {
1512
+ this.log(`Error in event handler for ${event}:`, error);
1513
+ }
1514
+ }
1515
+ this.log(`[NovaSonic] emit('${event}'): Finished calling all ${listenerCount} listener(s)`);
1516
+ }
1517
+ /**
1518
+ * Get listener status
1519
+ */
1520
+ async getListener() {
1521
+ return { enabled: this.state === "connected" };
1522
+ }
1523
+ /**
1524
+ * Log helper
1525
+ */
1526
+ log(...args) {
1527
+ if (this.debug) {
1528
+ console.log("[NovaSonicVoice]", ...args);
1529
+ }
1530
+ }
1531
+ };
1532
+
1533
+ export { NovaSonicError, NovaSonicErrorCode, NovaSonicVoice };
1534
+ //# sourceMappingURL=index.js.map
1535
+ //# sourceMappingURL=index.js.map