@mastra/voice-aws-nova-sonic 0.0.0-studio-cli-20260504022012
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +51 -0
- package/LICENSE.md +30 -0
- package/README.md +384 -0
- package/dist/docs/SKILL.md +27 -0
- package/dist/docs/assets/SOURCE_MAP.json +6 -0
- package/dist/docs/references/docs-voice-overview.md +1028 -0
- package/dist/docs/references/docs-voice-speech-to-speech.md +146 -0
- package/dist/docs/references/reference-voice-aws-nova-sonic.md +247 -0
- package/dist/index.cjs +1619 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +269 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1615 -0
- package/dist/index.js.map +1 -0
- package/dist/types.d.ts +354 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/utils/auth.d.ts +6 -0
- package/dist/utils/auth.d.ts.map +1 -0
- package/dist/utils/errors.d.ts +17 -0
- package/dist/utils/errors.d.ts.map +1 -0
- package/package.json +68 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1615 @@
|
|
|
1
|
+
import { randomUUID } from 'crypto';
|
|
2
|
+
import { PassThrough } from 'stream';
|
|
3
|
+
import { BedrockRuntimeClient, InvokeModelWithBidirectionalStreamCommand } from '@aws-sdk/client-bedrock-runtime';
|
|
4
|
+
import { MastraVoice } from '@mastra/core/voice';
|
|
5
|
+
import { NodeHttp2Handler } from '@smithy/node-http-handler';
|
|
6
|
+
import { defaultProvider } from '@aws-sdk/credential-provider-node';
|
|
7
|
+
|
|
8
|
+
// src/index.ts
|
|
9
|
+
|
|
10
|
+
// src/types.ts
|
|
11
|
+
/**
 * Machine-readable error codes carried by NovaSonicError instances.
 *
 * Keys are the symbolic names, values are the strings placed in
 * `error.code`. The object is frozen so the set of codes cannot be
 * reassigned or extended at runtime.
 */
const NovaSonicErrorCode = /* @__PURE__ */ Object.freeze({
  CONNECTION_FAILED: "connection_failed",
  CONNECTION_NOT_ESTABLISHED: "connection_not_established",
  AUTHENTICATION_FAILED: "authentication_failed",
  CREDENTIALS_MISSING: "credentials_missing",
  REGION_INVALID: "region_invalid",
  WEBSOCKET_ERROR: "websocket_error",
  AUDIO_PROCESSING_ERROR: "audio_processing_error",
  AUDIO_STREAM_ERROR: "audio_stream_error",
  SPEAKER_STREAM_ERROR: "speaker_stream_error",
  TRANSCRIPTION_TIMEOUT: "transcription_timeout",
  TRANSCRIPTION_FAILED: "transcription_failed",
  TOOL_EXECUTION_ERROR: "tool_execution_error",
  TOOL_NOT_FOUND: "tool_not_found",
  SESSION_CONFIG_UPDATE_FAILED: "session_config_update_failed",
  INVALID_AUDIO_FORMAT: "invalid_audio_format",
  NOT_CONNECTED: "not_connected",
  INVALID_STATE: "invalid_state",
  VALIDATION_ERROR: "validation_error",
  UNKNOWN_ERROR: "unknown_error"
});
|
|
33
|
+
|
|
34
|
+
// src/utils/errors.ts
|
|
35
|
+
/**
 * Error type used by the Nova Sonic voice provider.
 *
 * Carries a machine-readable `code`, an optional `details` payload and the
 * creation time in epoch milliseconds. When `details` is itself an Error it
 * is also forwarded as the standard `cause`, so the underlying failure and
 * its stack stay attached to the wrapper.
 */
class NovaSonicError extends Error {
  code;
  details;
  timestamp;
  /**
   * @param {string} code - One of the NovaSonicErrorCode string values.
   * @param {string} message - Human-readable description.
   * @param {unknown} [details] - Extra context; often the original error.
   */
  constructor(code, message, details) {
    // Only an Error is a meaningful `cause`; other payloads stay in `details`.
    super(message, details instanceof Error ? { cause: details } : void 0);
    this.name = "NovaSonicError";
    this.code = code;
    this.details = details;
    this.timestamp = Date.now();
  }
  /**
   * Flatten the error into a plain object.
   *
   * @returns {{message: string, code: string, details: unknown, timestamp: number}}
   */
  toEventData() {
    const { message, code, details, timestamp } = this;
    return { message, code, details, timestamp };
  }
}
|
|
55
|
+
|
|
56
|
+
// src/utils/auth.ts
|
|
57
|
+
/**
 * Resolve the AWS credentials to use for a connection.
 *
 * Explicit credentials are returned as-is. Otherwise the AWS SDK default
 * provider chain is invoked; any failure that is not already a
 * NovaSonicError is wrapped in one with code "authentication_failed".
 *
 * @param {object} [explicitCredentials] - Credentials supplied in the config.
 * @param {boolean} [debug] - When truthy, progress is written via console.info.
 * @returns {Promise<object>} The resolved credentials.
 * @throws {NovaSonicError} When the default provider chain fails.
 */
async function getAwsCredentials(explicitCredentials, debug) {
  const trace = (text) => {
    if (debug) {
      console.info(text);
    }
  };
  if (explicitCredentials) {
    trace("[getAwsCredentials] Using explicit credentials provided in config");
    return explicitCredentials;
  }
  try {
    trace("[getAwsCredentials] Using default credential provider chain");
    const resolveFromChain = defaultProvider();
    const resolved = await resolveFromChain();
    trace("[getAwsCredentials] Credentials retrieved successfully");
    return resolved;
  } catch (error) {
    if (error instanceof NovaSonicError) {
      throw error;
    }
    const reason = error instanceof Error ? error.message : "Unknown error";
    throw new NovaSonicError(
      "authentication_failed" /* AUTHENTICATION_FAILED */,
      `Failed to load AWS credentials: ${reason}`,
      error
    );
  }
}
|
|
84
|
+
|
|
85
|
+
// src/index.ts
|
|
86
|
+
// Model ID used when the caller does not specify one.
const DEFAULT_MODEL = "amazon.nova-2-sonic-v1:0";
// AWS region used when the caller does not specify one.
const DEFAULT_REGION = "us-east-1";
|
|
88
|
+
var NovaSonicVoice = class extends MastraVoice {
|
|
89
|
+
client;
|
|
90
|
+
stream;
|
|
91
|
+
inputStream;
|
|
92
|
+
// Input stream for sending events to AWS
|
|
93
|
+
_eventQueue;
|
|
94
|
+
_signalQueue;
|
|
95
|
+
_closeSignal;
|
|
96
|
+
_promptName;
|
|
97
|
+
state = "disconnected";
|
|
98
|
+
events;
|
|
99
|
+
instructions;
|
|
100
|
+
tools;
|
|
101
|
+
requestContext;
|
|
102
|
+
debug;
|
|
103
|
+
region;
|
|
104
|
+
model;
|
|
105
|
+
credentials;
|
|
106
|
+
speakerStreams;
|
|
107
|
+
currentResponseId;
|
|
108
|
+
processingStream = false;
|
|
109
|
+
streamRestartAttempted = false;
|
|
110
|
+
// Prevent multiple restart attempts
|
|
111
|
+
sessionConfig;
|
|
112
|
+
promptStarted = false;
|
|
113
|
+
// Track if promptStart was sent (now sent during connection)
|
|
114
|
+
audioContentName;
|
|
115
|
+
audioContentStarted = false;
|
|
116
|
+
hasSentContentEnd = false;
|
|
117
|
+
// Track if contentEnd has been sent for current turn
|
|
118
|
+
turnCompleted = false;
|
|
119
|
+
// Track if turn has been completed (to prevent sending contentEnd after turn completion)
|
|
120
|
+
turnCompleteTimeout;
|
|
121
|
+
// Timeout for fallback turn completion
|
|
122
|
+
isReceivingAssistantAudio = false;
|
|
123
|
+
// Track if we're currently receiving assistant audio output
|
|
124
|
+
currentTextGenerationStage;
|
|
125
|
+
// Track generationStage (SPECULATIVE|FINAL) for current text content block
|
|
126
|
+
/**
|
|
127
|
+
* Creates a new instance of NovaSonicVoice.
|
|
128
|
+
*
|
|
129
|
+
* @param config - Configuration options for the voice instance
|
|
130
|
+
* @param config.region - AWS region (defaults to us-east-1)
|
|
131
|
+
* @param config.model - The model ID to use (defaults to amazon.nova-2-sonic-v1:0)
|
|
132
|
+
* @param config.credentials - AWS credentials (optional, uses default credential chain)
|
|
133
|
+
* @param config.speaker - Voice name/identifier
|
|
134
|
+
* @param config.languageCode - Language code for the voice
|
|
135
|
+
* @param config.debug - Enable debug mode
|
|
136
|
+
*
|
|
137
|
+
* @example
|
|
138
|
+
* ```typescript
|
|
139
|
+
* const voice = new NovaSonicVoice({
|
|
140
|
+
* region: 'us-east-1',
|
|
141
|
+
* model: 'amazon.nova-2-sonic-v1:0',
|
|
142
|
+
* speaker: 'default',
|
|
143
|
+
* });
|
|
144
|
+
* ```
|
|
145
|
+
*/
|
|
146
|
+
constructor(config = {}) {
|
|
147
|
+
let normalizedConfig;
|
|
148
|
+
if ("realtimeConfig" in config || "speechModel" in config || "listeningModel" in config) {
|
|
149
|
+
normalizedConfig = config;
|
|
150
|
+
} else {
|
|
151
|
+
const configOptions = config;
|
|
152
|
+
normalizedConfig = {
|
|
153
|
+
realtimeConfig: {
|
|
154
|
+
model: configOptions.model || DEFAULT_MODEL,
|
|
155
|
+
apiKey: void 0,
|
|
156
|
+
// AWS doesn't use API keys
|
|
157
|
+
options: configOptions
|
|
158
|
+
},
|
|
159
|
+
speaker: typeof configOptions.speaker === "string" ? configOptions.speaker : "matthew"
|
|
160
|
+
};
|
|
161
|
+
}
|
|
162
|
+
super(normalizedConfig);
|
|
163
|
+
const options = normalizedConfig.realtimeConfig?.options || config;
|
|
164
|
+
this.region = options.region || DEFAULT_REGION;
|
|
165
|
+
this.model = options.model || DEFAULT_MODEL;
|
|
166
|
+
this.credentials = options.credentials;
|
|
167
|
+
this.debug = options.debug || false;
|
|
168
|
+
this.sessionConfig = options.sessionConfig;
|
|
169
|
+
this.events = {};
|
|
170
|
+
this.speakerStreams = /* @__PURE__ */ new Map();
|
|
171
|
+
const validRegions = ["us-east-1", "us-west-2", "ap-northeast-1"];
|
|
172
|
+
if (!validRegions.includes(this.region)) {
|
|
173
|
+
throw new NovaSonicError(
|
|
174
|
+
"region_invalid" /* REGION_INVALID */,
|
|
175
|
+
`Invalid region: ${this.region}. Supported regions: ${validRegions.join(", ")}`
|
|
176
|
+
);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Returns a list of available voice speakers.
|
|
181
|
+
*
|
|
182
|
+
* Nova 2 Sonic provides expressive voices across multiple languages.
|
|
183
|
+
* Tiffany (en-US, feminine) and Matthew (en-US, masculine) are polyglot
|
|
184
|
+
* voices that can speak all supported languages.
|
|
185
|
+
*
|
|
186
|
+
* @returns Promise resolving to an array of voice objects
|
|
187
|
+
*/
|
|
188
|
+
async getSpeakers() {
|
|
189
|
+
return Promise.resolve([
|
|
190
|
+
// English (US) - Polyglot voices
|
|
191
|
+
{ voiceId: "tiffany", name: "Tiffany", language: "English", locale: "en-US", gender: "feminine", polyglot: true },
|
|
192
|
+
{
|
|
193
|
+
voiceId: "matthew",
|
|
194
|
+
name: "Matthew",
|
|
195
|
+
language: "English",
|
|
196
|
+
locale: "en-US",
|
|
197
|
+
gender: "masculine",
|
|
198
|
+
polyglot: true
|
|
199
|
+
},
|
|
200
|
+
// English (UK)
|
|
201
|
+
{ voiceId: "amy", name: "Amy", language: "English", locale: "en-GB", gender: "feminine", polyglot: false },
|
|
202
|
+
// English (Australia)
|
|
203
|
+
{ voiceId: "olivia", name: "Olivia", language: "English", locale: "en-AU", gender: "feminine", polyglot: false },
|
|
204
|
+
// English (Indian)
|
|
205
|
+
{ voiceId: "kiara", name: "Kiara", language: "English", locale: "en-IN", gender: "feminine", polyglot: false },
|
|
206
|
+
{ voiceId: "arjun", name: "Arjun", language: "English", locale: "en-IN", gender: "masculine", polyglot: false },
|
|
207
|
+
// French
|
|
208
|
+
{ voiceId: "ambre", name: "Ambre", language: "French", locale: "fr-FR", gender: "feminine", polyglot: false },
|
|
209
|
+
{
|
|
210
|
+
voiceId: "florian",
|
|
211
|
+
name: "Florian",
|
|
212
|
+
language: "French",
|
|
213
|
+
locale: "fr-FR",
|
|
214
|
+
gender: "masculine",
|
|
215
|
+
polyglot: false
|
|
216
|
+
},
|
|
217
|
+
// Italian
|
|
218
|
+
{
|
|
219
|
+
voiceId: "beatrice",
|
|
220
|
+
name: "Beatrice",
|
|
221
|
+
language: "Italian",
|
|
222
|
+
locale: "it-IT",
|
|
223
|
+
gender: "feminine",
|
|
224
|
+
polyglot: false
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
voiceId: "lorenzo",
|
|
228
|
+
name: "Lorenzo",
|
|
229
|
+
language: "Italian",
|
|
230
|
+
locale: "it-IT",
|
|
231
|
+
gender: "masculine",
|
|
232
|
+
polyglot: false
|
|
233
|
+
},
|
|
234
|
+
// German
|
|
235
|
+
{ voiceId: "tina", name: "Tina", language: "German", locale: "de-DE", gender: "feminine", polyglot: false },
|
|
236
|
+
{
|
|
237
|
+
voiceId: "lennart",
|
|
238
|
+
name: "Lennart",
|
|
239
|
+
language: "German",
|
|
240
|
+
locale: "de-DE",
|
|
241
|
+
gender: "masculine",
|
|
242
|
+
polyglot: false
|
|
243
|
+
},
|
|
244
|
+
// Spanish (US)
|
|
245
|
+
{ voiceId: "lupe", name: "Lupe", language: "Spanish", locale: "es-US", gender: "feminine", polyglot: false },
|
|
246
|
+
{ voiceId: "carlos", name: "Carlos", language: "Spanish", locale: "es-US", gender: "masculine", polyglot: false },
|
|
247
|
+
// Portuguese
|
|
248
|
+
{
|
|
249
|
+
voiceId: "carolina",
|
|
250
|
+
name: "Carolina",
|
|
251
|
+
language: "Portuguese",
|
|
252
|
+
locale: "pt-BR",
|
|
253
|
+
gender: "feminine",
|
|
254
|
+
polyglot: false
|
|
255
|
+
},
|
|
256
|
+
{ voiceId: "leo", name: "Leo", language: "Portuguese", locale: "pt-BR", gender: "masculine", polyglot: false },
|
|
257
|
+
// Hindi
|
|
258
|
+
{ voiceId: "kiara", name: "Kiara", language: "Hindi", locale: "hi-IN", gender: "feminine", polyglot: false },
|
|
259
|
+
{ voiceId: "arjun", name: "Arjun", language: "Hindi", locale: "hi-IN", gender: "masculine", polyglot: false }
|
|
260
|
+
]);
|
|
261
|
+
}
|
|
262
|
+
/**
|
|
263
|
+
* Establishes a connection to the AWS Bedrock bidirectional streaming service.
|
|
264
|
+
* Must be called before using speak, listen, or send functions.
|
|
265
|
+
*
|
|
266
|
+
* @throws {NovaSonicError} If connection fails or credentials are missing
|
|
267
|
+
*
|
|
268
|
+
* @example
|
|
269
|
+
* ```typescript
|
|
270
|
+
* await voice.connect();
|
|
271
|
+
* // Now ready for voice interactions
|
|
272
|
+
* ```
|
|
273
|
+
*/
|
|
274
|
+
async connect({ requestContext } = {}) {
|
|
275
|
+
if (this.state === "connected" || this.state === "connecting") {
|
|
276
|
+
this.log("Already connected or connecting");
|
|
277
|
+
return;
|
|
278
|
+
}
|
|
279
|
+
this.state = "connecting";
|
|
280
|
+
this.requestContext = requestContext;
|
|
281
|
+
this.streamRestartAttempted = false;
|
|
282
|
+
try {
|
|
283
|
+
await this.createBedrockClient();
|
|
284
|
+
const asyncIterable = this.createEventQueue();
|
|
285
|
+
this.enqueueInitialSessionEvents();
|
|
286
|
+
await this.sendInitialConnectCommand(asyncIterable);
|
|
287
|
+
this.processStream().catch((error) => {
|
|
288
|
+
this.log("Error in stream processing:", error);
|
|
289
|
+
this.emit("error", {
|
|
290
|
+
message: error instanceof Error ? error.message : "Stream processing error",
|
|
291
|
+
code: "STREAM_PROCESSING_ERROR",
|
|
292
|
+
details: error
|
|
293
|
+
});
|
|
294
|
+
});
|
|
295
|
+
this.log("Connected to AWS Bedrock Nova 2 Sonic");
|
|
296
|
+
} catch (error) {
|
|
297
|
+
this.state = "disconnected";
|
|
298
|
+
if (this.client) {
|
|
299
|
+
if (typeof this.client.destroy === "function") {
|
|
300
|
+
this.client.destroy();
|
|
301
|
+
}
|
|
302
|
+
this.client = void 0;
|
|
303
|
+
}
|
|
304
|
+
this.log("Connection error:", error);
|
|
305
|
+
const errorMessage = error instanceof Error ? error.message : "Unknown error during connection";
|
|
306
|
+
throw new NovaSonicError("connection_failed" /* CONNECTION_FAILED */, `Failed to connect to AWS Bedrock: ${errorMessage}`, error);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* Resolve credentials and initialize the Bedrock Runtime client over HTTP/2.
|
|
311
|
+
*/
|
|
312
|
+
async createBedrockClient() {
|
|
313
|
+
this.log("Getting AWS credentials...");
|
|
314
|
+
const credentials = await getAwsCredentials(this.credentials, this.debug);
|
|
315
|
+
if (!credentials) {
|
|
316
|
+
throw new NovaSonicError(
|
|
317
|
+
"credentials_missing" /* CREDENTIALS_MISSING */,
|
|
318
|
+
"AWS credentials are required. Please configure AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables or provide credentials in the config."
|
|
319
|
+
);
|
|
320
|
+
}
|
|
321
|
+
this.log("Credentials retrieved:", {
|
|
322
|
+
hasAccessKeyId: !!credentials.accessKeyId,
|
|
323
|
+
hasSecretAccessKey: !!credentials.secretAccessKey,
|
|
324
|
+
hasSessionToken: !!credentials.sessionToken,
|
|
325
|
+
accessKeyIdPrefix: credentials.accessKeyId ? `${credentials.accessKeyId.substring(0, 6)}...` : "missing",
|
|
326
|
+
expiration: credentials.expiration ? credentials.expiration.toISOString() : "no expiration"
|
|
327
|
+
});
|
|
328
|
+
this.log(`Initializing Bedrock Runtime client for region: ${this.region}, model: ${this.model}`);
|
|
329
|
+
const nodeHttp2Handler = new NodeHttp2Handler({
|
|
330
|
+
requestTimeout: 3e5,
|
|
331
|
+
// 5 minutes
|
|
332
|
+
sessionTimeout: 3e5,
|
|
333
|
+
// 5 minutes
|
|
334
|
+
disableConcurrentStreams: false,
|
|
335
|
+
maxConcurrentStreams: 20
|
|
336
|
+
});
|
|
337
|
+
this.client = new BedrockRuntimeClient({
|
|
338
|
+
region: this.region,
|
|
339
|
+
credentials,
|
|
340
|
+
requestHandler: nodeHttp2Handler
|
|
341
|
+
});
|
|
342
|
+
}
|
|
343
|
+
/**
|
|
344
|
+
* Build the async-iterable event queue used as the request body for the
|
|
345
|
+
* bidirectional stream. Returns the iterable and wires up internal queue
|
|
346
|
+
* helpers (_eventQueue, _signalQueue, _closeSignal) used by sendClientEvent.
|
|
347
|
+
*/
|
|
348
|
+
createEventQueue() {
|
|
349
|
+
this.log("Creating bidirectional stream command...");
|
|
350
|
+
const voiceInstance = this;
|
|
351
|
+
const eventQueue = [];
|
|
352
|
+
const pendingResolvers = [];
|
|
353
|
+
let closeSignal = false;
|
|
354
|
+
const signalQueue = () => {
|
|
355
|
+
if (pendingResolvers.length > 0) {
|
|
356
|
+
voiceInstance.log(`[AsyncIterable] Signaling queue - resolving ${pendingResolvers.length} pending Promise(s)`);
|
|
357
|
+
const resolvers = [...pendingResolvers];
|
|
358
|
+
pendingResolvers.length = 0;
|
|
359
|
+
resolvers.forEach((resolve) => resolve());
|
|
360
|
+
} else {
|
|
361
|
+
voiceInstance.log("[AsyncIterable] signalQueue called but no pending Promise");
|
|
362
|
+
}
|
|
363
|
+
};
|
|
364
|
+
const asyncIterable = {
|
|
365
|
+
[Symbol.asyncIterator]: () => {
|
|
366
|
+
voiceInstance.log("[AsyncIterable] Iterator requested");
|
|
367
|
+
return {
|
|
368
|
+
next: async () => {
|
|
369
|
+
try {
|
|
370
|
+
if (closeSignal || voiceInstance.state === "disconnected") {
|
|
371
|
+
voiceInstance.log(`[AsyncIterable] Stream closed (state: ${voiceInstance.state}), done = true`);
|
|
372
|
+
return { value: void 0, done: true };
|
|
373
|
+
}
|
|
374
|
+
if (eventQueue.length === 0) {
|
|
375
|
+
try {
|
|
376
|
+
voiceInstance.log("[AsyncIterable] Queue empty, waiting for signal...");
|
|
377
|
+
await new Promise((resolve) => {
|
|
378
|
+
pendingResolvers.push(resolve);
|
|
379
|
+
voiceInstance.log(
|
|
380
|
+
`[AsyncIterable] Promise created, waiting for signal (${pendingResolvers.length} pending)...`
|
|
381
|
+
);
|
|
382
|
+
setImmediate(() => {
|
|
383
|
+
if (eventQueue.length > 0) {
|
|
384
|
+
voiceInstance.log("[AsyncIterable] Data arrived before wait, resolving immediately");
|
|
385
|
+
const index = pendingResolvers.indexOf(resolve);
|
|
386
|
+
if (index !== -1) {
|
|
387
|
+
pendingResolvers.splice(index, 1);
|
|
388
|
+
resolve();
|
|
389
|
+
}
|
|
390
|
+
return;
|
|
391
|
+
}
|
|
392
|
+
if (closeSignal || voiceInstance.state === "disconnected") {
|
|
393
|
+
voiceInstance.log("[AsyncIterable] Closed before wait, resolving");
|
|
394
|
+
const index = pendingResolvers.indexOf(resolve);
|
|
395
|
+
if (index !== -1) {
|
|
396
|
+
pendingResolvers.splice(index, 1);
|
|
397
|
+
resolve();
|
|
398
|
+
}
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
401
|
+
});
|
|
402
|
+
});
|
|
403
|
+
voiceInstance.log("[AsyncIterable] Promise resolved, checking queue...");
|
|
404
|
+
} catch (error) {
|
|
405
|
+
if (error instanceof Error && error.message === "Stream closed") {
|
|
406
|
+
voiceInstance.log("[AsyncIterable] Stream closed during wait");
|
|
407
|
+
return { value: void 0, done: true };
|
|
408
|
+
}
|
|
409
|
+
voiceInstance.log("[AsyncIterable] Error during wait:", error);
|
|
410
|
+
}
|
|
411
|
+
}
|
|
412
|
+
if (closeSignal) {
|
|
413
|
+
voiceInstance.log("[AsyncIterable] Stream closed (closeSignal)");
|
|
414
|
+
return { value: void 0, done: true };
|
|
415
|
+
}
|
|
416
|
+
if (voiceInstance.state === "disconnected") {
|
|
417
|
+
voiceInstance.log("[AsyncIterable] Stream closed (disconnected state)");
|
|
418
|
+
return { value: void 0, done: true };
|
|
419
|
+
}
|
|
420
|
+
while (eventQueue.length === 0 && !closeSignal) {
|
|
421
|
+
if (voiceInstance.state === "disconnected") {
|
|
422
|
+
voiceInstance.log("[AsyncIterable] Stream closed before wait loop");
|
|
423
|
+
return { value: void 0, done: true };
|
|
424
|
+
}
|
|
425
|
+
voiceInstance.log("[AsyncIterable] Queue still empty, waiting again...");
|
|
426
|
+
await new Promise((resolve) => {
|
|
427
|
+
pendingResolvers.push(resolve);
|
|
428
|
+
setImmediate(() => {
|
|
429
|
+
if (eventQueue.length > 0 || closeSignal || voiceInstance.state === "disconnected") {
|
|
430
|
+
const index = pendingResolvers.indexOf(resolve);
|
|
431
|
+
if (index !== -1) {
|
|
432
|
+
pendingResolvers.splice(index, 1);
|
|
433
|
+
resolve();
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
});
|
|
437
|
+
});
|
|
438
|
+
if (closeSignal || voiceInstance.state === "disconnected") {
|
|
439
|
+
voiceInstance.log("[AsyncIterable] Stream closed during wait loop");
|
|
440
|
+
return { value: void 0, done: true };
|
|
441
|
+
}
|
|
442
|
+
}
|
|
443
|
+
const nextEvent = eventQueue.shift();
|
|
444
|
+
const eventJson = JSON.stringify(nextEvent);
|
|
445
|
+
const eventBytes = Buffer.from(eventJson, "utf-8");
|
|
446
|
+
voiceInstance.log(`[AsyncIterable] Yielding event of size: ${eventBytes.length}`);
|
|
447
|
+
return {
|
|
448
|
+
value: {
|
|
449
|
+
chunk: {
|
|
450
|
+
bytes: eventBytes
|
|
451
|
+
}
|
|
452
|
+
},
|
|
453
|
+
done: false
|
|
454
|
+
};
|
|
455
|
+
} catch (error) {
|
|
456
|
+
voiceInstance.log("[AsyncIterable] Error in iterator:", error);
|
|
457
|
+
closeSignal = true;
|
|
458
|
+
return { value: void 0, done: true };
|
|
459
|
+
}
|
|
460
|
+
},
|
|
461
|
+
return: async () => {
|
|
462
|
+
voiceInstance.log("[AsyncIterable] Iterator return() called");
|
|
463
|
+
closeSignal = true;
|
|
464
|
+
signalQueue();
|
|
465
|
+
return { value: void 0, done: true };
|
|
466
|
+
},
|
|
467
|
+
throw: async (error) => {
|
|
468
|
+
voiceInstance.log("[AsyncIterable] Iterator throw() called:", error);
|
|
469
|
+
closeSignal = true;
|
|
470
|
+
signalQueue();
|
|
471
|
+
throw error;
|
|
472
|
+
}
|
|
473
|
+
};
|
|
474
|
+
}
|
|
475
|
+
};
|
|
476
|
+
this._eventQueue = eventQueue;
|
|
477
|
+
this._signalQueue = signalQueue;
|
|
478
|
+
this._closeSignal = () => {
|
|
479
|
+
closeSignal = true;
|
|
480
|
+
signalQueue();
|
|
481
|
+
};
|
|
482
|
+
return asyncIterable;
|
|
483
|
+
}
|
|
484
|
+
/**
|
|
485
|
+
* Pre-populate the event queue with the AWS Nova Sonic connection
|
|
486
|
+
* handshake events: sessionStart, promptStart, then a SYSTEM text content
|
|
487
|
+
* block carrying the configured instructions. AUDIO contentStart is NOT
|
|
488
|
+
* sent here; it is deferred to the first send() call.
|
|
489
|
+
*/
|
|
490
|
+
enqueueInitialSessionEvents() {
|
|
491
|
+
const eventQueue = this._eventQueue;
|
|
492
|
+
if (!eventQueue) {
|
|
493
|
+
throw new NovaSonicError(
|
|
494
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
495
|
+
"Event queue must be initialized before enqueueing session events"
|
|
496
|
+
);
|
|
497
|
+
}
|
|
498
|
+
this.log("Pre-populating queue with sessionStart and promptStart events...");
|
|
499
|
+
const promptName = randomUUID();
|
|
500
|
+
this._promptName = promptName;
|
|
501
|
+
const sessionStartEvent = {};
|
|
502
|
+
if (this.sessionConfig) {
|
|
503
|
+
if (this.sessionConfig.inferenceConfiguration) {
|
|
504
|
+
sessionStartEvent.inferenceConfiguration = {
|
|
505
|
+
maxTokens: this.sessionConfig.inferenceConfiguration.maxTokens || 4096,
|
|
506
|
+
topP: this.sessionConfig.inferenceConfiguration.topP || 0.9,
|
|
507
|
+
temperature: this.sessionConfig.inferenceConfiguration.temperature || 0.7,
|
|
508
|
+
...this.sessionConfig.inferenceConfiguration.topK !== void 0 && {
|
|
509
|
+
topK: this.sessionConfig.inferenceConfiguration.topK
|
|
510
|
+
},
|
|
511
|
+
...this.sessionConfig.inferenceConfiguration.stopSequences && {
|
|
512
|
+
stopSequences: this.sessionConfig.inferenceConfiguration.stopSequences
|
|
513
|
+
}
|
|
514
|
+
};
|
|
515
|
+
} else {
|
|
516
|
+
sessionStartEvent.inferenceConfiguration = {
|
|
517
|
+
maxTokens: 4096,
|
|
518
|
+
topP: 0.9,
|
|
519
|
+
temperature: 0.7
|
|
520
|
+
};
|
|
521
|
+
}
|
|
522
|
+
if (this.sessionConfig.turnDetectionConfiguration) {
|
|
523
|
+
sessionStartEvent.turnDetectionConfiguration = {
|
|
524
|
+
...this.sessionConfig.turnDetectionConfiguration.endpointingSensitivity && {
|
|
525
|
+
endpointingSensitivity: this.sessionConfig.turnDetectionConfiguration.endpointingSensitivity
|
|
526
|
+
}
|
|
527
|
+
};
|
|
528
|
+
}
|
|
529
|
+
} else {
|
|
530
|
+
sessionStartEvent.inferenceConfiguration = {
|
|
531
|
+
maxTokens: 4096,
|
|
532
|
+
topP: 0.9,
|
|
533
|
+
temperature: 0.7
|
|
534
|
+
};
|
|
535
|
+
}
|
|
536
|
+
eventQueue.push({
|
|
537
|
+
event: {
|
|
538
|
+
sessionStart: sessionStartEvent
|
|
539
|
+
}
|
|
540
|
+
});
|
|
541
|
+
let voiceId = "matthew";
|
|
542
|
+
if (this.sessionConfig?.voice) {
|
|
543
|
+
if (typeof this.sessionConfig.voice === "string") {
|
|
544
|
+
voiceId = this.sessionConfig.voice;
|
|
545
|
+
} else if (this.sessionConfig.voice.name) {
|
|
546
|
+
voiceId = this.sessionConfig.voice.name;
|
|
547
|
+
}
|
|
548
|
+
} else if (this.speaker && this.speaker !== "default") {
|
|
549
|
+
if (typeof this.speaker === "string") {
|
|
550
|
+
voiceId = this.speaker;
|
|
551
|
+
} else {
|
|
552
|
+
const speakerObj = this.speaker;
|
|
553
|
+
if (speakerObj && typeof speakerObj === "object" && speakerObj.name) {
|
|
554
|
+
voiceId = speakerObj.name;
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
const promptStartEvent = {
|
|
559
|
+
promptName,
|
|
560
|
+
textOutputConfiguration: {
|
|
561
|
+
mediaType: "text/plain"
|
|
562
|
+
},
|
|
563
|
+
// AWS REQUIRES this - cannot be omitted
|
|
564
|
+
audioOutputConfiguration: {
|
|
565
|
+
mediaType: "audio/lpcm",
|
|
566
|
+
sampleRateHertz: 24e3,
|
|
567
|
+
sampleSizeBits: 16,
|
|
568
|
+
channelCount: 1,
|
|
569
|
+
voiceId,
|
|
570
|
+
encoding: "base64",
|
|
571
|
+
audioType: "SPEECH"
|
|
572
|
+
}
|
|
573
|
+
};
|
|
574
|
+
if (this.sessionConfig?.tools && this.sessionConfig.tools.length > 0) {
|
|
575
|
+
promptStartEvent.toolConfiguration = {
|
|
576
|
+
tools: this.sessionConfig.tools.map((tool) => {
|
|
577
|
+
let inputSchemaJson;
|
|
578
|
+
if (typeof tool.inputSchema === "string") {
|
|
579
|
+
inputSchemaJson = tool.inputSchema;
|
|
580
|
+
} else {
|
|
581
|
+
inputSchemaJson = JSON.stringify(tool.inputSchema);
|
|
582
|
+
}
|
|
583
|
+
return {
|
|
584
|
+
toolSpec: {
|
|
585
|
+
name: tool.name,
|
|
586
|
+
description: tool.description,
|
|
587
|
+
inputSchema: {
|
|
588
|
+
json: inputSchemaJson
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
};
|
|
592
|
+
}),
|
|
593
|
+
// toolChoice goes inside toolConfiguration for Nova 2 Sonic
|
|
594
|
+
...this.sessionConfig?.toolChoice && { toolChoice: this.sessionConfig.toolChoice }
|
|
595
|
+
};
|
|
596
|
+
} else if (this.sessionConfig?.toolChoice) {
|
|
597
|
+
promptStartEvent.toolConfiguration = {
|
|
598
|
+
toolChoice: this.sessionConfig.toolChoice
|
|
599
|
+
};
|
|
600
|
+
}
|
|
601
|
+
eventQueue.push({
|
|
602
|
+
event: {
|
|
603
|
+
promptStart: promptStartEvent
|
|
604
|
+
}
|
|
605
|
+
});
|
|
606
|
+
this.promptStarted = true;
|
|
607
|
+
const systemContentName = randomUUID();
|
|
608
|
+
eventQueue.push({
|
|
609
|
+
event: {
|
|
610
|
+
contentStart: {
|
|
611
|
+
promptName,
|
|
612
|
+
contentName: systemContentName,
|
|
613
|
+
type: "TEXT",
|
|
614
|
+
interactive: false,
|
|
615
|
+
role: "SYSTEM",
|
|
616
|
+
textInputConfiguration: {
|
|
617
|
+
mediaType: "text/plain"
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
});
|
|
622
|
+
eventQueue.push({
|
|
623
|
+
event: {
|
|
624
|
+
textInput: {
|
|
625
|
+
promptName,
|
|
626
|
+
contentName: systemContentName,
|
|
627
|
+
content: this.instructions || ""
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
});
|
|
631
|
+
eventQueue.push({
|
|
632
|
+
event: {
|
|
633
|
+
contentEnd: {
|
|
634
|
+
promptName,
|
|
635
|
+
contentName: systemContentName
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
});
|
|
639
|
+
this.audioContentStarted = false;
|
|
640
|
+
this.log(`Queue pre-populated with ${eventQueue.length} event(s)`);
|
|
641
|
+
}
|
|
642
|
+
/**
|
|
643
|
+
* Issue the InvokeModelWithBidirectionalStreamCommand to AWS Bedrock with
|
|
644
|
+
* a 5-second abort timeout that tears down the client on hang to avoid
|
|
645
|
+
* leaked HTTP/2 sessions. On success the response stream is stored and the
|
|
646
|
+
* voice transitions to 'connected'.
|
|
647
|
+
*/
|
|
648
|
+
async sendInitialConnectCommand(asyncIterable) {
|
|
649
|
+
if (!this.client) {
|
|
650
|
+
throw new NovaSonicError(
|
|
651
|
+
"connection_failed" /* CONNECTION_FAILED */,
|
|
652
|
+
"Bedrock client must be created before sending the initial command"
|
|
653
|
+
);
|
|
654
|
+
}
|
|
655
|
+
const command = new InvokeModelWithBidirectionalStreamCommand({
|
|
656
|
+
modelId: this.model,
|
|
657
|
+
body: asyncIterable
|
|
658
|
+
// Type assertion needed as SDK types may be strict
|
|
659
|
+
});
|
|
660
|
+
const sendStartTime = Date.now();
|
|
661
|
+
const abortController = new AbortController();
|
|
662
|
+
const timeoutId = setTimeout(() => {
|
|
663
|
+
this.log("[DEBUG] client.send() timeout after 5 seconds - aborting request");
|
|
664
|
+
abortController.abort();
|
|
665
|
+
}, 5e3);
|
|
666
|
+
let response;
|
|
667
|
+
try {
|
|
668
|
+
response = await this.client.send(command, { abortSignal: abortController.signal });
|
|
669
|
+
} catch (error) {
|
|
670
|
+
const sendDuration2 = Date.now() - sendStartTime;
|
|
671
|
+
if (abortController.signal.aborted) {
|
|
672
|
+
this.log(`[DEBUG] client.send() aborted after ${sendDuration2}ms`);
|
|
673
|
+
this._closeSignal?.();
|
|
674
|
+
this.client.destroy();
|
|
675
|
+
throw new Error("client.send() timeout");
|
|
676
|
+
}
|
|
677
|
+
this.log(`[DEBUG] client.send() error after ${sendDuration2}ms:`, error);
|
|
678
|
+
throw error;
|
|
679
|
+
} finally {
|
|
680
|
+
clearTimeout(timeoutId);
|
|
681
|
+
}
|
|
682
|
+
const sendDuration = Date.now() - sendStartTime;
|
|
683
|
+
this.log(`[DEBUG] client.send() completed in ${sendDuration}ms`);
|
|
684
|
+
this.log("Received response from AWS Bedrock");
|
|
685
|
+
this.stream = response.body;
|
|
686
|
+
this.log(
|
|
687
|
+
`[DEBUG] Response stream is async iterable: ${this.stream && typeof this.stream[Symbol.asyncIterator] === "function"}`
|
|
688
|
+
);
|
|
689
|
+
this.state = "connected";
|
|
690
|
+
this.log(`[STATE] State set to 'connected'`);
|
|
691
|
+
}
|
|
692
|
+
/**
|
|
693
|
+
* Process the bidirectional stream from AWS Bedrock
|
|
694
|
+
*/
|
|
695
|
+
async processStream() {
|
|
696
|
+
if (!this.stream) {
|
|
697
|
+
this.log("[Stream] No stream available, cannot process");
|
|
698
|
+
return;
|
|
699
|
+
}
|
|
700
|
+
if (this.processingStream) {
|
|
701
|
+
this.log("[Stream] Already processing stream, skipping");
|
|
702
|
+
return;
|
|
703
|
+
}
|
|
704
|
+
this.processingStream = true;
|
|
705
|
+
this.log("[Stream] Starting stream processing");
|
|
706
|
+
let eventCount = 0;
|
|
707
|
+
let lastEventTime = Date.now();
|
|
708
|
+
try {
|
|
709
|
+
for await (const chunk of this.stream) {
|
|
710
|
+
if (chunk.chunk) {
|
|
711
|
+
const textResponse = Buffer.from(chunk.chunk.bytes || []).toString("utf-8");
|
|
712
|
+
eventCount++;
|
|
713
|
+
const now = Date.now();
|
|
714
|
+
const timeSinceLastEvent = now - lastEventTime;
|
|
715
|
+
lastEventTime = now;
|
|
716
|
+
this.log(
|
|
717
|
+
`[Stream] Received chunk #${eventCount}, length: ${textResponse.length}, time since last: ${timeSinceLastEvent}ms`
|
|
718
|
+
);
|
|
719
|
+
try {
|
|
720
|
+
const jsonResponse = JSON.parse(textResponse);
|
|
721
|
+
this.log(`[Stream] ========================================`);
|
|
722
|
+
this.log(`[Stream] Parsed JSON response, keys: ${Object.keys(jsonResponse).join(", ")}`);
|
|
723
|
+
if (jsonResponse.event) {
|
|
724
|
+
const eventKeys = Object.keys(jsonResponse.event);
|
|
725
|
+
this.log(`[Stream] Event keys: ${eventKeys.join(", ")}`);
|
|
726
|
+
if (jsonResponse.event.contentStart) {
|
|
727
|
+
this.log(`[Stream] \u2192 Handling contentStart`);
|
|
728
|
+
this.handleServerEvent({ contentStart: jsonResponse.event.contentStart });
|
|
729
|
+
} else if (jsonResponse.event.textOutput) {
|
|
730
|
+
this.log(
|
|
731
|
+
`[Stream] \u2192 Handling textOutput, content length: ${jsonResponse.event.textOutput?.content?.length ?? 0}`
|
|
732
|
+
);
|
|
733
|
+
this.handleServerEvent({ textOutput: jsonResponse.event.textOutput });
|
|
734
|
+
} else if (jsonResponse.event.audioOutput) {
|
|
735
|
+
this.handleServerEvent({ audioOutput: jsonResponse.event.audioOutput });
|
|
736
|
+
} else if (jsonResponse.event.toolUse) {
|
|
737
|
+
this.handleServerEvent({ toolUse: jsonResponse.event.toolUse });
|
|
738
|
+
} else if (jsonResponse.event.contentEnd && jsonResponse.event.contentEnd.type === "TOOL") {
|
|
739
|
+
this.handleServerEvent({ contentEnd: jsonResponse.event.contentEnd });
|
|
740
|
+
} else if (jsonResponse.event.contentEnd) {
|
|
741
|
+
this.log(
|
|
742
|
+
`[Stream] Found contentEnd, type: ${jsonResponse.event.contentEnd.type}, stopReason: ${jsonResponse.event.contentEnd.stopReason}`
|
|
743
|
+
);
|
|
744
|
+
this.handleServerEvent({ contentEnd: jsonResponse.event.contentEnd });
|
|
745
|
+
} else if (jsonResponse.event.completionStart) {
|
|
746
|
+
this.log(
|
|
747
|
+
"[Stream] Found completionStart inside event object:",
|
|
748
|
+
JSON.stringify(jsonResponse.event.completionStart, null, 2)
|
|
749
|
+
);
|
|
750
|
+
this.emit("completionStart", jsonResponse.event.completionStart);
|
|
751
|
+
} else if (jsonResponse.event.completionEnd) {
|
|
752
|
+
this.log(
|
|
753
|
+
"[Stream] Found completionEnd inside event object:",
|
|
754
|
+
JSON.stringify(jsonResponse.event.completionEnd, null, 2)
|
|
755
|
+
);
|
|
756
|
+
this.handleServerEvent({ completionEnd: jsonResponse.event.completionEnd });
|
|
757
|
+
} else {
|
|
758
|
+
const eventKeys2 = Object.keys(jsonResponse.event || {});
|
|
759
|
+
this.log(`[Stream] Event keys for other events: ${eventKeys2.join(", ")}`);
|
|
760
|
+
if (eventKeys2.length > 0) {
|
|
761
|
+
if (eventKeys2.includes("completionEnd")) {
|
|
762
|
+
this.log("[Stream] Found completionEnd in other events, handling explicitly");
|
|
763
|
+
this.handleServerEvent({ completionEnd: jsonResponse.event.completionEnd });
|
|
764
|
+
} else {
|
|
765
|
+
const eventKey = eventKeys2[0];
|
|
766
|
+
this.log(`[Stream] Dispatching other event: ${eventKey}`);
|
|
767
|
+
const eventValue = jsonResponse.event[eventKey];
|
|
768
|
+
if (eventValue !== void 0) {
|
|
769
|
+
if (eventKey === "completionEnd") {
|
|
770
|
+
this.handleServerEvent({ completionEnd: eventValue });
|
|
771
|
+
} else {
|
|
772
|
+
this.handleServerEvent({ [eventKey]: eventValue });
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
}
|
|
776
|
+
} else if (Object.keys(jsonResponse).length > 0) {
|
|
777
|
+
this.log(`[Stream] Unknown event structure, keys:`, Object.keys(jsonResponse).join(", "));
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
} else {
|
|
781
|
+
if (this.debug) {
|
|
782
|
+
this.log(
|
|
783
|
+
'[Stream] Received event without "event" wrapper, keys:',
|
|
784
|
+
Object.keys(jsonResponse).join(", ")
|
|
785
|
+
);
|
|
786
|
+
}
|
|
787
|
+
if (jsonResponse.usageEvent) {
|
|
788
|
+
this.emit("usage", {
|
|
789
|
+
inputTokens: jsonResponse.usageEvent.totalInputTokens || 0,
|
|
790
|
+
outputTokens: jsonResponse.usageEvent.totalOutputTokens || 0,
|
|
791
|
+
totalTokens: jsonResponse.usageEvent.totalTokens || 0
|
|
792
|
+
});
|
|
793
|
+
}
|
|
794
|
+
if (jsonResponse.completionEnd) {
|
|
795
|
+
this.log(
|
|
796
|
+
"[Stream] Found completionEnd at top level:",
|
|
797
|
+
JSON.stringify(jsonResponse.completionEnd, null, 2)
|
|
798
|
+
);
|
|
799
|
+
this.handleServerEvent({ completionEnd: jsonResponse.completionEnd });
|
|
800
|
+
}
|
|
801
|
+
if (!jsonResponse.event && !jsonResponse.completionEnd && !jsonResponse.usageEvent) {
|
|
802
|
+
this.log(
|
|
803
|
+
"[Stream] Received response without event wrapper, keys:",
|
|
804
|
+
Object.keys(jsonResponse).join(", ")
|
|
805
|
+
);
|
|
806
|
+
}
|
|
807
|
+
if (jsonResponse.completionStart || jsonResponse.event?.completionStart) {
|
|
808
|
+
const completionStart = jsonResponse.completionStart || jsonResponse.event.completionStart;
|
|
809
|
+
this.log("[Stream] Found completionStart:", JSON.stringify(completionStart, null, 2));
|
|
810
|
+
this.emit("completionStart", completionStart);
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
} catch (parseError) {
|
|
814
|
+
this.log("[Stream] Failed to parse JSON response:", textResponse.substring(0, 200));
|
|
815
|
+
this.emit("error", {
|
|
816
|
+
message: "Failed to parse stream response",
|
|
817
|
+
code: "PARSE_ERROR",
|
|
818
|
+
details: parseError
|
|
819
|
+
});
|
|
820
|
+
}
|
|
821
|
+
} else if (chunk.internalServerException) {
|
|
822
|
+
this.emit("error", {
|
|
823
|
+
message: "Internal server error",
|
|
824
|
+
code: "INTERNAL_SERVER_ERROR",
|
|
825
|
+
details: chunk.internalServerException
|
|
826
|
+
});
|
|
827
|
+
} else if (chunk.modelStreamErrorException) {
|
|
828
|
+
this.emit("error", {
|
|
829
|
+
message: "Model stream error",
|
|
830
|
+
code: "MODEL_STREAM_ERROR",
|
|
831
|
+
details: chunk.modelStreamErrorException
|
|
832
|
+
});
|
|
833
|
+
} else if (chunk.modelTimeoutException) {
|
|
834
|
+
this.emit("error", {
|
|
835
|
+
message: "Model timeout",
|
|
836
|
+
code: "MODEL_TIMEOUT",
|
|
837
|
+
details: chunk.modelTimeoutException
|
|
838
|
+
});
|
|
839
|
+
} else if (chunk.serviceUnavailableException) {
|
|
840
|
+
this.emit("error", {
|
|
841
|
+
message: "Service unavailable",
|
|
842
|
+
code: "SERVICE_UNAVAILABLE",
|
|
843
|
+
details: chunk.serviceUnavailableException
|
|
844
|
+
});
|
|
845
|
+
} else if (chunk.throttlingException) {
|
|
846
|
+
this.emit("error", {
|
|
847
|
+
message: "Request throttled",
|
|
848
|
+
code: "THROTTLING",
|
|
849
|
+
details: chunk.throttlingException
|
|
850
|
+
});
|
|
851
|
+
} else if (chunk.validationException) {
|
|
852
|
+
this.emit("error", {
|
|
853
|
+
message: "Validation error",
|
|
854
|
+
code: "VALIDATION_ERROR",
|
|
855
|
+
details: chunk.validationException
|
|
856
|
+
});
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
} catch (streamError) {
|
|
860
|
+
this.log("[Stream] Error in processStream:", streamError);
|
|
861
|
+
this.emit("error", {
|
|
862
|
+
message: "Stream processing error",
|
|
863
|
+
code: "STREAM_ERROR",
|
|
864
|
+
details: streamError instanceof Error ? streamError.message : String(streamError)
|
|
865
|
+
});
|
|
866
|
+
} finally {
|
|
867
|
+
this.processingStream = false;
|
|
868
|
+
this.log(
|
|
869
|
+
`[Stream] processStream finished, processingStream set to false. Total events received: ${eventCount || 0}`
|
|
870
|
+
);
|
|
871
|
+
this.log(`[Stream] Stream state: state=${this.state}, stream exists=${!!this.stream}`);
|
|
872
|
+
if (!this.turnCompleted && this.audioContentStarted) {
|
|
873
|
+
this.log("[Stream] Stream ended but turn not completed - signaling turn completion as fallback");
|
|
874
|
+
this.log(
|
|
875
|
+
`[Stream] State: turnCompleted=${this.turnCompleted}, audioContentStarted=${this.audioContentStarted}, hasSentContentEnd=${this.hasSentContentEnd}`
|
|
876
|
+
);
|
|
877
|
+
this.turnCompleted = true;
|
|
878
|
+
this.emit("turnComplete", { timestamp: Date.now() });
|
|
879
|
+
if (this.currentResponseId) {
|
|
880
|
+
const stream = this.speakerStreams.get(this.currentResponseId);
|
|
881
|
+
if (stream) {
|
|
882
|
+
stream.end();
|
|
883
|
+
}
|
|
884
|
+
this.speakerStreams.delete(this.currentResponseId);
|
|
885
|
+
this.currentResponseId = void 0;
|
|
886
|
+
}
|
|
887
|
+
this.hasSentContentEnd = false;
|
|
888
|
+
this.log("[Stream] Turn completion signaled, ready for next turn");
|
|
889
|
+
} else if (this.turnCompleted) {
|
|
890
|
+
this.log("[Stream] Stream ended and turn was already completed");
|
|
891
|
+
} else {
|
|
892
|
+
this.log(
|
|
893
|
+
`[Stream] Stream ended but turn not completed - audioContentStarted=${this.audioContentStarted}, turnCompleted=${this.turnCompleted}`
|
|
894
|
+
);
|
|
895
|
+
}
|
|
896
|
+
if (this.stream && this.state === "connected" && !this.processingStream && !this.streamRestartAttempted) {
|
|
897
|
+
this.log("[Stream] Stream still open but processing stopped - will restart stream processing");
|
|
898
|
+
this.streamRestartAttempted = true;
|
|
899
|
+
setImmediate(() => {
|
|
900
|
+
if (this.stream && this.state === "connected" && !this.processingStream) {
|
|
901
|
+
this.log("[Stream] Restarting stream processing for subsequent turns");
|
|
902
|
+
this.processStream().catch((error) => {
|
|
903
|
+
this.log("[Stream] Error restarting stream processing:", error);
|
|
904
|
+
this.streamRestartAttempted = false;
|
|
905
|
+
});
|
|
906
|
+
} else {
|
|
907
|
+
this.streamRestartAttempted = false;
|
|
908
|
+
}
|
|
909
|
+
});
|
|
910
|
+
} else {
|
|
911
|
+
if (this.streamRestartAttempted) {
|
|
912
|
+
this.log("[Stream] Stream restart already attempted, skipping");
|
|
913
|
+
}
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
/**
|
|
918
|
+
* Handle server events from AWS Bedrock
|
|
919
|
+
*/
|
|
920
|
+
handleServerEvent(event) {
|
|
921
|
+
if (this.debug) {
|
|
922
|
+
this.log("Received event, keys:", Object.keys(event).join(", "));
|
|
923
|
+
}
|
|
924
|
+
if (event.contentStart) {
|
|
925
|
+
this.handleContentStart(event.contentStart);
|
|
926
|
+
}
|
|
927
|
+
if (event.textOutput) {
|
|
928
|
+
this.handleTextOutput(event.textOutput);
|
|
929
|
+
}
|
|
930
|
+
if (event.audioOutput?.content) {
|
|
931
|
+
this.handleAudioOutput(event.audioOutput);
|
|
932
|
+
}
|
|
933
|
+
if (event.toolUse) {
|
|
934
|
+
this.handleToolUse(event.toolUse);
|
|
935
|
+
}
|
|
936
|
+
if (event.contentEnd) {
|
|
937
|
+
this.handleContentEnd(event.contentEnd);
|
|
938
|
+
}
|
|
939
|
+
if (event.completionEnd) {
|
|
940
|
+
this.handleCompletionEnd(event.completionEnd);
|
|
941
|
+
}
|
|
942
|
+
if (event.error) {
|
|
943
|
+
this.emit("error", {
|
|
944
|
+
message: event.error.message || "Unknown error",
|
|
945
|
+
code: event.error.code || "UNKNOWN_ERROR",
|
|
946
|
+
details: event.error
|
|
947
|
+
});
|
|
948
|
+
}
|
|
949
|
+
}
|
|
950
|
+
/**
|
|
951
|
+
* Handle a contentStart event. Tracks generationStage for text content
|
|
952
|
+
* blocks so the corresponding 'writing' events can be tagged
|
|
953
|
+
* SPECULATIVE/FINAL for the client.
|
|
954
|
+
*/
|
|
955
|
+
handleContentStart(contentStart) {
|
|
956
|
+
const role = contentStart.role?.toLowerCase();
|
|
957
|
+
const contentType = contentStart.type;
|
|
958
|
+
this.log(`[Event] contentStart: type=${contentType || "unknown"}, role=${role}`);
|
|
959
|
+
this.emit("contentStart", contentStart);
|
|
960
|
+
if (contentType === "TEXT" && contentStart.additionalModelFields) {
|
|
961
|
+
try {
|
|
962
|
+
const additionalFields = JSON.parse(contentStart.additionalModelFields);
|
|
963
|
+
this.currentTextGenerationStage = additionalFields.generationStage;
|
|
964
|
+
this.log(`[Event] Text content generationStage: ${this.currentTextGenerationStage}`);
|
|
965
|
+
} catch {
|
|
966
|
+
this.currentTextGenerationStage = void 0;
|
|
967
|
+
}
|
|
968
|
+
} else if (contentType === "TEXT") {
|
|
969
|
+
this.currentTextGenerationStage = void 0;
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
/**
|
|
973
|
+
* Handle a textOutput event. Detects interruption (barge-in) markers in
|
|
974
|
+
* the payload, otherwise emits a 'writing' event with the text and
|
|
975
|
+
* current generationStage.
|
|
976
|
+
*/
|
|
977
|
+
handleTextOutput(textOutput) {
|
|
978
|
+
const text = textOutput.content || "";
|
|
979
|
+
const role = textOutput.role?.toLowerCase() || "assistant";
|
|
980
|
+
this.log(`[Event] textOutput received: role=${role}, text length=${text.length}`);
|
|
981
|
+
let isInterrupted = false;
|
|
982
|
+
try {
|
|
983
|
+
const parsed = JSON.parse(text);
|
|
984
|
+
if (parsed && parsed.interrupted === true) {
|
|
985
|
+
isInterrupted = true;
|
|
986
|
+
}
|
|
987
|
+
} catch {
|
|
988
|
+
if (/interrupted/i.test(text)) {
|
|
989
|
+
isInterrupted = true;
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
if (isInterrupted) {
|
|
993
|
+
this.log(`[Event] Interrupt detected, emitting interrupt event`);
|
|
994
|
+
this.emit("interrupt", { type: "user", timestamp: Date.now() });
|
|
995
|
+
return;
|
|
996
|
+
}
|
|
997
|
+
const generationStage = this.currentTextGenerationStage;
|
|
998
|
+
this.log(`[Event] Emitting 'writing': role=${role}, generationStage=${generationStage}, length=${text.length}`);
|
|
999
|
+
this.emit("writing", { text, role, generationStage });
|
|
1000
|
+
}
|
|
1001
|
+
/**
|
|
1002
|
+
* Handle an audioOutput event. Decodes the base64 LPCM payload, emits
|
|
1003
|
+
* 'speaking' with both the base64 string and an Int16Array view, and
|
|
1004
|
+
* forwards bytes to any active speaker stream.
|
|
1005
|
+
*/
|
|
1006
|
+
handleAudioOutput(audioOutput) {
|
|
1007
|
+
try {
|
|
1008
|
+
const content = audioOutput.content;
|
|
1009
|
+
const audioBytes = Buffer.from(content, "base64");
|
|
1010
|
+
this.log(`[Event] Audio output: ${audioBytes.length} bytes`);
|
|
1011
|
+
this.isReceivingAssistantAudio = true;
|
|
1012
|
+
const audioData = new Int16Array(audioBytes.buffer, audioBytes.byteOffset, audioBytes.byteLength / 2);
|
|
1013
|
+
this.emit("speaking", {
|
|
1014
|
+
audio: content,
|
|
1015
|
+
audioData,
|
|
1016
|
+
response_id: this.currentResponseId
|
|
1017
|
+
});
|
|
1018
|
+
if (this.currentResponseId) {
|
|
1019
|
+
const stream = this.speakerStreams.get(this.currentResponseId);
|
|
1020
|
+
if (stream) {
|
|
1021
|
+
stream.write(audioBytes);
|
|
1022
|
+
}
|
|
1023
|
+
}
|
|
1024
|
+
} catch (error) {
|
|
1025
|
+
this.log("[Event] Error decoding audio:", error);
|
|
1026
|
+
this.emit("error", {
|
|
1027
|
+
message: "Failed to decode audio",
|
|
1028
|
+
code: "AUDIO_DECODE_ERROR",
|
|
1029
|
+
details: error
|
|
1030
|
+
});
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
1033
|
+
/**
|
|
1034
|
+
* Handle a toolUse event. Emits 'toolCall' and dispatches to the
|
|
1035
|
+
* configured tool's execute() function via handleToolCall().
|
|
1036
|
+
*/
|
|
1037
|
+
handleToolUse(toolUse) {
|
|
1038
|
+
const toolUseId = toolUse.toolUseId || "";
|
|
1039
|
+
const toolName = toolUse.toolName || "";
|
|
1040
|
+
const toolInput = toolUse.input || {};
|
|
1041
|
+
this.emit("toolCall", {
|
|
1042
|
+
name: toolName,
|
|
1043
|
+
args: toolInput,
|
|
1044
|
+
id: toolUseId
|
|
1045
|
+
});
|
|
1046
|
+
if (this.tools && toolName in this.tools) {
|
|
1047
|
+
void this.handleToolCall(toolName, toolInput, toolUseId);
|
|
1048
|
+
}
|
|
1049
|
+
}
|
|
1050
|
+
/**
|
|
1051
|
+
* Handle a contentEnd event. Forwards it to clients, then routes by
|
|
1052
|
+
* stopReason / type:
|
|
1053
|
+
* - INTERRUPTED: emit 'interrupt' and tear down the active speaker stream
|
|
1054
|
+
* - TOOL: end the active speaker stream
|
|
1055
|
+
* - AUDIO with END_TURN: signal turnComplete (assistant audio finished)
|
|
1056
|
+
* - AUDIO with PARTIAL_TURN while receiving assistant audio: schedule
|
|
1057
|
+
* fallback turnComplete in case completionEnd never arrives
|
|
1058
|
+
* - AUDIO otherwise: user input ended, reset turn flags
|
|
1059
|
+
*/
|
|
1060
|
+
handleContentEnd(contentEnd) {
|
|
1061
|
+
this.log(`[Event] contentEnd received: type=${contentEnd.type}, stopReason=${contentEnd.stopReason}`);
|
|
1062
|
+
this.emit("contentEnd", contentEnd);
|
|
1063
|
+
if (contentEnd.stopReason === "INTERRUPTED") {
|
|
1064
|
+
this.log("[Event] Content interrupted by user (barge-in)");
|
|
1065
|
+
this.emit("interrupt", { type: "user", timestamp: Date.now() });
|
|
1066
|
+
if (this.currentResponseId) {
|
|
1067
|
+
const stream = this.speakerStreams.get(this.currentResponseId);
|
|
1068
|
+
if (stream) {
|
|
1069
|
+
stream.destroy();
|
|
1070
|
+
}
|
|
1071
|
+
this.speakerStreams.delete(this.currentResponseId);
|
|
1072
|
+
}
|
|
1073
|
+
this.currentResponseId = void 0;
|
|
1074
|
+
this.log("[Event] After interruption, keeping audioContentStarted=true for continued streaming");
|
|
1075
|
+
} else if (contentEnd.type === "TOOL" && this.currentResponseId) {
|
|
1076
|
+
const stream = this.speakerStreams.get(this.currentResponseId);
|
|
1077
|
+
if (stream) {
|
|
1078
|
+
stream.end();
|
|
1079
|
+
}
|
|
1080
|
+
} else if (contentEnd.type === "AUDIO") {
|
|
1081
|
+
if (contentEnd.stopReason === "END_TURN") {
|
|
1082
|
+
this.log(`[Event] contentEnd (AUDIO) with stopReason END_TURN - signaling turn complete`);
|
|
1083
|
+
if (this.currentResponseId) {
|
|
1084
|
+
const stream = this.speakerStreams.get(this.currentResponseId);
|
|
1085
|
+
if (stream) {
|
|
1086
|
+
stream.end();
|
|
1087
|
+
}
|
|
1088
|
+
this.speakerStreams.delete(this.currentResponseId);
|
|
1089
|
+
this.currentResponseId = void 0;
|
|
1090
|
+
}
|
|
1091
|
+
if (!this.turnCompleted) {
|
|
1092
|
+
this.turnCompleted = true;
|
|
1093
|
+
this.emit("turnComplete", { timestamp: Date.now() });
|
|
1094
|
+
this.hasSentContentEnd = false;
|
|
1095
|
+
this.log(
|
|
1096
|
+
`[Event] Turn complete (from contentEnd AUDIO with END_TURN), ready for next turn. audioContentStarted: ${this.audioContentStarted}, audioContentName: ${this.audioContentName}`
|
|
1097
|
+
);
|
|
1098
|
+
} else {
|
|
1099
|
+
this.log(
|
|
1100
|
+
`[Event] contentEnd (AUDIO) with END_TURN received but turn already completed - skipping duplicate turnComplete emission`
|
|
1101
|
+
);
|
|
1102
|
+
}
|
|
1103
|
+
if (!this.turnCompleteTimeout) {
|
|
1104
|
+
this.turnCompleteTimeout = setTimeout(() => {
|
|
1105
|
+
this.log(`[Event] Timeout: completionEnd not received, but turn already completed from contentEnd`);
|
|
1106
|
+
this.turnCompleteTimeout = void 0;
|
|
1107
|
+
}, 1e3);
|
|
1108
|
+
}
|
|
1109
|
+
} else {
|
|
1110
|
+
if (this.isReceivingAssistantAudio && contentEnd.stopReason === "PARTIAL_TURN") {
|
|
1111
|
+
this.isReceivingAssistantAudio = false;
|
|
1112
|
+
if (!this.turnCompleteTimeout && !this.turnCompleted) {
|
|
1113
|
+
this.log(
|
|
1114
|
+
`[Event] contentEnd (AUDIO) with PARTIAL_TURN for assistant output - waiting for completionEnd, setting fallback timeout`
|
|
1115
|
+
);
|
|
1116
|
+
this.turnCompleteTimeout = setTimeout(() => {
|
|
1117
|
+
if (!this.turnCompleted) {
|
|
1118
|
+
this.log(
|
|
1119
|
+
`[Event] Fallback: completionEnd not received after contentEnd (AUDIO) with PARTIAL_TURN, signaling turn complete`
|
|
1120
|
+
);
|
|
1121
|
+
this.turnCompleted = true;
|
|
1122
|
+
this.emit("turnComplete", { timestamp: Date.now() });
|
|
1123
|
+
if (this.currentResponseId) {
|
|
1124
|
+
const stream = this.speakerStreams.get(this.currentResponseId);
|
|
1125
|
+
if (stream) {
|
|
1126
|
+
stream.end();
|
|
1127
|
+
}
|
|
1128
|
+
this.speakerStreams.delete(this.currentResponseId);
|
|
1129
|
+
this.currentResponseId = void 0;
|
|
1130
|
+
}
|
|
1131
|
+
this.hasSentContentEnd = false;
|
|
1132
|
+
this.turnCompleteTimeout = void 0;
|
|
1133
|
+
}
|
|
1134
|
+
}, 2e3);
|
|
1135
|
+
}
|
|
1136
|
+
} else {
|
|
1137
|
+
this.hasSentContentEnd = false;
|
|
1138
|
+
this.turnCompleted = false;
|
|
1139
|
+
this.log(
|
|
1140
|
+
`[Event] contentEnd (AUDIO) - user input ended, stopReason: ${contentEnd.stopReason}. Keeping audioContentStarted=true for next turn. Reset hasSentContentEnd=false, turnCompleted=false.`
|
|
1141
|
+
);
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
} else if (contentEnd.type === "TEXT") {
|
|
1145
|
+
this.currentTextGenerationStage = void 0;
|
|
1146
|
+
this.log(
|
|
1147
|
+
`[Event] contentEnd (TEXT) received, stopReason: ${contentEnd.stopReason}. Turn completion handled by completionEnd/contentEnd(AUDIO).`
|
|
1148
|
+
);
|
|
1149
|
+
if (contentEnd.stopReason === "END_TURN") {
|
|
1150
|
+
this.hasSentContentEnd = false;
|
|
1151
|
+
}
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1154
|
+
/**
|
|
1155
|
+
* Handle a completionEnd event. AWS uses this as the definitive signal
|
|
1156
|
+
* that a turn (and all audio output) has finished. Tears down the active
|
|
1157
|
+
* speaker stream, clears any fallback timer, emits 'turnComplete' once,
|
|
1158
|
+
* and forwards token usage if reported.
|
|
1159
|
+
*/
|
|
1160
|
+
handleCompletionEnd(completionEnd) {
|
|
1161
|
+
this.log(`[Event] completionEnd received, stopReason: ${completionEnd.stopReason}`);
|
|
1162
|
+
if (this.turnCompleteTimeout) {
|
|
1163
|
+
clearTimeout(this.turnCompleteTimeout);
|
|
1164
|
+
this.turnCompleteTimeout = void 0;
|
|
1165
|
+
}
|
|
1166
|
+
if (this.currentResponseId) {
|
|
1167
|
+
const stream = this.speakerStreams.get(this.currentResponseId);
|
|
1168
|
+
if (stream) {
|
|
1169
|
+
stream.end();
|
|
1170
|
+
}
|
|
1171
|
+
this.speakerStreams.delete(this.currentResponseId);
|
|
1172
|
+
this.currentResponseId = void 0;
|
|
1173
|
+
}
|
|
1174
|
+
this.isReceivingAssistantAudio = false;
|
|
1175
|
+
if (!this.turnCompleted) {
|
|
1176
|
+
this.log(
|
|
1177
|
+
`[Event] completionEnd - signaling turn complete (stopReason: ${completionEnd.stopReason || "undefined"})`
|
|
1178
|
+
);
|
|
1179
|
+
this.turnCompleted = true;
|
|
1180
|
+
this.emit("turnComplete", { timestamp: Date.now() });
|
|
1181
|
+
this.hasSentContentEnd = false;
|
|
1182
|
+
} else {
|
|
1183
|
+
this.log(`[Event] completionEnd received but turn already completed - skipping duplicate turnComplete emission`);
|
|
1184
|
+
}
|
|
1185
|
+
if (completionEnd.usage) {
|
|
1186
|
+
this.emit("usage", {
|
|
1187
|
+
inputTokens: completionEnd.usage.inputTokens || 0,
|
|
1188
|
+
outputTokens: completionEnd.usage.outputTokens || 0,
|
|
1189
|
+
totalTokens: (completionEnd.usage.inputTokens || 0) + (completionEnd.usage.outputTokens || 0)
|
|
1190
|
+
});
|
|
1191
|
+
}
|
|
1192
|
+
}
|
|
1193
|
+
/**
|
|
1194
|
+
* Handle tool execution
|
|
1195
|
+
*/
|
|
1196
|
+
async handleToolCall(toolName, args, toolUseId) {
|
|
1197
|
+
const tool = this.tools?.[toolName];
|
|
1198
|
+
if (!tool || !tool.execute) {
|
|
1199
|
+
this.emit("error", {
|
|
1200
|
+
message: `Tool ${toolName} not found or has no execute function`,
|
|
1201
|
+
code: "TOOL_NOT_FOUND"
|
|
1202
|
+
});
|
|
1203
|
+
return;
|
|
1204
|
+
}
|
|
1205
|
+
try {
|
|
1206
|
+
const result = await tool.execute(
|
|
1207
|
+
{ context: args, requestContext: this.requestContext },
|
|
1208
|
+
{
|
|
1209
|
+
toolCallId: toolUseId,
|
|
1210
|
+
messages: []
|
|
1211
|
+
}
|
|
1212
|
+
);
|
|
1213
|
+
await this.sendClientEvent({
|
|
1214
|
+
toolResult: {
|
|
1215
|
+
toolUseId,
|
|
1216
|
+
content: [
|
|
1217
|
+
{
|
|
1218
|
+
json: typeof result === "object" ? result : { result }
|
|
1219
|
+
}
|
|
1220
|
+
]
|
|
1221
|
+
}
|
|
1222
|
+
});
|
|
1223
|
+
} catch (error) {
|
|
1224
|
+
this.emit("error", {
|
|
1225
|
+
message: `Error executing tool ${toolName}: ${error instanceof Error ? error.message : "Unknown error"}`,
|
|
1226
|
+
code: "TOOL_EXECUTION_ERROR",
|
|
1227
|
+
details: error
|
|
1228
|
+
});
|
|
1229
|
+
await this.sendClientEvent({
|
|
1230
|
+
toolResult: {
|
|
1231
|
+
toolUseId,
|
|
1232
|
+
content: [
|
|
1233
|
+
{
|
|
1234
|
+
text: `Error: ${error instanceof Error ? error.message : "Unknown error"}`
|
|
1235
|
+
}
|
|
1236
|
+
]
|
|
1237
|
+
}
|
|
1238
|
+
});
|
|
1239
|
+
}
|
|
1240
|
+
}
|
|
1241
|
+
/**
|
|
1242
|
+
* Send a client event to AWS Bedrock
|
|
1243
|
+
* Events are sent through the input stream that was passed to the bidirectional stream command
|
|
1244
|
+
*/
|
|
1245
|
+
async sendClientEvent(event) {
|
|
1246
|
+
if (this.state !== "connected") {
|
|
1247
|
+
throw new NovaSonicError("not_connected" /* NOT_CONNECTED */, "Not connected to AWS Bedrock. Call connect() first.");
|
|
1248
|
+
}
|
|
1249
|
+
try {
|
|
1250
|
+
const eventQueue = this._eventQueue;
|
|
1251
|
+
const signalQueue = this._signalQueue;
|
|
1252
|
+
if (!eventQueue || !signalQueue) {
|
|
1253
|
+
throw new NovaSonicError(
|
|
1254
|
+
"not_connected" /* NOT_CONNECTED */,
|
|
1255
|
+
"Event queue not initialized. Connection may not be fully established."
|
|
1256
|
+
);
|
|
1257
|
+
}
|
|
1258
|
+
this.log(`[sendClientEvent] Adding event to queue (queue size: ${eventQueue.length})`);
|
|
1259
|
+
eventQueue.push({ event });
|
|
1260
|
+
this.log(`[sendClientEvent] Event added, queue size now: ${eventQueue.length}, signaling...`);
|
|
1261
|
+
signalQueue();
|
|
1262
|
+
this.log(`[sendClientEvent] Signal sent`);
|
|
1263
|
+
if (this.debug) {
|
|
1264
|
+
this.log("Sent client event, keys:", Object.keys(event).join(", "));
|
|
1265
|
+
}
|
|
1266
|
+
} catch (error) {
|
|
1267
|
+
throw new NovaSonicError(
|
|
1268
|
+
"websocket_error" /* WEBSOCKET_ERROR */,
|
|
1269
|
+
`Failed to send client event: ${error instanceof Error ? error.message : "Unknown error"}`,
|
|
1270
|
+
error
|
|
1271
|
+
);
|
|
1272
|
+
}
|
|
1273
|
+
}
|
|
1274
|
+
/**
|
|
1275
|
+
* Disconnects from the AWS Bedrock session and cleans up resources.
|
|
1276
|
+
*
|
|
1277
|
+
* Pushes a `sessionEnd` event to the queue before signalling close,
|
|
1278
|
+
* then schedules client destruction on the next tick so the async
|
|
1279
|
+
* iterator has a chance to yield the event to the SDK.
|
|
1280
|
+
*/
|
|
1281
|
+
close() {
|
|
1282
|
+
if (this.state === "disconnected") {
|
|
1283
|
+
return;
|
|
1284
|
+
}
|
|
1285
|
+
this.state = "disconnected";
|
|
1286
|
+
this.processingStream = false;
|
|
1287
|
+
if (this.turnCompleteTimeout) {
|
|
1288
|
+
clearTimeout(this.turnCompleteTimeout);
|
|
1289
|
+
this.turnCompleteTimeout = void 0;
|
|
1290
|
+
}
|
|
1291
|
+
const eventQueue = this._eventQueue;
|
|
1292
|
+
const signalQueue = this._signalQueue;
|
|
1293
|
+
if (eventQueue && signalQueue) {
|
|
1294
|
+
eventQueue.push({ event: { sessionEnd: {} } });
|
|
1295
|
+
signalQueue();
|
|
1296
|
+
}
|
|
1297
|
+
const closeSignal = this._closeSignal;
|
|
1298
|
+
if (closeSignal) {
|
|
1299
|
+
closeSignal();
|
|
1300
|
+
}
|
|
1301
|
+
if (this.inputStream) {
|
|
1302
|
+
this.inputStream.end();
|
|
1303
|
+
this.inputStream = void 0;
|
|
1304
|
+
}
|
|
1305
|
+
for (const stream of this.speakerStreams.values()) {
|
|
1306
|
+
stream.end();
|
|
1307
|
+
}
|
|
1308
|
+
this.speakerStreams.clear();
|
|
1309
|
+
const client = this.client;
|
|
1310
|
+
this.client = void 0;
|
|
1311
|
+
this.stream = void 0;
|
|
1312
|
+
if (client) {
|
|
1313
|
+
setImmediate(() => {
|
|
1314
|
+
if (typeof client.destroy === "function") {
|
|
1315
|
+
client.destroy();
|
|
1316
|
+
}
|
|
1317
|
+
});
|
|
1318
|
+
}
|
|
1319
|
+
this.log("Disconnected from AWS Bedrock Nova 2 Sonic");
|
|
1320
|
+
}
|
|
1321
|
+
/**
|
|
1322
|
+
* Equips the voice instance with a set of instructions.
|
|
1323
|
+
*/
|
|
1324
|
+
addInstructions(instructions) {
|
|
1325
|
+
this.instructions = instructions;
|
|
1326
|
+
}
|
|
1327
|
+
/**
|
|
1328
|
+
* Equips the voice instance with a set of tools.
|
|
1329
|
+
*/
|
|
1330
|
+
addTools(tools) {
|
|
1331
|
+
this.tools = tools || {};
|
|
1332
|
+
}
|
|
1333
|
+
/**
|
|
1334
|
+
* Convert text to speech
|
|
1335
|
+
*/
|
|
1336
|
+
async speak(input, _options) {
|
|
1337
|
+
if (this.state !== "connected") {
|
|
1338
|
+
throw new NovaSonicError("not_connected" /* NOT_CONNECTED */, "Not connected. Call connect() first.");
|
|
1339
|
+
}
|
|
1340
|
+
let text = "";
|
|
1341
|
+
if (typeof input !== "string") {
|
|
1342
|
+
const chunks = [];
|
|
1343
|
+
for await (const chunk of input) {
|
|
1344
|
+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
|
|
1345
|
+
}
|
|
1346
|
+
text = Buffer.concat(chunks).toString("utf-8");
|
|
1347
|
+
} else {
|
|
1348
|
+
text = input;
|
|
1349
|
+
}
|
|
1350
|
+
if (text.trim().length === 0) {
|
|
1351
|
+
throw new NovaSonicError("validation_error" /* VALIDATION_ERROR */, "Input text is empty");
|
|
1352
|
+
}
|
|
1353
|
+
this.currentResponseId = `response-${Date.now()}`;
|
|
1354
|
+
const speakerStream = new PassThrough();
|
|
1355
|
+
speakerStream.id = this.currentResponseId;
|
|
1356
|
+
this.speakerStreams.set(this.currentResponseId, speakerStream);
|
|
1357
|
+
this.emit("speaker", speakerStream);
|
|
1358
|
+
const promptName = this._promptName;
|
|
1359
|
+
if (!promptName) {
|
|
1360
|
+
throw new NovaSonicError(
|
|
1361
|
+
"not_connected" /* NOT_CONNECTED */,
|
|
1362
|
+
"Prompt name not initialized. Connection may not be fully established."
|
|
1363
|
+
);
|
|
1364
|
+
}
|
|
1365
|
+
if (!this.promptStarted) {
|
|
1366
|
+
throw new NovaSonicError(
|
|
1367
|
+
"invalid_state" /* INVALID_STATE */,
|
|
1368
|
+
"Prompt not started. This should not happen - prompt should be started during connection."
|
|
1369
|
+
);
|
|
1370
|
+
}
|
|
1371
|
+
const contentName = randomUUID();
|
|
1372
|
+
await this.sendClientEvent({
|
|
1373
|
+
contentStart: {
|
|
1374
|
+
promptName,
|
|
1375
|
+
contentName,
|
|
1376
|
+
type: "TEXT",
|
|
1377
|
+
interactive: true,
|
|
1378
|
+
role: "USER",
|
|
1379
|
+
textInputConfiguration: {
|
|
1380
|
+
mediaType: "text/plain"
|
|
1381
|
+
}
|
|
1382
|
+
}
|
|
1383
|
+
});
|
|
1384
|
+
await this.sendClientEvent({
|
|
1385
|
+
textInput: {
|
|
1386
|
+
promptName,
|
|
1387
|
+
contentName,
|
|
1388
|
+
content: text
|
|
1389
|
+
}
|
|
1390
|
+
});
|
|
1391
|
+
await this.sendClientEvent({
|
|
1392
|
+
contentEnd: {
|
|
1393
|
+
promptName,
|
|
1394
|
+
contentName
|
|
1395
|
+
}
|
|
1396
|
+
});
|
|
1397
|
+
}
|
|
1398
|
+
/**
|
|
1399
|
+
* Convert speech to text (transcription)
|
|
1400
|
+
* For Nova Sonic, this is the same as send() - both stream audio input
|
|
1401
|
+
*/
|
|
1402
|
+
async listen(audioStream, _options) {
|
|
1403
|
+
if (audioStream && typeof audioStream === "object" && "read" in audioStream) {
|
|
1404
|
+
await this.send(audioStream);
|
|
1405
|
+
} else {
|
|
1406
|
+
throw new NovaSonicError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Unsupported audio stream format for listen()");
|
|
1407
|
+
}
|
|
1408
|
+
}
|
|
1409
|
+
/**
|
|
1410
|
+
* Streams audio data in real-time to the AWS Bedrock service.
|
|
1411
|
+
* Following AWS Nova 2 Sonic event sequence:
|
|
1412
|
+
* 1. contentStart (AUDIO, USER) - if not already sent
|
|
1413
|
+
* 2. audioInput events (one per chunk)
|
|
1414
|
+
* 3. contentEnd - when audio stream ends (handled separately via endAudioInput)
|
|
1415
|
+
*/
|
|
1416
|
+
async send(audioData) {
|
|
1417
|
+
this.log(`[send] Current state: ${this.state}`);
|
|
1418
|
+
if (this.state !== "connected") {
|
|
1419
|
+
this.log(`[send] ERROR: State is '${this.state}', expected 'connected'`);
|
|
1420
|
+
throw new NovaSonicError(
|
|
1421
|
+
"not_connected" /* NOT_CONNECTED */,
|
|
1422
|
+
`Not connected. Current state: ${this.state}. Call connect() first.`
|
|
1423
|
+
);
|
|
1424
|
+
}
|
|
1425
|
+
this.log(`[send] State check passed, proceeding with send`);
|
|
1426
|
+
if (!(audioData instanceof Int16Array) && !(audioData && typeof audioData === "object" && "read" in audioData)) {
|
|
1427
|
+
throw new NovaSonicError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Unsupported audio data format");
|
|
1428
|
+
}
|
|
1429
|
+
if (this.turnCompleted || this.hasSentContentEnd) {
|
|
1430
|
+
this.log(
|
|
1431
|
+
`[send] Starting new turn - resetting flags. turnCompleted=${this.turnCompleted}, hasSentContentEnd=${this.hasSentContentEnd}.`
|
|
1432
|
+
);
|
|
1433
|
+
const needNewContent = this.hasSentContentEnd;
|
|
1434
|
+
this.turnCompleted = false;
|
|
1435
|
+
this.hasSentContentEnd = false;
|
|
1436
|
+
this.streamRestartAttempted = false;
|
|
1437
|
+
if (needNewContent) {
|
|
1438
|
+
this.audioContentStarted = false;
|
|
1439
|
+
this.log(`[send] contentEnd was previously sent - will create new audio content container`);
|
|
1440
|
+
}
|
|
1441
|
+
this.log(
|
|
1442
|
+
`[send] State reset: turnCompleted=false, hasSentContentEnd=false, audioContentStarted=${this.audioContentStarted}`
|
|
1443
|
+
);
|
|
1444
|
+
}
|
|
1445
|
+
if (!this.promptStarted) {
|
|
1446
|
+
this.promptStarted = true;
|
|
1447
|
+
}
|
|
1448
|
+
const promptName = this._promptName;
|
|
1449
|
+
if (!promptName) {
|
|
1450
|
+
throw new NovaSonicError(
|
|
1451
|
+
"not_connected" /* NOT_CONNECTED */,
|
|
1452
|
+
"Prompt name not initialized. Connection may not be fully established."
|
|
1453
|
+
);
|
|
1454
|
+
}
|
|
1455
|
+
if (!this.audioContentStarted) {
|
|
1456
|
+
const audioContentId = randomUUID();
|
|
1457
|
+
this.audioContentName = audioContentId;
|
|
1458
|
+
this.log(`[send] First audio send - sending AUDIO contentStart with contentName: ${audioContentId}`);
|
|
1459
|
+
await this.sendClientEvent({
|
|
1460
|
+
contentStart: {
|
|
1461
|
+
promptName,
|
|
1462
|
+
contentName: audioContentId,
|
|
1463
|
+
type: "AUDIO",
|
|
1464
|
+
interactive: true,
|
|
1465
|
+
role: "USER",
|
|
1466
|
+
audioInputConfiguration: {
|
|
1467
|
+
mediaType: "audio/lpcm",
|
|
1468
|
+
sampleRateHertz: 16e3,
|
|
1469
|
+
sampleSizeBits: 16,
|
|
1470
|
+
channelCount: 1,
|
|
1471
|
+
encoding: "base64",
|
|
1472
|
+
audioType: "SPEECH"
|
|
1473
|
+
}
|
|
1474
|
+
}
|
|
1475
|
+
});
|
|
1476
|
+
this.audioContentStarted = true;
|
|
1477
|
+
this.log(`[send] AUDIO contentStart sent, ready to stream audio`);
|
|
1478
|
+
} else {
|
|
1479
|
+
this.log(`[send] AUDIO contentStart already sent, sending audioInput chunks directly`);
|
|
1480
|
+
}
|
|
1481
|
+
if (!this.audioContentName) {
|
|
1482
|
+
throw new NovaSonicError("invalid_state" /* INVALID_STATE */, "Audio content name not initialized. This should not happen.");
|
|
1483
|
+
}
|
|
1484
|
+
const contentName = this.audioContentName;
|
|
1485
|
+
if (audioData instanceof Int16Array) {
|
|
1486
|
+
const buffer = Buffer.from(audioData.buffer, audioData.byteOffset, audioData.byteLength);
|
|
1487
|
+
const base64Audio = buffer.toString("base64");
|
|
1488
|
+
this.log(
|
|
1489
|
+
`[send] Sending audioInput chunk, size: ${buffer.length} bytes, contentName: ${contentName}, turnCompleted: ${this.turnCompleted}, hasSentContentEnd: ${this.hasSentContentEnd}, audioContentStarted: ${this.audioContentStarted}, state: ${this.state}`
|
|
1490
|
+
);
|
|
1491
|
+
if (this.state !== "connected") {
|
|
1492
|
+
this.log(`[send] ERROR: State changed to '${this.state}' during send!`);
|
|
1493
|
+
throw new NovaSonicError("not_connected" /* NOT_CONNECTED */, `Connection lost during send. State: ${this.state}`);
|
|
1494
|
+
}
|
|
1495
|
+
await this.sendClientEvent({
|
|
1496
|
+
audioInput: {
|
|
1497
|
+
promptName,
|
|
1498
|
+
contentName,
|
|
1499
|
+
content: base64Audio
|
|
1500
|
+
}
|
|
1501
|
+
});
|
|
1502
|
+
this.log(`[send] audioInput chunk sent successfully`);
|
|
1503
|
+
} else if (audioData && typeof audioData === "object" && "read" in audioData) {
|
|
1504
|
+
const stream = audioData;
|
|
1505
|
+
for await (const chunk of stream) {
|
|
1506
|
+
const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
|
|
1507
|
+
const base64Audio = buffer.toString("base64");
|
|
1508
|
+
this.log(
|
|
1509
|
+
`[send] Sending audioInput chunk from stream, size: ${buffer.length} bytes, contentName: ${contentName}, turnCompleted: ${this.turnCompleted}, hasSentContentEnd: ${this.hasSentContentEnd}`
|
|
1510
|
+
);
|
|
1511
|
+
await this.sendClientEvent({
|
|
1512
|
+
audioInput: {
|
|
1513
|
+
promptName,
|
|
1514
|
+
contentName,
|
|
1515
|
+
content: base64Audio
|
|
1516
|
+
}
|
|
1517
|
+
});
|
|
1518
|
+
}
|
|
1519
|
+
} else {
|
|
1520
|
+
throw new NovaSonicError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Unsupported audio data format");
|
|
1521
|
+
}
|
|
1522
|
+
}
|
|
1523
|
+
/**
|
|
1524
|
+
* End audio input stream (sends contentEnd for audio)
|
|
1525
|
+
* Call this when done sending audio chunks
|
|
1526
|
+
*/
|
|
1527
|
+
async endAudioInput() {
|
|
1528
|
+
if (this.hasSentContentEnd) {
|
|
1529
|
+
this.log("[endAudioInput] contentEnd already sent for this turn, skipping");
|
|
1530
|
+
return;
|
|
1531
|
+
}
|
|
1532
|
+
if (this.turnCompleted) {
|
|
1533
|
+
this.log(
|
|
1534
|
+
"[endAudioInput] Turn already completed by AWS, skipping contentEnd. Resetting turnCompleted flag for next turn."
|
|
1535
|
+
);
|
|
1536
|
+
this.turnCompleted = false;
|
|
1537
|
+
this.hasSentContentEnd = false;
|
|
1538
|
+
return;
|
|
1539
|
+
}
|
|
1540
|
+
if (this.audioContentStarted && this.audioContentName && this._promptName) {
|
|
1541
|
+
const promptName = this._promptName;
|
|
1542
|
+
this.log("[endAudioInput] Sending contentEnd for audio input");
|
|
1543
|
+
await this.sendClientEvent({
|
|
1544
|
+
contentEnd: {
|
|
1545
|
+
promptName,
|
|
1546
|
+
contentName: this.audioContentName
|
|
1547
|
+
}
|
|
1548
|
+
});
|
|
1549
|
+
this.hasSentContentEnd = true;
|
|
1550
|
+
} else {
|
|
1551
|
+
this.log(
|
|
1552
|
+
"[endAudioInput] Cannot send contentEnd: audioContentStarted=" + this.audioContentStarted + ", audioContentName=" + this.audioContentName
|
|
1553
|
+
);
|
|
1554
|
+
}
|
|
1555
|
+
}
|
|
1556
|
+
/**
|
|
1557
|
+
* Register an event listener
|
|
1558
|
+
*/
|
|
1559
|
+
on(event, callback) {
|
|
1560
|
+
if (!this.events[event]) {
|
|
1561
|
+
this.events[event] = [];
|
|
1562
|
+
}
|
|
1563
|
+
this.events[event].push(callback);
|
|
1564
|
+
}
|
|
1565
|
+
/**
|
|
1566
|
+
* Remove an event listener
|
|
1567
|
+
*/
|
|
1568
|
+
off(event, callback) {
|
|
1569
|
+
if (!this.events[event]) {
|
|
1570
|
+
return;
|
|
1571
|
+
}
|
|
1572
|
+
const index = this.events[event].indexOf(callback);
|
|
1573
|
+
if (index !== -1) {
|
|
1574
|
+
this.events[event].splice(index, 1);
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
/**
|
|
1578
|
+
* Emit an event with arguments
|
|
1579
|
+
*/
|
|
1580
|
+
emit(event, data) {
|
|
1581
|
+
if (!this.events[event]) {
|
|
1582
|
+
this.log(`[NovaSonic] emit('${event}'): No listeners registered for this event`);
|
|
1583
|
+
return;
|
|
1584
|
+
}
|
|
1585
|
+
const listenerCount = this.events[event].length;
|
|
1586
|
+
this.log(`[NovaSonic] emit('${event}'): Calling ${listenerCount} listener(s)`);
|
|
1587
|
+
for (const callback of this.events[event]) {
|
|
1588
|
+
try {
|
|
1589
|
+
callback(data);
|
|
1590
|
+
this.log(`[NovaSonic] emit('${event}'): Successfully called one listener`);
|
|
1591
|
+
} catch (error) {
|
|
1592
|
+
this.log(`Error in event handler for ${event}:`, error);
|
|
1593
|
+
}
|
|
1594
|
+
}
|
|
1595
|
+
this.log(`[NovaSonic] emit('${event}'): Finished calling all ${listenerCount} listener(s)`);
|
|
1596
|
+
}
|
|
1597
|
+
/**
|
|
1598
|
+
* Get listener status
|
|
1599
|
+
*/
|
|
1600
|
+
async getListener() {
|
|
1601
|
+
return { enabled: this.state === "connected" };
|
|
1602
|
+
}
|
|
1603
|
+
/**
|
|
1604
|
+
* Log helper
|
|
1605
|
+
*/
|
|
1606
|
+
log(...args) {
|
|
1607
|
+
if (this.debug) {
|
|
1608
|
+
console.info("[NovaSonicVoice]", ...args);
|
|
1609
|
+
}
|
|
1610
|
+
}
|
|
1611
|
+
};
|
|
1612
|
+
|
|
1613
|
+
export { NovaSonicError, NovaSonicErrorCode, NovaSonicVoice };
|
|
1614
|
+
//# sourceMappingURL=index.js.map
|
|
1615
|
+
//# sourceMappingURL=index.js.map
|