@layercode/js-sdk 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1827 @@
1
+ /**
2
+ * Raw wav audio file contents
3
+ * @typedef {Object} WavPackerAudioType
4
+ * @property {Blob} blob
5
+ * @property {string} url
6
+ * @property {number} channelCount
7
+ * @property {number} sampleRate
8
+ * @property {number} duration
9
+ */
10
+
11
+ /**
12
+ * Utility class for assembling PCM16 "audio/wav" data
13
+ * @class
14
+ */
15
+ class WavPacker {
16
+ /**
17
+ * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
18
+ * @param {Float32Array} float32Array
19
+ * @returns {ArrayBuffer}
20
+ */
21
+ static floatTo16BitPCM(float32Array) {
22
+ const buffer = new ArrayBuffer(float32Array.length * 2);
23
+ const view = new DataView(buffer);
24
+ let offset = 0;
25
+ for (let i = 0; i < float32Array.length; i++, offset += 2) {
26
+ let s = Math.max(-1, Math.min(1, float32Array[i]));
27
+ view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
28
+ }
29
+ return buffer;
30
+ }
31
+
32
+ /**
33
+ * Concatenates two ArrayBuffers
34
+ * @param {ArrayBuffer} leftBuffer
35
+ * @param {ArrayBuffer} rightBuffer
36
+ * @returns {ArrayBuffer}
37
+ */
38
+ static mergeBuffers(leftBuffer, rightBuffer) {
39
+ const tmpArray = new Uint8Array(
40
+ leftBuffer.byteLength + rightBuffer.byteLength
41
+ );
42
+ tmpArray.set(new Uint8Array(leftBuffer), 0);
43
+ tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength);
44
+ return tmpArray.buffer;
45
+ }
46
+
47
+ /**
48
+ * Packs data into an Int16 format
49
+ * @private
50
+ * @param {number} size 0 = 1x Int16, 1 = 2x Int16
51
+ * @param {number} arg value to pack
52
+ * @returns {Uint8Array}
53
+ */
54
+ _packData(size, arg) {
55
+ return [
56
+ new Uint8Array([arg, arg >> 8]),
57
+ new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]),
58
+ ][size];
59
+ }
60
+
61
+ /**
62
+ * Packs audio into "audio/wav" Blob
63
+ * @param {number} sampleRate
64
+ * @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
65
+ * @returns {WavPackerAudioType}
66
+ */
67
+ pack(sampleRate, audio) {
68
+ if (!audio?.bitsPerSample) {
69
+ throw new Error(`Missing "bitsPerSample"`);
70
+ } else if (!audio?.channels) {
71
+ throw new Error(`Missing "channels"`);
72
+ } else if (!audio?.data) {
73
+ throw new Error(`Missing "data"`);
74
+ }
75
+ const { bitsPerSample, channels, data } = audio;
76
+ const output = [
77
+ // Header
78
+ 'RIFF',
79
+ this._packData(
80
+ 1,
81
+ 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */
82
+ ), // Length
83
+ 'WAVE',
84
+ // chunk 1
85
+ 'fmt ', // Sub-chunk identifier
86
+ this._packData(1, 16), // Chunk length
87
+ this._packData(0, 1), // Audio format (1 is linear quantization)
88
+ this._packData(0, channels.length),
89
+ this._packData(1, sampleRate),
90
+ this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate
91
+ this._packData(0, (channels.length * bitsPerSample) / 8),
92
+ this._packData(0, bitsPerSample),
93
+ // chunk 2
94
+ 'data', // Sub-chunk identifier
95
+ this._packData(
96
+ 1,
97
+ (channels[0].length * channels.length * bitsPerSample) / 8
98
+ ), // Chunk length
99
+ data,
100
+ ];
101
+ const blob = new Blob(output, { type: 'audio/wav' });
102
+ const url = URL.createObjectURL(blob);
103
+ return {
104
+ blob,
105
+ url,
106
+ channelCount: channels.length,
107
+ sampleRate,
108
+ duration: data.byteLength / (channels.length * sampleRate * 2),
109
+ };
110
+ }
111
+ }
112
+
113
+ globalThis.WavPacker = WavPacker;
114
+
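// Usage sketch for WavPacker (illustrative only): pack one second of a 440 Hz tone into a
// PCM16 WAV blob. Browser-only (Blob, URL.createObjectURL); the values here are arbitrary.
const sampleRate = 24000;
const float32 = new Float32Array(sampleRate); // 1 second of mono audio
for (let i = 0; i < float32.length; i++) {
  float32[i] = 0.5 * Math.sin((2 * Math.PI * 440 * i) / sampleRate);
}
const data = new Int16Array(WavPacker.floatTo16BitPCM(float32));
const wav = new WavPacker().pack(sampleRate, {
  bitsPerSample: 16,
  channels: [float32],
  data,
});
console.log(wav.url, wav.channelCount, wav.duration); // blob URL, 1, 1 (second)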
115
+ /**
116
+ * Constants for help with visualization
117
+ * Helps map frequency ranges from Fast Fourier Transform
118
+ * to human-interpretable ranges, notably music ranges and
119
+ * human vocal ranges.
120
+ */
121
+
122
+ // Eighth octave frequencies
123
+ const octave8Frequencies = [
124
+ 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
125
+ 6644.88, 7040.0, 7458.62, 7902.13,
126
+ ];
127
+
128
+ // Labels for each of the above frequencies
129
+ const octave8FrequencyLabels = [
130
+ 'C',
131
+ 'C#',
132
+ 'D',
133
+ 'D#',
134
+ 'E',
135
+ 'F',
136
+ 'F#',
137
+ 'G',
138
+ 'G#',
139
+ 'A',
140
+ 'A#',
141
+ 'B',
142
+ ];
143
+
144
+ /**
145
+ * All note frequencies from 1st to 8th octave
146
+ * in format "A#8" (A#, 8th octave)
147
+ */
148
+ const noteFrequencies = [];
149
+ const noteFrequencyLabels = [];
150
+ for (let i = 1; i <= 8; i++) {
151
+ for (let f = 0; f < octave8Frequencies.length; f++) {
152
+ const freq = octave8Frequencies[f];
153
+ noteFrequencies.push(freq / Math.pow(2, 8 - i));
154
+ noteFrequencyLabels.push(octave8FrequencyLabels[f] + i);
155
+ }
156
+ }
157
+
158
+ /**
159
+ * Subset of the note frequencies between 32 and 2000 Hz
160
+ * 6 octave range: C1 to B6
161
+ */
162
+ const voiceFrequencyRange = [32.0, 2000.0];
163
+ const voiceFrequencies = noteFrequencies.filter((_, i) => {
164
+ return (
165
+ noteFrequencies[i] > voiceFrequencyRange[0] &&
166
+ noteFrequencies[i] < voiceFrequencyRange[1]
167
+ );
168
+ });
169
+ const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => {
170
+ return (
171
+ noteFrequencies[i] > voiceFrequencyRange[0] &&
172
+ noteFrequencies[i] < voiceFrequencyRange[1]
173
+ );
174
+ });
175
+
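// Quick check of the octave scaling above (illustrative): A8 is 7040 Hz, so A4 should be
// 7040 / 2^(8-4) = 440 Hz, and the frequency/label arrays stay index-aligned.
const a4Index = noteFrequencies.indexOf(7040 / Math.pow(2, 4));
console.log(noteFrequencyLabels[a4Index], noteFrequencies[a4Index]); // "A4" 440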
176
+ /**
177
+ * Output of AudioAnalysis for the frequency domain of the audio
178
+ * @typedef {Object} AudioAnalysisOutputType
179
+ * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
180
+ * @property {number[]} frequencies Raw frequency bucket values
181
+ * @property {string[]} labels Labels for the frequency bucket values
182
+ */
183
+
184
+ /**
185
+ * Analyzes audio for visual output
186
+ * @class
187
+ */
188
+ class AudioAnalysis {
189
+ /**
190
+ * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
191
+ * returns human-readable formatting and labels
192
+ * @param {AnalyserNode} analyser
193
+ * @param {number} sampleRate
194
+ * @param {Float32Array} [fftResult]
195
+ * @param {"frequency"|"music"|"voice"} [analysisType]
196
+ * @param {number} [minDecibels] default -100
197
+ * @param {number} [maxDecibels] default -30
198
+ * @returns {AudioAnalysisOutputType}
199
+ */
200
+ static getFrequencies(
201
+ analyser,
202
+ sampleRate,
203
+ fftResult,
204
+ analysisType = 'frequency',
205
+ minDecibels = -100,
206
+ maxDecibels = -30,
207
+ ) {
208
+ if (!fftResult) {
209
+ fftResult = new Float32Array(analyser.frequencyBinCount);
210
+ analyser.getFloatFrequencyData(fftResult);
211
+ }
212
+ const nyquistFrequency = sampleRate / 2;
213
+ const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
214
+ let outputValues;
215
+ let frequencies;
216
+ let labels;
217
+ if (analysisType === 'music' || analysisType === 'voice') {
218
+ const useFrequencies =
219
+ analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
220
+ const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
221
+ for (let i = 0; i < fftResult.length; i++) {
222
+ const frequency = i * frequencyStep;
223
+ const amplitude = fftResult[i];
224
+ for (let n = useFrequencies.length - 1; n >= 0; n--) {
225
+ if (frequency > useFrequencies[n]) {
226
+ aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
227
+ break;
228
+ }
229
+ }
230
+ }
231
+ outputValues = aggregateOutput;
232
+ frequencies =
233
+ analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
234
+ labels =
235
+ analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
236
+ } else {
237
+ outputValues = Array.from(fftResult);
238
+ frequencies = outputValues.map((_, i) => frequencyStep * i);
239
+ labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
240
+ }
241
+ // We normalize to {0, 1}
242
+ const normalizedOutput = outputValues.map((v) => {
243
+ return Math.max(
244
+ 0,
245
+ Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
246
+ );
247
+ });
248
+ const values = new Float32Array(normalizedOutput);
249
+ return {
250
+ values,
251
+ frequencies,
252
+ labels,
253
+ };
254
+ }
255
+
256
+ /**
257
+ * Creates a new AudioAnalysis instance for an HTMLAudioElement
258
+ * @param {HTMLAudioElement} audioElement
259
+ * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
260
+ * @returns {AudioAnalysis}
261
+ */
262
+ constructor(audioElement, audioBuffer = null) {
263
+ this.fftResults = [];
264
+ if (audioBuffer) {
265
+ /**
266
+ * Modified from
267
+ * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
268
+ *
269
+ * We do this to populate FFT values for the audio if provided an `audioBuffer`
270
+ * The reason to do this is that Safari fails when using `createMediaElementSource`
271
+ * This has a non-zero RAM cost, so only opt in (by passing an `audioBuffer`) where it is needed, e.g. on Safari
272
+ */
273
+ const { length, sampleRate } = audioBuffer;
274
+ const offlineAudioContext = new OfflineAudioContext({
275
+ length,
276
+ sampleRate,
277
+ });
278
+ const source = offlineAudioContext.createBufferSource();
279
+ source.buffer = audioBuffer;
280
+ const analyser = offlineAudioContext.createAnalyser();
281
+ analyser.fftSize = 8192;
282
+ analyser.smoothingTimeConstant = 0.1;
283
+ source.connect(analyser);
284
+ // limit is :: 128 / sampleRate;
285
+ // but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM
286
+ const renderQuantumInSeconds = 1 / 60;
287
+ const durationInSeconds = length / sampleRate;
288
+ const analyze = (index) => {
289
+ const suspendTime = renderQuantumInSeconds * index;
290
+ if (suspendTime < durationInSeconds) {
291
+ offlineAudioContext.suspend(suspendTime).then(() => {
292
+ const fftResult = new Float32Array(analyser.frequencyBinCount);
293
+ analyser.getFloatFrequencyData(fftResult);
294
+ this.fftResults.push(fftResult);
295
+ analyze(index + 1);
296
+ });
297
+ }
298
+ if (index === 1) {
299
+ offlineAudioContext.startRendering();
300
+ } else {
301
+ offlineAudioContext.resume();
302
+ }
303
+ };
304
+ source.start(0);
305
+ analyze(1);
306
+ this.audio = audioElement;
307
+ this.context = offlineAudioContext;
308
+ this.analyser = analyser;
309
+ this.sampleRate = sampleRate;
310
+ this.audioBuffer = audioBuffer;
311
+ } else {
312
+ const audioContext = new AudioContext();
313
+ const track = audioContext.createMediaElementSource(audioElement);
314
+ const analyser = audioContext.createAnalyser();
315
+ analyser.fftSize = 8192;
316
+ analyser.smoothingTimeConstant = 0.1;
317
+ track.connect(analyser);
318
+ analyser.connect(audioContext.destination);
319
+ this.audio = audioElement;
320
+ this.context = audioContext;
321
+ this.analyser = analyser;
322
+ this.sampleRate = this.context.sampleRate;
323
+ this.audioBuffer = null;
324
+ }
325
+ }
326
+
327
+ /**
328
+ * Gets the current frequency domain data from the playing audio track
329
+ * @param {"frequency"|"music"|"voice"} [analysisType]
330
+ * @param {number} [minDecibels] default -100
331
+ * @param {number} [maxDecibels] default -30
332
+ * @returns {AudioAnalysisOutputType}
333
+ */
334
+ getFrequencies(
335
+ analysisType = 'frequency',
336
+ minDecibels = -100,
337
+ maxDecibels = -30,
338
+ ) {
339
+ let fftResult = null;
340
+ if (this.audioBuffer && this.fftResults.length) {
341
+ const pct = this.audio.currentTime / this.audio.duration;
342
+ const index = Math.min(
343
+ (pct * this.fftResults.length) | 0,
344
+ this.fftResults.length - 1,
345
+ );
346
+ fftResult = this.fftResults[index];
347
+ }
348
+ return AudioAnalysis.getFrequencies(
349
+ this.analyser,
350
+ this.sampleRate,
351
+ fftResult,
352
+ analysisType,
353
+ minDecibels,
354
+ maxDecibels,
355
+ );
356
+ }
357
+
358
+ /**
359
+ * Resume the internal AudioContext if it was suspended due to the lack of
360
+ * user interaction when the AudioAnalysis was instantiated.
361
+ * @returns {Promise<true>}
362
+ */
363
+ async resumeIfSuspended() {
364
+ if (this.context.state === 'suspended') {
365
+ await this.context.resume();
366
+ }
367
+ return true;
368
+ }
369
+ }
370
+
371
+ globalThis.AudioAnalysis = AudioAnalysis;
372
+
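// Usage sketch for AudioAnalysis (illustrative): attach to a playing <audio> element and read
// normalized voice-range frequency data. The element selector is an assumption for the example.
const audioEl = document.querySelector('audio');
const analysis = new AudioAnalysis(audioEl);
audioEl.addEventListener('play', async () => {
  await analysis.resumeIfSuspended(); // the AudioContext may start suspended without a user gesture
  const { values, labels } = analysis.getFrequencies('voice');
  console.log(labels.length, 'voice buckets, first bucket amplitude:', values[0]);
});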
373
+ const StreamProcessorWorklet = `
374
+ class StreamProcessor extends AudioWorkletProcessor {
375
+ constructor() {
376
+ super();
377
+ this.hasStarted = false;
378
+ this.hasInterrupted = false;
379
+ this.outputBuffers = [];
380
+ this.bufferLength = 128;
381
+ this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };
382
+ this.writeOffset = 0;
383
+ this.trackSampleOffsets = {};
384
+ this.port.onmessage = (event) => {
385
+ if (event.data) {
386
+ const payload = event.data;
387
+ if (payload.event === 'write') {
388
+ const int16Array = payload.buffer;
389
+ const float32Array = new Float32Array(int16Array.length);
390
+ for (let i = 0; i < int16Array.length; i++) {
391
+ float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32
392
+ }
393
+ this.writeData(float32Array, payload.trackId);
394
+ } else if (
395
+ payload.event === 'offset' ||
396
+ payload.event === 'interrupt'
397
+ ) {
398
+ const requestId = payload.requestId;
399
+ const trackId = this.write.trackId;
400
+ const offset = this.trackSampleOffsets[trackId] || 0;
401
+ this.port.postMessage({
402
+ event: 'offset',
403
+ requestId,
404
+ trackId,
405
+ offset,
406
+ });
407
+ if (payload.event === 'interrupt') {
408
+ this.hasInterrupted = true;
409
+ }
410
+ } else {
411
+ throw new Error(\`Unhandled event "\${payload.event}"\`);
412
+ }
413
+ }
414
+ };
415
+ }
416
+
417
+ writeData(float32Array, trackId = null) {
418
+ let { buffer } = this.write;
419
+ let offset = this.writeOffset;
420
+ for (let i = 0; i < float32Array.length; i++) {
421
+ buffer[offset++] = float32Array[i];
422
+ if (offset >= buffer.length) {
423
+ this.outputBuffers.push(this.write);
424
+ this.write = { buffer: new Float32Array(this.bufferLength), trackId };
425
+ buffer = this.write.buffer;
426
+ offset = 0;
427
+ }
428
+ }
429
+ this.writeOffset = offset;
430
+ return true;
431
+ }
432
+
433
+ process(inputs, outputs, parameters) {
434
+ const output = outputs[0];
435
+ const outputChannelData = output[0];
436
+ const outputBuffers = this.outputBuffers;
437
+ if (this.hasInterrupted) {
438
+ this.port.postMessage({ event: 'stop' });
439
+ return false;
440
+ } else if (outputBuffers.length) {
441
+ this.hasStarted = true;
442
+ const { buffer, trackId } = outputBuffers.shift();
443
+ for (let i = 0; i < outputChannelData.length; i++) {
444
+ outputChannelData[i] = buffer[i] || 0;
445
+ }
446
+ if (trackId) {
447
+ this.trackSampleOffsets[trackId] =
448
+ this.trackSampleOffsets[trackId] || 0;
449
+ this.trackSampleOffsets[trackId] += buffer.length;
450
+ }
451
+ return true;
452
+ } else if (this.hasStarted) {
453
+ this.port.postMessage({ event: 'stop' });
454
+ return false;
455
+ } else {
456
+ return true;
457
+ }
458
+ }
459
+ }
460
+
461
+ registerProcessor('stream_processor', StreamProcessor);
462
+ `;
463
+
464
+ const script$1 = new Blob([StreamProcessorWorklet], {
465
+ type: 'application/javascript',
466
+ });
467
+ const src$1 = URL.createObjectURL(script$1);
468
+ const StreamProcessorSrc = src$1;
469
+
470
+ /**
471
+ * Plays audio streams received in raw PCM16 chunks from the browser
472
+ * @class
473
+ */
474
+ class WavStreamPlayer {
475
+ /**
476
+ * Creates a new WavStreamPlayer instance
477
+ * @param {{finishedPlayingCallback?: () => void, sampleRate?: number}} [options]
478
+ * @returns {WavStreamPlayer}
479
+ */
480
+ constructor({ finishedPlayingCallback = () => {}, sampleRate = 24000 } = {}) {
481
+ this.scriptSrc = StreamProcessorSrc;
482
+ this.sampleRate = sampleRate;
483
+ this.context = null;
484
+ this.stream = null;
485
+ this.analyser = null;
486
+ this.trackSampleOffsets = {};
487
+ this.interruptedTrackIds = {};
488
+ this.finishedPlayingCallback = finishedPlayingCallback;
489
+ }
490
+
491
+ /**
492
+ * Connects the audio context and enables output to speakers
493
+ * @returns {Promise<true>}
494
+ */
495
+ async connect() {
496
+ this.context = new AudioContext({ sampleRate: this.sampleRate });
497
+ if (this.context.state === "suspended") {
498
+ await this.context.resume();
499
+ }
500
+ try {
501
+ await this.context.audioWorklet.addModule(this.scriptSrc);
502
+ } catch (e) {
503
+ console.error(e);
504
+ throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
505
+ }
506
+ const analyser = this.context.createAnalyser();
507
+ analyser.fftSize = 8192;
508
+ analyser.smoothingTimeConstant = 0.1;
509
+ this.analyser = analyser;
510
+ return true;
511
+ }
512
+
513
+ /**
514
+ * Gets the current frequency domain data from the playing track
515
+ * @param {"frequency"|"music"|"voice"} [analysisType]
516
+ * @param {number} [minDecibels] default -100
517
+ * @param {number} [maxDecibels] default -30
518
+ * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
519
+ */
520
+ getFrequencies(analysisType = "frequency", minDecibels = -100, maxDecibels = -30) {
521
+ if (!this.analyser) {
522
+ throw new Error("Not connected, please call .connect() first");
523
+ }
524
+ return AudioAnalysis.getFrequencies(this.analyser, this.sampleRate, null, analysisType, minDecibels, maxDecibels);
525
+ }
526
+
527
+ /**
528
+ * Gets the real-time amplitude of the audio signal
529
+ * @returns {number} Amplitude value between 0 and 1
530
+ */
531
+ getAmplitude() {
532
+ if (!this.analyser) {
533
+ throw new Error("AnalyserNode is not initialized. Please call connect() first.");
534
+ }
535
+
536
+ const bufferLength = this.analyser.fftSize;
537
+ const dataArray = new Uint8Array(bufferLength);
538
+ this.analyser.getByteTimeDomainData(dataArray);
539
+
540
+ // Calculate RMS (Root Mean Square) to get amplitude
541
+ let sumSquares = 0;
542
+ for (let i = 0; i < bufferLength; i++) {
543
+ const normalized = (dataArray[i] - 128) / 128; // Normalize between -1 and 1
544
+ sumSquares += normalized * normalized;
545
+ }
546
+ const rms = Math.sqrt(sumSquares / bufferLength);
547
+ return rms;
548
+ }
549
+
550
+ /**
551
+ * Starts amplitude monitoring
552
+ * @param {function} callback - Function to call with amplitude value
553
+ */
554
+ startAmplitudeMonitoring(callback) {
555
+ const monitor = () => {
556
+ const amplitude = this.getAmplitude();
557
+ callback(amplitude);
558
+ requestAnimationFrame(monitor);
559
+ };
560
+ monitor();
561
+ }
562
+
563
+ /**
564
+ * Starts audio streaming
565
+ * @private
566
+ * @returns {Promise<true>}
567
+ */
568
+ _start() {
569
+ const streamNode = new AudioWorkletNode(this.context, "stream_processor");
570
+ streamNode.connect(this.context.destination);
571
+ streamNode.port.onmessage = (e) => {
572
+ const { event } = e.data;
573
+ if (event === "stop") {
574
+ streamNode.disconnect();
575
+ this.stream = null;
576
+ this.finishedPlayingCallback();
577
+ } else if (event === "offset") {
578
+ const { requestId, trackId, offset } = e.data;
579
+ const currentTime = offset / this.sampleRate;
580
+ this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
581
+ }
582
+ };
583
+ this.analyser.disconnect();
584
+ streamNode.connect(this.analyser);
585
+ this.stream = streamNode;
586
+ return true;
587
+ }
588
+
589
+ /**
590
+ * Adds 16BitPCM data to the currently playing audio stream
591
+ * You can add chunks beyond the current play point and they will be queued for play
592
+ * @param {ArrayBuffer|Int16Array} arrayBuffer
593
+ * @param {string} [trackId]
594
+ * @returns {Int16Array}
595
+ */
596
+ add16BitPCM(arrayBuffer, trackId = "default") {
597
+ if (typeof trackId !== "string") {
598
+ throw new Error(`trackId must be a string`);
599
+ } else if (this.interruptedTrackIds[trackId]) {
600
+ return;
601
+ }
602
+ if (!this.stream) {
603
+ this._start();
604
+ }
605
+ let buffer;
606
+ if (arrayBuffer instanceof Int16Array) {
607
+ buffer = arrayBuffer;
608
+ } else if (arrayBuffer instanceof ArrayBuffer) {
609
+ buffer = new Int16Array(arrayBuffer);
610
+ } else {
611
+ throw new Error(`argument must be Int16Array or ArrayBuffer`);
612
+ }
613
+ this.stream.port.postMessage({ event: "write", buffer, trackId });
614
+ return buffer;
615
+ }
616
+
617
+ /**
618
+ * Gets the offset (sample count) of the currently playing stream
619
+ * @param {boolean} [interrupt]
620
+ * @returns {{trackId: string|null, offset: number, currentTime: number}}
621
+ */
622
+ async getTrackSampleOffset(interrupt = false) {
623
+ if (!this.stream) {
624
+ return null;
625
+ }
626
+ const requestId = crypto.randomUUID();
627
+ this.stream.port.postMessage({
628
+ event: interrupt ? "interrupt" : "offset",
629
+ requestId,
630
+ });
631
+ let trackSampleOffset;
632
+ while (!trackSampleOffset) {
633
+ trackSampleOffset = this.trackSampleOffsets[requestId];
634
+ await new Promise((r) => setTimeout(() => r(), 1));
635
+ }
636
+ const { trackId } = trackSampleOffset;
637
+ if (interrupt && trackId) {
638
+ this.interruptedTrackIds[trackId] = true;
639
+ }
640
+ return trackSampleOffset;
641
+ }
642
+
643
+ /**
644
+ * Interrupts the current stream and returns the sample offset of the audio
645
+ * @param {boolean} [interrupt]
646
+ * @returns {{trackId: string|null, offset: number, currentTime: number}}
647
+ */
648
+ async interrupt() {
649
+ return this.getTrackSampleOffset(true);
650
+ }
651
+
652
+ /**
653
+ * Disconnects the audio context and cleans up resources
654
+ * @returns {void}
655
+ */
656
+ disconnect() {
657
+ if (this.stream) {
658
+ this.stream.disconnect();
659
+ this.stream = null;
660
+ }
661
+
662
+ if (this.analyser) {
663
+ this.analyser.disconnect();
664
+ }
665
+
666
+ if (this.context) {
667
+ this.context.close().catch((err) => console.error("Error closing audio context:", err));
668
+ }
669
+ }
670
+ }
671
+
672
+ globalThis.WavStreamPlayer = WavStreamPlayer;
673
+
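// Usage sketch for WavStreamPlayer (illustrative): connect after a user gesture, queue PCM16
// chunks as they arrive, and interrupt playback on demand. `pcmChunk` is assumed to be an
// Int16Array or ArrayBuffer of 24 kHz PCM16 audio supplied by the caller.
async function playStreamExample(pcmChunk) {
  const player = new WavStreamPlayer({ sampleRate: 24000 });
  await player.connect(); // loads the stream_processor worklet and creates the AudioContext
  player.add16BitPCM(pcmChunk, 'assistant-turn-1'); // chunks queue behind the current play point
  const offsetInfo = await player.interrupt(); // stops the track and reports how far it played
  console.log(offsetInfo); // { trackId, offset, currentTime } or null if nothing was playing
}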
674
+ const AudioProcessorWorklet = `
675
+ class AudioProcessor extends AudioWorkletProcessor {
676
+
677
+ constructor() {
678
+ super();
679
+ this.port.onmessage = this.receive.bind(this);
680
+ this.initialize();
681
+ }
682
+
683
+ initialize() {
684
+ this.foundAudio = false;
685
+ this.recording = false;
686
+ this.chunks = [];
687
+ }
688
+
689
+ /**
690
+ * Concatenates sampled chunks into channels
691
+ * Format is chunk[Left[], Right[]]
692
+ */
693
+ readChannelData(chunks, channel = -1, maxChannels = 9) {
694
+ let channelLimit;
695
+ if (channel !== -1) {
696
+ if (chunks[0] && chunks[0].length - 1 < channel) {
697
+ throw new Error(
698
+ \`Channel \${channel} out of range: max \${chunks[0].length}\`
699
+ );
700
+ }
701
+ channelLimit = channel + 1;
702
+ } else {
703
+ channel = 0;
704
+ channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels);
705
+ }
706
+ const channels = [];
707
+ for (let n = channel; n < channelLimit; n++) {
708
+ const length = chunks.reduce((sum, chunk) => {
709
+ return sum + chunk[n].length;
710
+ }, 0);
711
+ const buffers = chunks.map((chunk) => chunk[n]);
712
+ const result = new Float32Array(length);
713
+ let offset = 0;
714
+ for (let i = 0; i < buffers.length; i++) {
715
+ result.set(buffers[i], offset);
716
+ offset += buffers[i].length;
717
+ }
718
+ channels[n] = result;
719
+ }
720
+ return channels;
721
+ }
722
+
723
+ /**
724
+ * Combines parallel audio data into correct format,
725
+ * channels[Left[], Right[]] to float32Array[LRLRLRLR...]
726
+ */
727
+ formatAudioData(channels) {
728
+ if (channels.length === 1) {
729
+ // Simple case is only one channel
730
+ const float32Array = channels[0].slice();
731
+ const meanValues = channels[0].slice();
732
+ return { float32Array, meanValues };
733
+ } else {
734
+ const float32Array = new Float32Array(
735
+ channels[0].length * channels.length
736
+ );
737
+ const meanValues = new Float32Array(channels[0].length);
738
+ for (let i = 0; i < channels[0].length; i++) {
739
+ const offset = i * channels.length;
740
+ let meanValue = 0;
741
+ for (let n = 0; n < channels.length; n++) {
742
+ float32Array[offset + n] = channels[n][i];
743
+ meanValue += channels[n][i];
744
+ }
745
+ meanValues[i] = meanValue / channels.length;
746
+ }
747
+ return { float32Array, meanValues };
748
+ }
749
+ }
750
+
751
+ /**
752
+ * Converts 32-bit float data to 16-bit integers
753
+ */
754
+ floatTo16BitPCM(float32Array) {
755
+ const buffer = new ArrayBuffer(float32Array.length * 2);
756
+ const view = new DataView(buffer);
757
+ let offset = 0;
758
+ for (let i = 0; i < float32Array.length; i++, offset += 2) {
759
+ let s = Math.max(-1, Math.min(1, float32Array[i]));
760
+ view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
761
+ }
762
+ return buffer;
763
+ }
764
+
765
+ /**
766
+ * Retrieves the most recent amplitude values from the audio stream
767
+ * @param {number} channel
768
+ */
769
+ getValues(channel = -1) {
770
+ const channels = this.readChannelData(this.chunks, channel);
771
+ const { meanValues } = this.formatAudioData(channels);
772
+ return { meanValues, channels };
773
+ }
774
+
775
+ /**
776
+ * Exports chunks as an audio/wav file
777
+ */
778
+ export() {
779
+ const channels = this.readChannelData(this.chunks);
780
+ const { float32Array, meanValues } = this.formatAudioData(channels);
781
+ const audioData = this.floatTo16BitPCM(float32Array);
782
+ return {
783
+ meanValues: meanValues,
784
+ audio: {
785
+ bitsPerSample: 16,
786
+ channels: channels,
787
+ data: audioData,
788
+ },
789
+ };
790
+ }
791
+
792
+ receive(e) {
793
+ const { event, id } = e.data;
794
+ let receiptData = {};
795
+ switch (event) {
796
+ case 'start':
797
+ this.recording = true;
798
+ break;
799
+ case 'stop':
800
+ this.recording = false;
801
+ break;
802
+ case 'clear':
803
+ this.initialize();
804
+ break;
805
+ case 'export':
806
+ receiptData = this.export();
807
+ break;
808
+ case 'read':
809
+ receiptData = this.getValues();
810
+ break;
811
+ default:
812
+ break;
813
+ }
814
+ // Always send back receipt
815
+ this.port.postMessage({ event: 'receipt', id, data: receiptData });
816
+ }
817
+
818
+ sendChunk(chunk) {
819
+ const channels = this.readChannelData([chunk]);
820
+ const { float32Array, meanValues } = this.formatAudioData(channels);
821
+ const rawAudioData = this.floatTo16BitPCM(float32Array);
822
+ const monoAudioData = this.floatTo16BitPCM(meanValues);
823
+ this.port.postMessage({
824
+ event: 'chunk',
825
+ data: {
826
+ mono: monoAudioData,
827
+ raw: rawAudioData,
828
+ },
829
+ });
830
+ }
831
+
832
+ process(inputList, outputList, parameters) {
833
+ // Copy input to output (e.g. speakers)
834
+ // Note that this creates choppy sounds with Mac products
835
+ const sourceLimit = Math.min(inputList.length, outputList.length);
836
+ for (let inputNum = 0; inputNum < sourceLimit; inputNum++) {
837
+ const input = inputList[inputNum];
838
+ const output = outputList[inputNum];
839
+ const channelCount = Math.min(input.length, output.length);
840
+ for (let channelNum = 0; channelNum < channelCount; channelNum++) {
841
+ input[channelNum].forEach((sample, i) => {
842
+ output[channelNum][i] = sample;
843
+ });
844
+ }
845
+ }
846
+ const inputs = inputList[0];
847
+ // There's latency at the beginning of a stream before recording starts
848
+ // Make sure we actually receive audio data before we start storing chunks
849
+ let sliceIndex = 0;
850
+ if (!this.foundAudio) {
851
+ for (const channel of inputs) {
852
+ sliceIndex = 0; // reset for each channel
853
+ if (this.foundAudio) {
854
+ break;
855
+ }
856
+ if (channel) {
857
+ for (const value of channel) {
858
+ if (value !== 0) {
859
+ // find only one non-zero entry in any channel
860
+ this.foundAudio = true;
861
+ break;
862
+ } else {
863
+ sliceIndex++;
864
+ }
865
+ }
866
+ }
867
+ }
868
+ }
869
+ if (inputs && inputs[0] && this.foundAudio && this.recording) {
870
+ // We need to copy the TypedArray, because the \`process\`
871
+ // internals will reuse the same buffer to hold each input
872
+ const chunk = inputs.map((input) => input.slice(sliceIndex));
873
+ this.chunks.push(chunk);
874
+ this.sendChunk(chunk);
875
+ }
876
+ return true;
877
+ }
878
+ }
879
+
880
+ registerProcessor('audio_processor', AudioProcessor);
881
+ `;
882
+
883
+ const script = new Blob([AudioProcessorWorklet], {
884
+ type: 'application/javascript',
885
+ });
886
+ const src = URL.createObjectURL(script);
887
+ const AudioProcessorSrc = src;
888
+
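// The worklet above is shipped as an inline string and exposed through a Blob URL, so no
// separate processor file has to be served at runtime. A minimal sketch of loading it directly
// (WavRecorder.begin() below does the same thing with extra wiring):
async function loadAudioProcessorExample() {
  const ctx = new AudioContext({ sampleRate: 24000 });
  await ctx.audioWorklet.addModule(AudioProcessorSrc); // registers 'audio_processor'
  return new AudioWorkletNode(ctx, 'audio_processor');
}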
889
+ /**
890
+ * Decodes audio into a wav file
891
+ * @typedef {Object} DecodedAudioType
892
+ * @property {Blob} blob
893
+ * @property {string} url
894
+ * @property {Float32Array} values
895
+ * @property {AudioBuffer} audioBuffer
896
+ */
897
+
898
+ /**
899
+ * Records live stream of user audio as PCM16 "audio/wav" data
900
+ * @class
901
+ */
902
+ class WavRecorder {
903
+ /**
904
+ * Create a new WavRecorder instance
905
+ * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
906
+ * @returns {WavRecorder}
907
+ */
908
+ constructor({
909
+ sampleRate = 24000,
910
+ outputToSpeakers = false,
911
+ debug = false,
912
+ } = {}) {
913
+ // Script source
914
+ this.scriptSrc = AudioProcessorSrc;
915
+ // Config
916
+ this.sampleRate = sampleRate;
917
+ this.outputToSpeakers = outputToSpeakers;
918
+ this.debug = !!debug;
919
+ this._deviceChangeCallback = null;
920
+ this._devices = [];
921
+ // State variables
922
+ this.stream = null;
923
+ this.processor = null;
924
+ this.source = null;
925
+ this.node = null;
926
+ this.recording = false;
927
+ // Event handling with AudioWorklet
928
+ this._lastEventId = 0;
929
+ this.eventReceipts = {};
930
+ this.eventTimeout = 5000;
931
+ // Process chunks of audio
932
+ this._chunkProcessor = () => {};
933
+ this._chunkProcessorSize = void 0;
934
+ this._chunkProcessorBuffer = {
935
+ raw: new ArrayBuffer(0),
936
+ mono: new ArrayBuffer(0),
937
+ };
938
+ }
939
+
940
+ /**
941
+ * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
942
+ * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
943
+ * @param {number} sampleRate
944
+ * @param {number} fromSampleRate
945
+ * @returns {Promise<DecodedAudioType>}
946
+ */
947
+ static async decode(audioData, sampleRate = 24000, fromSampleRate = -1) {
948
+ const context = new AudioContext({ sampleRate });
949
+ let arrayBuffer;
950
+ let blob;
951
+ if (audioData instanceof Blob) {
952
+ if (fromSampleRate !== -1) {
953
+ throw new Error(
954
+ `Can not specify "fromSampleRate" when reading from Blob`,
955
+ );
956
+ }
957
+ blob = audioData;
958
+ arrayBuffer = await blob.arrayBuffer();
959
+ } else if (audioData instanceof ArrayBuffer) {
960
+ if (fromSampleRate !== -1) {
961
+ throw new Error(
962
+ `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
963
+ );
964
+ }
965
+ arrayBuffer = audioData;
966
+ blob = new Blob([arrayBuffer], { type: 'audio/wav' });
967
+ } else {
968
+ let float32Array;
969
+ let data;
970
+ if (audioData instanceof Int16Array) {
971
+ data = audioData;
972
+ float32Array = new Float32Array(audioData.length);
973
+ for (let i = 0; i < audioData.length; i++) {
974
+ float32Array[i] = audioData[i] / 0x8000;
975
+ }
976
+ } else if (audioData instanceof Float32Array) {
977
+ float32Array = audioData;
978
+ } else if (audioData instanceof Array) {
979
+ float32Array = new Float32Array(audioData);
980
+ } else {
981
+ throw new Error(
982
+ `"audioData" must be one of: Blob, Float32Array, Int16Array, ArrayBuffer, Array<number>`,
983
+ );
984
+ }
985
+ if (fromSampleRate === -1) {
986
+ throw new Error(
987
+ `Must specify "fromSampleRate" when reading from Float32Array, Int16Array or Array`,
988
+ );
989
+ } else if (fromSampleRate < 3000) {
990
+ throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
991
+ }
992
+ if (!data) {
993
+ data = WavPacker.floatTo16BitPCM(float32Array);
994
+ }
995
+ const audio = {
996
+ bitsPerSample: 16,
997
+ channels: [float32Array],
998
+ data,
999
+ };
1000
+ const packer = new WavPacker();
1001
+ const result = packer.pack(fromSampleRate, audio);
1002
+ blob = result.blob;
1003
+ arrayBuffer = await blob.arrayBuffer();
1004
+ }
1005
+ const audioBuffer = await context.decodeAudioData(arrayBuffer);
1006
+ const values = audioBuffer.getChannelData(0);
1007
+ const url = URL.createObjectURL(blob);
1008
+ return {
1009
+ blob,
1010
+ url,
1011
+ values,
1012
+ audioBuffer,
1013
+ };
1014
+ }
1015
+
1016
+ /**
1017
+ * Logs data in debug mode
1018
+ * @param {...any} arguments
1019
+ * @returns {true}
1020
+ */
1021
+ log() {
1022
+ if (this.debug) {
1023
+ console.log(...arguments);
1024
+ }
1025
+ return true;
1026
+ }
1027
+
1028
+ /**
1029
+ * Retrieves the current sampleRate for the recorder
1030
+ * @returns {number}
1031
+ */
1032
+ getSampleRate() {
1033
+ return this.sampleRate;
1034
+ }
1035
+
1036
+ /**
1037
+ * Retrieves the current status of the recording
1038
+ * @returns {"ended"|"paused"|"recording"}
1039
+ */
1040
+ getStatus() {
1041
+ if (!this.processor) {
1042
+ return 'ended';
1043
+ } else if (!this.recording) {
1044
+ return 'paused';
1045
+ } else {
1046
+ return 'recording';
1047
+ }
1048
+ }
1049
+
1050
+ /**
1051
+ * Sends an event to the AudioWorklet
1052
+ * @private
1053
+ * @param {string} name
1054
+ * @param {{[key: string]: any}} data
1055
+ * @param {AudioWorkletNode} [_processor]
1056
+ * @returns {Promise<{[key: string]: any}>}
1057
+ */
1058
+ async _event(name, data = {}, _processor = null) {
1059
+ _processor = _processor || this.processor;
1060
+ if (!_processor) {
1061
+ throw new Error('Can not send events without recording first');
1062
+ }
1063
+ const message = {
1064
+ event: name,
1065
+ id: this._lastEventId++,
1066
+ data,
1067
+ };
1068
+ _processor.port.postMessage(message);
1069
+ const t0 = new Date().valueOf();
1070
+ while (!this.eventReceipts[message.id]) {
1071
+ if (new Date().valueOf() - t0 > this.eventTimeout) {
1072
+ throw new Error(`Timeout waiting for "${name}" event`);
1073
+ }
1074
+ await new Promise((res) => setTimeout(() => res(true), 1));
1075
+ }
1076
+ const payload = this.eventReceipts[message.id];
1077
+ delete this.eventReceipts[message.id];
1078
+ return payload;
1079
+ }
1080
+
1081
+ /**
1082
+ * Sets device change callback, remove if callback provided is `null`
1083
+ * @param {((devices: Array<MediaDeviceInfo & {default: boolean}>) => void)|null} callback
1084
+ * @returns {true}
1085
+ */
1086
+ listenForDeviceChange(callback) {
1087
+ if (callback === null && this._deviceChangeCallback) {
1088
+ navigator.mediaDevices.removeEventListener(
1089
+ 'devicechange',
1090
+ this._deviceChangeCallback,
1091
+ );
1092
+ this._deviceChangeCallback = null;
1093
+ } else if (callback !== null) {
1094
+ // Basically a debounce; we only want this called once when devices change
1095
+ // And we only want the most recent callback() to be executed
1096
+ // if a few are operating at the same time
1097
+ let lastId = 0;
1098
+ let lastDevices = [];
1099
+ const serializeDevices = (devices) =>
1100
+ devices
1101
+ .map((d) => d.deviceId)
1102
+ .sort()
1103
+ .join(',');
1104
+ const cb = async () => {
1105
+ let id = ++lastId;
1106
+ const devices = await this.listDevices();
1107
+ if (id === lastId) {
1108
+ if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
1109
+ lastDevices = devices;
1110
+ callback(devices.slice());
1111
+ }
1112
+ }
1113
+ };
1114
+ navigator.mediaDevices.addEventListener('devicechange', cb);
1115
+ cb();
1116
+ this._deviceChangeCallback = cb;
1117
+ }
1118
+ return true;
1119
+ }
1120
+
1121
+ /**
1122
+ * Manually request permission to use the microphone
1123
+ * @returns {Promise<true>}
1124
+ */
1125
+ async requestPermission() {
1126
+ const permissionStatus = await navigator.permissions.query({
1127
+ name: 'microphone',
1128
+ });
1129
+ if (permissionStatus.state === 'denied') {
1130
+ window.alert('You must grant microphone access to use this feature.');
1131
+ } else if (permissionStatus.state === 'prompt') {
1132
+ try {
1133
+ const stream = await navigator.mediaDevices.getUserMedia({
1134
+ audio: true,
1135
+ });
1136
+ const tracks = stream.getTracks();
1137
+ tracks.forEach((track) => track.stop());
1138
+ } catch (e) {
1139
+ window.alert('You must grant microphone access to use this feature.');
1140
+ }
1141
+ }
1142
+ return true;
1143
+ }
1144
+
1145
+ /**
1146
+ * List all eligible devices for recording, will request permission to use microphone
1147
+ * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
1148
+ */
1149
+ async listDevices() {
1150
+ if (
1151
+ !navigator.mediaDevices ||
1152
+ !('enumerateDevices' in navigator.mediaDevices)
1153
+ ) {
1154
+ throw new Error('Could not request user devices');
1155
+ }
1156
+ await this.requestPermission();
1157
+ const devices = await navigator.mediaDevices.enumerateDevices();
1158
+ const audioDevices = devices.filter(
1159
+ (device) => device.kind === 'audioinput',
1160
+ );
1161
+ const defaultDeviceIndex = audioDevices.findIndex(
1162
+ (device) => device.deviceId === 'default',
1163
+ );
1164
+ const deviceList = [];
1165
+ if (defaultDeviceIndex !== -1) {
1166
+ let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
1167
+ let existingIndex = audioDevices.findIndex(
1168
+ (device) => device.groupId === defaultDevice.groupId,
1169
+ );
1170
+ if (existingIndex !== -1) {
1171
+ defaultDevice = audioDevices.splice(existingIndex, 1)[0];
1172
+ }
1173
+ defaultDevice.default = true;
1174
+ deviceList.push(defaultDevice);
1175
+ }
1176
+ return deviceList.concat(audioDevices);
1177
+ }
1178
+
1179
+ /**
1180
+ * Begins a recording session and requests microphone permissions if not already granted
1181
+ * Microphone recording indicator will appear on browser tab but status will be "paused"
1182
+ * @param {string} [deviceId] if no device provided, default device will be used
1183
+ * @returns {Promise<true>}
1184
+ */
1185
+ async begin(deviceId) {
1186
+ if (this.processor) {
1187
+ throw new Error(
1188
+ `Already connected: please call .end() to start a new session`,
1189
+ );
1190
+ }
1191
+
1192
+ if (
1193
+ !navigator.mediaDevices ||
1194
+ !('getUserMedia' in navigator.mediaDevices)
1195
+ ) {
1196
+ throw new Error('Could not request user media');
1197
+ }
1198
+ try {
1199
+ const config = { audio: true };
1200
+ if (deviceId) {
1201
+ config.audio = { deviceId: { exact: deviceId } };
1202
+ }
1203
+ this.stream = await navigator.mediaDevices.getUserMedia(config);
1204
+ } catch (err) {
1205
+ throw new Error('Could not start media stream');
1206
+ }
1207
+
1208
+ const context = new AudioContext({ sampleRate: this.sampleRate });
1209
+ const source = context.createMediaStreamSource(this.stream);
1210
+ // Load and execute the module script.
1211
+ try {
1212
+ await context.audioWorklet.addModule(this.scriptSrc);
1213
+ } catch (e) {
1214
+ console.error(e);
1215
+ throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
1216
+ }
1217
+ const processor = new AudioWorkletNode(context, 'audio_processor');
1218
+ processor.port.onmessage = (e) => {
1219
+ const { event, id, data } = e.data;
1220
+ if (event === 'receipt') {
1221
+ this.eventReceipts[id] = data;
1222
+ } else if (event === 'chunk') {
1223
+ if (this._chunkProcessorSize) {
1224
+ const buffer = this._chunkProcessorBuffer;
1225
+ this._chunkProcessorBuffer = {
1226
+ raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
1227
+ mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
1228
+ };
1229
+ if (
1230
+ this._chunkProcessorBuffer.mono.byteLength >=
1231
+ this._chunkProcessorSize
1232
+ ) {
1233
+ this._chunkProcessor(this._chunkProcessorBuffer);
1234
+ this._chunkProcessorBuffer = {
1235
+ raw: new ArrayBuffer(0),
1236
+ mono: new ArrayBuffer(0),
1237
+ };
1238
+ }
1239
+ } else {
1240
+ this._chunkProcessor(data);
1241
+ }
1242
+ }
1243
+ };
1244
+
1245
+ const node = source.connect(processor);
1246
+ const analyser = context.createAnalyser();
1247
+ analyser.fftSize = 8192;
1248
+ analyser.smoothingTimeConstant = 0.1;
1249
+ node.connect(analyser);
1250
+ if (this.outputToSpeakers) {
1251
+ // eslint-disable-next-line no-console
1252
+ console.warn(
1253
+ 'Warning: Output to speakers may affect sound quality,\n' +
1254
+ 'especially due to system audio feedback preventative measures.\n' +
1255
+ 'Use only for debugging.',
1256
+ );
1257
+ analyser.connect(context.destination);
1258
+ }
1259
+
1260
+ this.source = source;
1261
+ this.node = node;
1262
+ this.analyser = analyser;
1263
+ this.processor = processor;
1264
+ return true;
1265
+ }
1266
+
1267
+ /**
1268
+ * Gets the current frequency domain data from the recording track
1269
+ * @param {"frequency"|"music"|"voice"} [analysisType]
1270
+ * @param {number} [minDecibels] default -100
1271
+ * @param {number} [maxDecibels] default -30
1272
+ * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
1273
+ */
1274
+ getFrequencies(
1275
+ analysisType = 'frequency',
1276
+ minDecibels = -100,
1277
+ maxDecibels = -30,
1278
+ ) {
1279
+ if (!this.processor) {
1280
+ throw new Error('Session ended: please call .begin() first');
1281
+ }
1282
+ return AudioAnalysis.getFrequencies(
1283
+ this.analyser,
1284
+ this.sampleRate,
1285
+ null,
1286
+ analysisType,
1287
+ minDecibels,
1288
+ maxDecibels,
1289
+ );
1290
+ }
1291
+
1292
+
1293
+ /**
1294
+ * Gets the real-time amplitude of the audio signal
1295
+ * @returns {number} Amplitude value between 0 and 1
1296
+ */
1297
+ getAmplitude() {
1298
+ if (!this.analyser) {
1299
+ throw new Error('AnalyserNode is not initialized. Please call connect() first.');
1300
+ }
1301
+
1302
+ const bufferLength = this.analyser.fftSize;
1303
+ const dataArray = new Uint8Array(bufferLength);
1304
+ this.analyser.getByteTimeDomainData(dataArray);
1305
+
1306
+ // Calculate RMS (Root Mean Square) to get amplitude
1307
+ let sumSquares = 0;
1308
+ for (let i = 0; i < bufferLength; i++) {
1309
+ const normalized = (dataArray[i] - 128) / 128; // Normalize between -1 and 1
1310
+ sumSquares += normalized * normalized;
1311
+ }
1312
+ const rms = Math.sqrt(sumSquares / bufferLength);
1313
+ return rms;
1314
+ }
1315
+
1316
+ /**
1317
+ * Starts amplitude monitoring
1318
+ * @param {function} callback - Function to call with amplitude value
1319
+ */
1320
+ startAmplitudeMonitoring(callback) {
1321
+ const monitor = () => {
1322
+ const amplitude = this.getAmplitude();
1323
+ callback(amplitude);
1324
+ requestAnimationFrame(monitor);
1325
+ };
1326
+ monitor();
1327
+ }
1328
+
1329
+ /**
1330
+ * Pauses the recording
1331
+ * Keeps microphone stream open but halts storage of audio
1332
+ * @returns {Promise<true>}
1333
+ */
1334
+ async pause() {
1335
+ if (!this.processor) {
1336
+ throw new Error('Session ended: please call .begin() first');
1337
+ } else if (!this.recording) {
1338
+ throw new Error('Already paused: please call .record() first');
1339
+ }
1340
+ if (this._chunkProcessorBuffer.raw.byteLength) {
1341
+ this._chunkProcessor(this._chunkProcessorBuffer);
1342
+ }
1343
+ this.log('Pausing ...');
1344
+ await this._event('stop');
1345
+ this.recording = false;
1346
+ return true;
1347
+ }
1348
+
1349
+ /**
1350
+ * Start recording stream and storing to memory from the connected audio source
1351
+ * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
1352
+ * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio
1353
+ * @returns {Promise<true>}
1354
+ */
1355
+ async record(chunkProcessor = () => {}, chunkSize = 8192) {
1356
+ if (!this.processor) {
1357
+ throw new Error('Session ended: please call .begin() first');
1358
+ } else if (this.recording) {
1359
+ throw new Error('Already recording: please call .pause() first');
1360
+ } else if (typeof chunkProcessor !== 'function') {
1361
+ throw new Error(`chunkProcessor must be a function`);
1362
+ }
1363
+ this._chunkProcessor = chunkProcessor;
1364
+ this._chunkProcessorSize = chunkSize;
1365
+ this._chunkProcessorBuffer = {
1366
+ raw: new ArrayBuffer(0),
1367
+ mono: new ArrayBuffer(0),
1368
+ };
1369
+ this.log('Recording ...');
1370
+ await this._event('start');
1371
+ this.recording = true;
1372
+ return true;
1373
+ }
1374
+
1375
+ /**
1376
+ * Clears the audio buffer, empties stored recording
1377
+ * @returns {Promise<true>}
1378
+ */
1379
+ async clear() {
1380
+ if (!this.processor) {
1381
+ throw new Error('Session ended: please call .begin() first');
1382
+ }
1383
+ await this._event('clear');
1384
+ return true;
1385
+ }
1386
+
1387
+ /**
1388
+ * Reads the current audio stream data
1389
+ * @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
1390
+ */
1391
+ async read() {
1392
+ if (!this.processor) {
1393
+ throw new Error('Session ended: please call .begin() first');
1394
+ }
1395
+ this.log('Reading ...');
1396
+ const result = await this._event('read');
1397
+ return result;
1398
+ }
1399
+
1400
+ /**
1401
+ * Saves the current audio stream to a file
1402
+ * @param {boolean} [force] Force saving while still recording
1403
+ * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
1404
+ */
1405
+ async save(force = false) {
1406
+ if (!this.processor) {
1407
+ throw new Error('Session ended: please call .begin() first');
1408
+ }
1409
+ if (!force && this.recording) {
1410
+ throw new Error(
1411
+ 'Currently recording: please call .pause() first, or call .save(true) to force',
1412
+ );
1413
+ }
1414
+ this.log('Exporting ...');
1415
+ const exportData = await this._event('export');
1416
+ const packer = new WavPacker();
1417
+ const result = packer.pack(this.sampleRate, exportData.audio);
1418
+ return result;
1419
+ }
1420
+
1421
+ /**
1422
+ * Ends the current recording session and saves the result
1423
+ * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
1424
+ */
1425
+ async end() {
1426
+ if (!this.processor) {
1427
+ throw new Error('Session ended: please call .begin() first');
1428
+ }
1429
+
1430
+ const _processor = this.processor;
1431
+
1432
+ this.log('Stopping ...');
1433
+ await this._event('stop');
1434
+ this.recording = false;
1435
+ const tracks = this.stream.getTracks();
1436
+ tracks.forEach((track) => track.stop());
1437
+
1438
+ this.log('Exporting ...');
1439
+ const exportData = await this._event('export', {}, _processor);
1440
+
1441
+ this.processor.disconnect();
1442
+ this.source.disconnect();
1443
+ this.node.disconnect();
1444
+ this.analyser.disconnect();
1445
+ this.stream = null;
1446
+ this.processor = null;
1447
+ this.source = null;
1448
+ this.node = null;
1449
+
1450
+ const packer = new WavPacker();
1451
+ const result = packer.pack(this.sampleRate, exportData.audio);
1452
+ return result;
1453
+ }
1454
+
1455
+ /**
1456
+ * Performs a full cleanup of WavRecorder instance
1457
+ * Stops actively listening via microphone and removes existing listeners
1458
+ * @returns {Promise<true>}
1459
+ */
1460
+ async quit() {
1461
+ this.listenForDeviceChange(null);
1462
+ if (this.processor) {
1463
+ await this.end();
1464
+ }
1465
+ return true;
1466
+ }
1467
+ }
1468
+
1469
+ globalThis.WavRecorder = WavRecorder;
1470
+
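// Usage sketch for WavRecorder (illustrative): request the microphone, stream PCM16 chunks to a
// handler, then end the session and get a packed WAV file back.
async function recordExample() {
  const recorder = new WavRecorder({ sampleRate: 24000 });
  await recorder.begin(); // asks for mic permission; status stays "paused" until record()
  await recorder.record((chunk) => {
    // chunk.mono and chunk.raw are ArrayBuffers of PCM16 audio
    console.log('mono bytes received:', chunk.mono.byteLength);
  }, 8192);
  // ... some time later
  const wav = await recorder.end(); // stops the mic and returns a WavPackerAudioType
  console.log(wav.url, wav.duration);
}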
1471
+ /**
1472
+ * Converts a base64 string to an ArrayBuffer.
1473
+ * @param {string} base64 - The base64 string to convert.
1474
+ * @returns {ArrayBuffer} The resulting ArrayBuffer.
1475
+ */
1476
+ function base64ToArrayBuffer(base64) {
1477
+ const binaryString = atob(base64);
1478
+ const len = binaryString.length;
1479
+ const bytes = new Uint8Array(len);
1480
+ for (let i = 0; i < len; i++) {
1481
+ bytes[i] = binaryString.charCodeAt(i);
1482
+ }
1483
+ return bytes.buffer;
1484
+ }
1485
+
1486
+ /**
1487
+ * Converts an ArrayBuffer to a base64 string.
1488
+ * @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
1489
+ * @returns {string} The resulting base64 string.
1490
+ */
1491
+ function arrayBufferToBase64(arrayBuffer) {
1492
+ if (arrayBuffer instanceof Float32Array) {
1493
+ arrayBuffer = WavPacker.floatTo16BitPCM(arrayBuffer);
1494
+ } else if (arrayBuffer instanceof Int16Array) {
1495
+ arrayBuffer = arrayBuffer.buffer;
1496
+ }
1497
+ let binary = '';
1498
+ let bytes = new Uint8Array(arrayBuffer);
1499
+ const chunkSize = 0x8000; // 32KB chunk size
1500
+ for (let i = 0; i < bytes.length; i += chunkSize) {
1501
+ let chunk = bytes.subarray(i, i + chunkSize);
1502
+ binary += String.fromCharCode.apply(null, chunk);
1503
+ }
1504
+ return btoa(binary);
1505
+ }
1506
+
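// Round-trip sketch for the helpers above (illustrative): PCM16 samples -> base64 -> bytes.
// This mirrors how microphone chunks are sent ("client.audio") and received ("response.audio").
const samples = new Int16Array([0, 16384, -16384, 32767]);
const base64Audio = arrayBufferToBase64(samples);
const restored = new Int16Array(base64ToArrayBuffer(base64Audio));
console.log(restored); // Int16Array [0, 16384, -16384, 32767]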
1507
+ /* eslint-env browser */
1508
+ /**
1509
+ * @class LayercodeClient
1510
+ * @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
1511
+ */
1512
+ class LayercodeClient {
1513
+ // private currentTurnId: string | null = null;
1514
+ // private lastDeltaIdPlayed: string | null = null;
1515
+ /**
1516
+ * Creates an instance of LayercodeClient.
1517
+ * @param {Object} options - Configuration options
1518
+ */
1519
+ constructor(options) {
1520
+ this.options = {
1521
+ pipelineId: options.pipelineId,
1522
+ sessionId: options.sessionId || null,
1523
+ authorizeSessionEndpoint: options.authorizeSessionEndpoint,
1524
+ metadata: options.metadata || {},
1525
+ onConnect: options.onConnect || (() => { }),
1526
+ onDisconnect: options.onDisconnect || (() => { }),
1527
+ onError: options.onError || (() => { }),
1528
+ onDataMessage: options.onDataMessage || (() => { }),
1529
+ onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
1530
+ onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
1531
+ onStatusChange: options.onStatusChange || (() => { }),
1532
+ };
1533
+ this.AMPLITUDE_MONITORING_SAMPLE_RATE = 10;
1534
+ this.websocketUrl = 'wss://api.layercode.com/v1/pipelines/websocket';
1535
+ this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by the fetched pipeline config
1536
+ this.wavPlayer = new WavStreamPlayer({
1537
+ finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
1538
+ sampleRate: 16000, // TODO should be set by the fetched pipeline config
1539
+ });
1540
+ this.ws = null;
1541
+ this.status = 'disconnected';
1542
+ this.userAudioAmplitude = 0;
1543
+ this.agentAudioAmplitude = 0;
1544
+ this.assistantIsSpeaking = false;
1545
+ this.assistantHasBeenInterrupted = false;
1546
+ this.sessionId = options.sessionId || null;
1547
+ this.pushToTalkActive = false;
1548
+ // this.currentTurnId = null; // TODO implement
1549
+ // this.lastDeltaIdPlayed = null; // TODO implement
1550
+ // Bind event handlers
1551
+ this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
1552
+ this._handleDataAvailable = this._handleDataAvailable.bind(this);
1553
+ }
1554
+ /**
1555
+ * Updates the connection status and triggers the callback
1556
+ * @param {string} status - New status value
1557
+ * @private
1558
+ */
1559
+ _setStatus(status) {
1560
+ this.status = status;
1561
+ this.options.onStatusChange(status);
1562
+ }
1563
+ /**
1564
+ * Handles when agent audio finishes playing
1565
+ * @private
1566
+ */
1567
+ _clientResponseAudioReplayFinished() {
1568
+ this.assistantIsSpeaking = false;
1569
+ this._wsSend({
1570
+ type: 'trigger.response.audio.replay_finished',
1571
+ reason: 'completed',
1572
+ // last_delta_id_played: this.lastDeltaIdPlayed, // TODO implement
1573
+ // turn_id: this.currentTurnId, // TODO implement
1574
+ });
1575
+ }
1576
+ async _setupWavPlayer() {
1577
+ this.wavPlayer = new WavStreamPlayer({
1578
+ finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
1579
+ sampleRate: 16000, // TODO should be set by the fetched pipeline config
1580
+ });
1581
+ await this.wavPlayer.connect();
1582
+ // Set up amplitude monitoring only if callbacks are provided
1583
+ if (this.options.onAgentAmplitudeChange !== (() => { })) {
1584
+ let agentUpdateCounter = 0;
1585
+ this.wavPlayer.startAmplitudeMonitoring((amplitude) => {
1586
+ if (agentUpdateCounter == this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
1587
+ this.agentAudioAmplitude = amplitude;
1588
+ this.options.onAgentAmplitudeChange(amplitude);
1589
+ agentUpdateCounter = 0; // Reset after each sample
1590
+ }
1591
+ agentUpdateCounter++;
1592
+ });
1593
+ }
1594
+ }
1595
+ async _clientInterruptAssistantReplay() {
1596
+ if (this.assistantIsSpeaking) {
1597
+ await this.wavPlayer.interrupt();
1598
+ this.wavPlayer.disconnect();
1599
+ await this._setupWavPlayer();
1600
+ console.log('interrupting assistant replay');
1601
+ console.log('setting assistantIsSpeaking to false');
1602
+ console.log('setting assistantHasBeenInterrupted to true');
1603
+ this.assistantIsSpeaking = false;
1604
+ this.assistantHasBeenInterrupted = true;
1605
+ this._wsSend({
1606
+ type: 'trigger.response.audio.replay_finished',
1607
+ reason: 'interrupted',
1608
+ // last_delta_id_played: this.lastDeltaIdPlayed, // TODO implement
1609
+ // turn_id: this.currentTurnId, // TODO implement
1610
+ });
1611
+ }
1612
+ }
1613
+ async triggerUserTurnStarted() {
1614
+ if (!this.pushToTalkActive) {
1615
+ this.pushToTalkActive = true;
1616
+ this._wsSend({ type: 'trigger.turn.start', role: 'user' });
1617
+ await this._clientInterruptAssistantReplay();
1618
+ }
1619
+ }
1620
+ async triggerUserTurnFinished() {
1621
+ if (this.pushToTalkActive) {
1622
+ this.pushToTalkActive = false;
1623
+ this._wsSend({ type: 'trigger.turn.end', role: 'user' });
1624
+ }
1625
+ }
1626
+ /**
1627
+ * Handles incoming WebSocket messages
1628
+ * @param {MessageEvent} event - The WebSocket message event
1629
+ * @private
1630
+ */
1631
+ async _handleWebSocketMessage(event) {
1632
+ try {
1633
+ const message = JSON.parse(event.data);
1634
+ if (message.type !== 'response.audio') {
1635
+ console.log('received ws msg:', message);
1636
+ }
1637
+ switch (message.type) {
1638
+ case 'turn.start':
1639
+ // Sent from the server to this client when a new user turn is detected. The client should interrupt any playing assistant audio and send a "trigger.response.audio.replay_finished" event to the server.
1640
+ if (message.role === 'user') {
1641
+ // Interrupt any playing assistant audio
1642
+ // await this._clientInterruptAssistantReplay(); // TODO work out whether to call interrupt here
1643
+ }
1644
+ break;
1645
+ case 'response.audio':
1646
+ // console.log("received response.audio");
1647
+ if (!this.assistantHasBeenInterrupted) {
1648
+ // If the assistant has been interrupted, ignore the rest of the audio chunks for this response (turn)
1649
+ // TODO: scope audio chunks we ignore based on the turn_id
1650
+ if (!this.assistantIsSpeaking) {
1651
+ // If we have switched from the assistant not speaking to speaking, this is the start of the assistant's response
1652
+ this.assistantIsSpeaking = true;
1653
+ console.log('assistantIsSpeaking is currently false, received audio chunk, so setting to true');
1654
+ }
1655
+ console.log('adding audio chunk to wavPlayer');
1656
+ console.log('message.content length:', message.content.length);
1657
+ const audioBuffer = base64ToArrayBuffer(message.content);
1658
+ this.wavPlayer.add16BitPCM(audioBuffer, 'default');
1659
+ }
1660
+ else {
1661
+ console.log('ignoring response.audio because assistant has been interrupted');
1662
+ }
1663
+ break;
1664
+ case 'response.end':
1665
+ console.log('received response.end');
1666
+ console.log('setting assistantHasBeenInterrupted to false');
1667
+ this.assistantHasBeenInterrupted = false;
1668
+ break;
1669
+ case 'response.data':
1670
+ this.options.onDataMessage(message);
1671
+ break;
1672
+ default:
1673
+ console.error('Unknown message type received:', message);
1674
+ break;
1675
+ }
1676
+ }
1677
+ catch (error) {
1678
+ console.error('Error processing WebSocket message:', error);
1679
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
1680
+ }
1681
+ }
1682
+ /**
1683
+ * Handles available client browser microphone audio data and sends it over the WebSocket
1684
+ * @param {ArrayBuffer} data - The audio data buffer
1685
+ * @private
1686
+ */
1687
+ _handleDataAvailable(data) {
1688
+ try {
1689
+ const base64 = arrayBufferToBase64(data.mono);
1690
+ this._wsSend({ type: 'client.audio', content: base64 });
1691
+ }
1692
+ catch (error) {
1693
+ console.error('Error processing audio:', error);
1694
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
1695
+ }
1696
+ }
1697
+ _wsSend(message) {
1698
+ var _a;
1699
+ if (message.type !== 'client.audio') {
1700
+ console.log('sent ws msg:', message);
1701
+ }
1702
+ const messageString = JSON.stringify(message);
1703
+ if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
1704
+ this.ws.send(messageString);
1705
+ }
1706
+ }
1707
+ /**
1708
+ * Connects to the Layercode pipeline and starts the audio session
1709
+ * @async
1710
+ * @returns {Promise<void>}
1711
+ */
1712
+ async connect() {
1713
+ try {
1714
+ this._setStatus('connecting');
1715
+ // Get session key from server
1716
+ let authorizeSessionRequestBody = {
1717
+ pipeline_id: this.options.pipelineId,
1718
+ metadata: this.options.metadata,
1719
+ };
1720
+ // If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
1721
+ if (this.options.sessionId) {
1722
+ authorizeSessionRequestBody.session_id = this.options.sessionId;
1723
+ }
1724
+ const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
1725
+ method: 'POST',
1726
+ headers: {
1727
+ 'Content-Type': 'application/json',
1728
+ },
1729
+ body: JSON.stringify(authorizeSessionRequestBody),
1730
+ });
1731
+ if (!authorizeSessionResponse.ok) {
1732
+ throw new Error(`Failed to authorize session: ${authorizeSessionResponse.statusText}`);
1733
+ }
1734
+ const authorizeSessionResponseBody = await authorizeSessionResponse.json();
1735
+ this.sessionId = authorizeSessionResponseBody.session_id; // Save the session_id for use in future reconnects
1736
+ // Connect WebSocket
1737
+ this.ws = new WebSocket(`${this.websocketUrl}?${new URLSearchParams({
1738
+ client_session_key: authorizeSessionResponseBody.client_session_key,
1739
+ })}`);
1740
+ this.ws.onopen = () => {
1741
+ console.log('WebSocket connection established');
1742
+ this._setStatus('connected');
1743
+ this.options.onConnect({ sessionId: this.sessionId });
1744
+ };
1745
+ this.ws.onclose = () => {
1746
+ console.log('WebSocket connection closed');
1747
+ this._setStatus('disconnected');
1748
+ this.options.onDisconnect();
1749
+ };
1750
+ this.ws.onerror = (error) => {
1751
+ console.error('WebSocket error:', error);
1752
+ this._setStatus('error');
1753
+ this.options.onError(new Error('WebSocket connection error'));
1754
+ };
1755
+ this.ws.onmessage = this._handleWebSocketMessage;
1756
+ // Initialize audio
1757
+ await this.wavRecorder.begin();
1758
+ await this.wavRecorder.record(this._handleDataAvailable);
1759
+ if (this.options.onUserAmplitudeChange !== (() => { })) {
1760
+ let userUpdateCounter = 0;
1761
+ this.wavRecorder.startAmplitudeMonitoring((amplitude) => {
1762
+ if (userUpdateCounter == this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
1763
+ this.userAudioAmplitude = amplitude;
1764
+ this.options.onUserAmplitudeChange(amplitude);
1765
+ userUpdateCounter = 0; // Reset after each sample
1766
+ }
1767
+ userUpdateCounter++;
1768
+ });
1769
+ }
1770
+ await this._setupWavPlayer();
1771
+ // Handle page unload
1772
+ window.addEventListener('beforeunload', () => {
1773
+ this.disconnect();
1774
+ });
1775
+ }
1776
+ catch (error) {
1777
+ console.error('Error connecting to Layercode pipeline:', error);
1778
+ this._setStatus('error');
1779
+ this.options.onError(error instanceof Error ? error : new Error(String(error)));
1780
+ throw error;
1781
+ }
1782
+ }
1783
+ /**
1784
+ * Disconnects from the Layercode pipeline and stops audio recording
1785
+ */
1786
+ disconnect() {
1787
+ console.log('disconnecting');
1788
+ if (this.ws) {
1789
+ this.ws.close();
1790
+ this.ws = null;
1791
+ }
1792
+ // Stop recording user microphone audio
1793
+ this.wavRecorder.quit().catch((err) => console.error('Error stopping recorder:', err));
1794
+ // Handle wavPlayer cleanup without calling disconnect directly
1795
+ if (this.wavPlayer) {
1796
+ // Use type assertion to access internal properties
1797
+ const player = this.wavPlayer;
1798
+ // Clean up any audio resources manually
1799
+ if (player.stream) {
1800
+ player.stream.disconnect();
1801
+ player.stream = null;
1802
+ }
1803
+ if (player.analyser) {
1804
+ player.analyser.disconnect();
1805
+ }
1806
+ if (player.context) {
1807
+ player.context.close().catch((err) => console.error('Error closing audio context:', err));
1808
+ }
1809
+ }
1810
+ this._setStatus('disconnected');
1811
+ }
1812
+ /**
1813
+ * Mutes or unmutes the microphone
1814
+ * @param {boolean} mute - Whether to mute the microphone
1815
+ */
1816
+ setMuteMic(mute) {
1817
+ // WavRecorder exposes no mute()/unmute(), so toggle the underlying MediaStream tracks instead
1818
+ if (this.wavRecorder.stream) {
1819
+ this.wavRecorder.stream.getAudioTracks().forEach((track) => {
1820
+ track.enabled = !mute;
1821
+ });
1822
+ }
1823
+ }
1824
+ }
1825
+
1826
+ export { LayercodeClient as default };
1827
+ //# sourceMappingURL=layercode-js-sdk.esm.js.map
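// Usage sketch for LayercodeClient (illustrative): the pipeline id, endpoint and metadata below
// are placeholders, not real values. The authorize endpoint is expected to return
// { client_session_key, session_id }, as consumed by connect() above.
import LayercodeClient from '@layercode/js-sdk';

const client = new LayercodeClient({
  pipelineId: 'your-pipeline-id', // placeholder
  authorizeSessionEndpoint: '/api/authorize-layercode-session', // placeholder backend route
  metadata: { userId: 'demo-user' }, // placeholder
  onConnect: ({ sessionId }) => console.log('connected, session:', sessionId),
  onDataMessage: (msg) => console.log('data message:', msg),
  onStatusChange: (status) => console.log('status:', status),
});

// Call from a user gesture (e.g. a click handler) so the microphone and AudioContext can start.
await client.connect();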