@layercode/js-sdk 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1769 +0,0 @@
1
- (function (global, factory) {
2
- typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() :
3
- typeof define === 'function' && define.amd ? define(factory) :
4
- (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.LayercodeClient = factory());
5
- })(this, (function () { 'use strict';
6
-
7
- /**
8
- * Raw wav audio file contents
9
- * @typedef {Object} WavPackerAudioType
10
- * @property {Blob} blob
11
- * @property {string} url
12
- * @property {number} channelCount
13
- * @property {number} sampleRate
14
- * @property {number} duration
15
- */
16
-
17
- /**
18
- * Utility class for assembling PCM16 "audio/wav" data
19
- * @class
20
- */
21
- class WavPacker {
22
- /**
23
- * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
24
- * @param {Float32Array} float32Array
25
- * @returns {ArrayBuffer}
26
- */
27
- static floatTo16BitPCM(float32Array) {
28
- const buffer = new ArrayBuffer(float32Array.length * 2);
29
- const view = new DataView(buffer);
30
- let offset = 0;
31
- for (let i = 0; i < float32Array.length; i++, offset += 2) {
32
- let s = Math.max(-1, Math.min(1, float32Array[i]));
33
- view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
34
- }
35
- return buffer;
36
- }
37
-
38
- /**
39
- * Concatenates two ArrayBuffers
40
- * @param {ArrayBuffer} leftBuffer
41
- * @param {ArrayBuffer} rightBuffer
42
- * @returns {ArrayBuffer}
43
- */
44
- static mergeBuffers(leftBuffer, rightBuffer) {
45
- const tmpArray = new Uint8Array(
46
- leftBuffer.byteLength + rightBuffer.byteLength
47
- );
48
- tmpArray.set(new Uint8Array(leftBuffer), 0);
49
- tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength);
50
- return tmpArray.buffer;
51
- }
52
-
53
- /**
54
- * Packs data into an Int16 format
55
- * @private
56
- * @param {number} size 0 = 1x Int16, 1 = 2x Int16
57
- * @param {number} arg value to pack
58
- * @returns
59
- */
60
- _packData(size, arg) {
61
- return [
62
- new Uint8Array([arg, arg >> 8]),
63
- new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]),
64
- ][size];
65
- }
66
-
67
- /**
68
- * Packs audio into "audio/wav" Blob
69
- * @param {number} sampleRate
70
- * @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
71
- * @returns {WavPackerAudioType}
72
- */
73
- pack(sampleRate, audio) {
74
- if (!audio?.bitsPerSample) {
75
- throw new Error(`Missing "bitsPerSample"`);
76
- } else if (!audio?.channels) {
77
- throw new Error(`Missing "channels"`);
78
- } else if (!audio?.data) {
79
- throw new Error(`Missing "data"`);
80
- }
81
- const { bitsPerSample, channels, data } = audio;
82
- const output = [
83
- // Header
84
- 'RIFF',
85
- this._packData(
86
- 1,
87
- 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */
88
- ), // Length
89
- 'WAVE',
90
- // chunk 1
91
- 'fmt ', // Sub-chunk identifier
92
- this._packData(1, 16), // Chunk length
93
- this._packData(0, 1), // Audio format (1 is linear quantization)
94
- this._packData(0, channels.length),
95
- this._packData(1, sampleRate),
96
- this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate
97
- this._packData(0, (channels.length * bitsPerSample) / 8),
98
- this._packData(0, bitsPerSample),
99
- // chunk 2
100
- 'data', // Sub-chunk identifier
101
- this._packData(
102
- 1,
103
- (channels[0].length * channels.length * bitsPerSample) / 8
104
- ), // Chunk length
105
- data,
106
- ];
107
- const blob = new Blob(output, { type: 'audio/wav' });
108
- const url = URL.createObjectURL(blob);
109
- return {
110
- blob,
111
- url,
112
- channelCount: channels.length,
113
- sampleRate,
114
- duration: data.byteLength / (channels.length * sampleRate * 2),
115
- };
116
- }
117
- }
118
-
119
- globalThis.WavPacker = WavPacker;
120
-
121
- /**
122
- * Constants for help with visualization
123
- * Helps map frequency ranges from Fast Fourier Transform
124
- * to human-interpretable ranges, notably music ranges and
125
- * human vocal ranges.
126
- */
127
-
128
- // Eighth octave frequencies
129
- const octave8Frequencies = [
130
- 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
131
- 6644.88, 7040.0, 7458.62, 7902.13,
132
- ];
133
-
134
- // Labels for each of the above frequencies
135
- const octave8FrequencyLabels = [
136
- 'C',
137
- 'C#',
138
- 'D',
139
- 'D#',
140
- 'E',
141
- 'F',
142
- 'F#',
143
- 'G',
144
- 'G#',
145
- 'A',
146
- 'A#',
147
- 'B',
148
- ];
149
-
150
- /**
151
- * All note frequencies from 1st to 8th octave
152
- * in format "A#8" (A#, 8th octave)
153
- */
154
- const noteFrequencies = [];
155
- const noteFrequencyLabels = [];
156
- for (let i = 1; i <= 8; i++) {
157
- for (let f = 0; f < octave8Frequencies.length; f++) {
158
- const freq = octave8Frequencies[f];
159
- noteFrequencies.push(freq / Math.pow(2, 8 - i));
160
- noteFrequencyLabels.push(octave8FrequencyLabels[f] + i);
161
- }
162
- }
163
-
164
- /**
165
- * Subset of the note frequencies between 32 and 2000 Hz
166
- * 6 octave range: C1 to B6
167
- */
168
- const voiceFrequencyRange = [32.0, 2000.0];
169
- const voiceFrequencies = noteFrequencies.filter((_, i) => {
170
- return (
171
- noteFrequencies[i] > voiceFrequencyRange[0] &&
172
- noteFrequencies[i] < voiceFrequencyRange[1]
173
- );
174
- });
175
- const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => {
176
- return (
177
- noteFrequencies[i] > voiceFrequencyRange[0] &&
178
- noteFrequencies[i] < voiceFrequencyRange[1]
179
- );
180
- });
181
-
182
- /**
183
- * Output of AudioAnalysis for the frequency domain of the audio
184
- * @typedef {Object} AudioAnalysisOutputType
185
- * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
186
- * @property {number[]} frequencies Raw frequency bucket values
187
- * @property {string[]} labels Labels for the frequency bucket values
188
- */
189
-
190
- /**
191
- * Analyzes audio for visual output
192
- * @class
193
- */
194
- class AudioAnalysis {
195
- /**
196
- * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
197
- * returns human-readable formatting and labels
198
- * @param {AnalyserNode} analyser
199
- * @param {number} sampleRate
200
- * @param {Float32Array} [fftResult]
201
- * @param {"frequency"|"music"|"voice"} [analysisType]
202
- * @param {number} [minDecibels] default -100
203
- * @param {number} [maxDecibels] default -30
204
- * @returns {AudioAnalysisOutputType}
205
- */
206
- static getFrequencies(
207
- analyser,
208
- sampleRate,
209
- fftResult,
210
- analysisType = 'frequency',
211
- minDecibels = -100,
212
- maxDecibels = -30,
213
- ) {
214
- if (!fftResult) {
215
- fftResult = new Float32Array(analyser.frequencyBinCount);
216
- analyser.getFloatFrequencyData(fftResult);
217
- }
218
- const nyquistFrequency = sampleRate / 2;
219
- const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
220
- let outputValues;
221
- let frequencies;
222
- let labels;
223
- if (analysisType === 'music' || analysisType === 'voice') {
224
- const useFrequencies =
225
- analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
226
- const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
227
- for (let i = 0; i < fftResult.length; i++) {
228
- const frequency = i * frequencyStep;
229
- const amplitude = fftResult[i];
230
- for (let n = useFrequencies.length - 1; n >= 0; n--) {
231
- if (frequency > useFrequencies[n]) {
232
- aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
233
- break;
234
- }
235
- }
236
- }
237
- outputValues = aggregateOutput;
238
- frequencies =
239
- analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
240
- labels =
241
- analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
242
- } else {
243
- outputValues = Array.from(fftResult);
244
- frequencies = outputValues.map((_, i) => frequencyStep * i);
245
- labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
246
- }
247
- // We normalize to {0, 1}
248
- const normalizedOutput = outputValues.map((v) => {
249
- return Math.max(
250
- 0,
251
- Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
252
- );
253
- });
254
- const values = new Float32Array(normalizedOutput);
255
- return {
256
- values,
257
- frequencies,
258
- labels,
259
- };
260
- }
261
-
262
- /**
263
- * Creates a new AudioAnalysis instance for an HTMLAudioElement
264
- * @param {HTMLAudioElement} audioElement
265
- * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
266
- * @returns {AudioAnalysis}
267
- */
268
- constructor(audioElement, audioBuffer = null) {
269
- this.fftResults = [];
270
- if (audioBuffer) {
271
- /**
272
- * Modified from
273
- * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
274
- *
275
- * We do this to populate FFT values for the audio if provided an `audioBuffer`
276
- * The reason to do this is that Safari fails when using `createMediaElementSource`
277
- * This has a non-zero RAM cost so we only opt-in to run it on Safari, Chrome is better
278
- */
279
- const { length, sampleRate } = audioBuffer;
280
- const offlineAudioContext = new OfflineAudioContext({
281
- length,
282
- sampleRate,
283
- });
284
- const source = offlineAudioContext.createBufferSource();
285
- source.buffer = audioBuffer;
286
- const analyser = offlineAudioContext.createAnalyser();
287
- analyser.fftSize = 8192;
288
- analyser.smoothingTimeConstant = 0.1;
289
- source.connect(analyser);
290
- // limit is :: 128 / sampleRate;
291
- // but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM
292
- const renderQuantumInSeconds = 1 / 60;
293
- const durationInSeconds = length / sampleRate;
294
- const analyze = (index) => {
295
- const suspendTime = renderQuantumInSeconds * index;
296
- if (suspendTime < durationInSeconds) {
297
- offlineAudioContext.suspend(suspendTime).then(() => {
298
- const fftResult = new Float32Array(analyser.frequencyBinCount);
299
- analyser.getFloatFrequencyData(fftResult);
300
- this.fftResults.push(fftResult);
301
- analyze(index + 1);
302
- });
303
- }
304
- if (index === 1) {
305
- offlineAudioContext.startRendering();
306
- } else {
307
- offlineAudioContext.resume();
308
- }
309
- };
310
- source.start(0);
311
- analyze(1);
312
- this.audio = audioElement;
313
- this.context = offlineAudioContext;
314
- this.analyser = analyser;
315
- this.sampleRate = sampleRate;
316
- this.audioBuffer = audioBuffer;
317
- } else {
318
- const audioContext = new AudioContext();
319
- const track = audioContext.createMediaElementSource(audioElement);
320
- const analyser = audioContext.createAnalyser();
321
- analyser.fftSize = 8192;
322
- analyser.smoothingTimeConstant = 0.1;
323
- track.connect(analyser);
324
- analyser.connect(audioContext.destination);
325
- this.audio = audioElement;
326
- this.context = audioContext;
327
- this.analyser = analyser;
328
- this.sampleRate = this.context.sampleRate;
329
- this.audioBuffer = null;
330
- }
331
- }
332
-
333
- /**
334
- * Gets the current frequency domain data from the playing audio track
335
- * @param {"frequency"|"music"|"voice"} [analysisType]
336
- * @param {number} [minDecibels] default -100
337
- * @param {number} [maxDecibels] default -30
338
- * @returns {AudioAnalysisOutputType}
339
- */
340
- getFrequencies(
341
- analysisType = 'frequency',
342
- minDecibels = -100,
343
- maxDecibels = -30,
344
- ) {
345
- let fftResult = null;
346
- if (this.audioBuffer && this.fftResults.length) {
347
- const pct = this.audio.currentTime / this.audio.duration;
348
- const index = Math.min(
349
- (pct * this.fftResults.length) | 0,
350
- this.fftResults.length - 1,
351
- );
352
- fftResult = this.fftResults[index];
353
- }
354
- return AudioAnalysis.getFrequencies(
355
- this.analyser,
356
- this.sampleRate,
357
- fftResult,
358
- analysisType,
359
- minDecibels,
360
- maxDecibels,
361
- );
362
- }
363
-
364
- /**
365
- * Resume the internal AudioContext if it was suspended due to the lack of
366
- * user interaction when the AudioAnalysis was instantiated.
367
- * @returns {Promise<true>}
368
- */
369
- async resumeIfSuspended() {
370
- if (this.context.state === 'suspended') {
371
- await this.context.resume();
372
- }
373
- return true;
374
- }
375
- }
376
-
377
- globalThis.AudioAnalysis = AudioAnalysis;
378
-
379
- const StreamProcessorWorklet = `
380
- class StreamProcessor extends AudioWorkletProcessor {
381
- constructor() {
382
- super();
383
- this.hasStarted = false;
384
- this.hasInterrupted = false;
385
- this.outputBuffers = [];
386
- this.bufferLength = 128;
387
- this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };
388
- this.writeOffset = 0;
389
- this.trackSampleOffsets = {};
390
- this.port.onmessage = (event) => {
391
- if (event.data) {
392
- const payload = event.data;
393
- if (payload.event === 'write') {
394
- const int16Array = payload.buffer;
395
- const float32Array = new Float32Array(int16Array.length);
396
- for (let i = 0; i < int16Array.length; i++) {
397
- float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32
398
- }
399
- this.writeData(float32Array, payload.trackId);
400
- } else if (
401
- payload.event === 'offset' ||
402
- payload.event === 'interrupt'
403
- ) {
404
- const requestId = payload.requestId;
405
- const trackId = this.write.trackId;
406
- const offset = this.trackSampleOffsets[trackId] || 0;
407
- this.port.postMessage({
408
- event: 'offset',
409
- requestId,
410
- trackId,
411
- offset,
412
- });
413
- if (payload.event === 'interrupt') {
414
- this.hasInterrupted = true;
415
- }
416
- } else {
417
- throw new Error(\`Unhandled event "\${payload.event}"\`);
418
- }
419
- }
420
- };
421
- }
422
-
423
- writeData(float32Array, trackId = null) {
424
- let { buffer } = this.write;
425
- let offset = this.writeOffset;
426
- for (let i = 0; i < float32Array.length; i++) {
427
- buffer[offset++] = float32Array[i];
428
- if (offset >= buffer.length) {
429
- this.outputBuffers.push(this.write);
430
- this.write = { buffer: new Float32Array(this.bufferLength), trackId };
431
- buffer = this.write.buffer;
432
- offset = 0;
433
- }
434
- }
435
- this.writeOffset = offset;
436
- return true;
437
- }
438
-
439
- process(inputs, outputs, parameters) {
440
- const output = outputs[0];
441
- const outputChannelData = output[0];
442
- const outputBuffers = this.outputBuffers;
443
- if (this.hasInterrupted) {
444
- this.port.postMessage({ event: 'stop' });
445
- return false;
446
- } else if (outputBuffers.length) {
447
- this.hasStarted = true;
448
- const { buffer, trackId } = outputBuffers.shift();
449
- for (let i = 0; i < outputChannelData.length; i++) {
450
- outputChannelData[i] = buffer[i] || 0;
451
- }
452
- if (trackId) {
453
- this.trackSampleOffsets[trackId] =
454
- this.trackSampleOffsets[trackId] || 0;
455
- this.trackSampleOffsets[trackId] += buffer.length;
456
- }
457
- return true;
458
- } else if (this.hasStarted) {
459
- this.port.postMessage({ event: 'stop' });
460
- return false;
461
- } else {
462
- return true;
463
- }
464
- }
465
- }
466
-
467
- registerProcessor('stream_processor', StreamProcessor);
468
- `;
469
-
470
- const script$1 = new Blob([StreamProcessorWorklet], {
471
- type: 'application/javascript',
472
- });
473
- const src$1 = URL.createObjectURL(script$1);
474
- const StreamProcessorSrc = src$1;
475
-
476
- /**
477
- * Plays audio streams received in raw PCM16 chunks from the browser
478
- * @class
479
- */
480
- class WavStreamPlayer {
481
- /**
482
- * Creates a new WavStreamPlayer instance
483
- * @param {{finishedPlayingCallback?: () => void, sampleRate?: number}} [options]
484
- * @returns {WavStreamPlayer}
485
- */
486
- constructor({ finishedPlayingCallback = () => {}, sampleRate = 24000 } = {}) {
487
- this.scriptSrc = StreamProcessorSrc;
488
- this.sampleRate = sampleRate;
489
- this.context = null;
490
- this.stream = null;
491
- this.analyser = null;
492
- this.trackSampleOffsets = {};
493
- this.interruptedTrackIds = {};
494
- this.finishedPlayingCallback = finishedPlayingCallback;
495
- }
496
-
497
- /**
498
- * Connects the audio context and enables output to speakers
499
- * @returns {Promise<true>}
500
- */
501
- async connect() {
502
- this.context = new AudioContext({ sampleRate: this.sampleRate });
503
- if (this.context.state === "suspended") {
504
- await this.context.resume();
505
- }
506
- try {
507
- await this.context.audioWorklet.addModule(this.scriptSrc);
508
- } catch (e) {
509
- console.error(e);
510
- throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
511
- }
512
- const analyser = this.context.createAnalyser();
513
- analyser.fftSize = 8192;
514
- analyser.smoothingTimeConstant = 0.1;
515
- this.analyser = analyser;
516
- return true;
517
- }
518
-
519
- /**
520
- * Gets the current frequency domain data from the playing track
521
- * @param {"frequency"|"music"|"voice"} [analysisType]
522
- * @param {number} [minDecibels] default -100
523
- * @param {number} [maxDecibels] default -30
524
- * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
525
- */
526
- getFrequencies(analysisType = "frequency", minDecibels = -100, maxDecibels = -30) {
527
- if (!this.analyser) {
528
- throw new Error("Not connected, please call .connect() first");
529
- }
530
- return AudioAnalysis.getFrequencies(this.analyser, this.sampleRate, null, analysisType, minDecibels, maxDecibels);
531
- }
532
-
533
- /**
534
- * Gets the real-time amplitude of the audio signal
535
- * @returns {number} Amplitude value between 0 and 1
536
- */
537
- getAmplitude() {
538
- if (!this.analyser) {
539
- throw new Error("AnalyserNode is not initialized. Please call connect() first.");
540
- }
541
-
542
- const bufferLength = this.analyser.fftSize;
543
- const dataArray = new Uint8Array(bufferLength);
544
- this.analyser.getByteTimeDomainData(dataArray);
545
-
546
- // Calculate RMS (Root Mean Square) to get amplitude
547
- let sumSquares = 0;
548
- for (let i = 0; i < bufferLength; i++) {
549
- const normalized = (dataArray[i] - 128) / 128; // Normalize between -1 and 1
550
- sumSquares += normalized * normalized;
551
- }
552
- const rms = Math.sqrt(sumSquares / bufferLength);
553
- return rms;
554
- }
555
-
556
- /**
557
- * Starts amplitude monitoring
558
- * @param {function} callback - Function to call with amplitude value
559
- */
560
- startAmplitudeMonitoring(callback) {
561
- const monitor = () => {
562
- const amplitude = this.getAmplitude();
563
- callback(amplitude);
564
- requestAnimationFrame(monitor);
565
- };
566
- monitor();
567
- }
568
-
569
- /**
570
- * Starts audio streaming
571
- * @private
572
- * @returns {Promise<true>}
573
- */
574
- _start() {
575
- const streamNode = new AudioWorkletNode(this.context, "stream_processor");
576
- streamNode.connect(this.context.destination);
577
- streamNode.port.onmessage = (e) => {
578
- const { event } = e.data;
579
- if (event === "stop") {
580
- streamNode.disconnect();
581
- this.stream = null;
582
- this.finishedPlayingCallback();
583
- } else if (event === "offset") {
584
- const { requestId, trackId, offset } = e.data;
585
- const currentTime = offset / this.sampleRate;
586
- this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
587
- }
588
- };
589
- this.analyser.disconnect();
590
- streamNode.connect(this.analyser);
591
- this.stream = streamNode;
592
- return true;
593
- }
594
-
595
- /**
596
- * Adds 16BitPCM data to the currently playing audio stream
597
- * You can add chunks beyond the current play point and they will be queued for play
598
- * @param {ArrayBuffer|Int16Array} arrayBuffer
599
- * @param {string} [trackId]
600
- * @returns {Int16Array}
601
- */
602
- add16BitPCM(arrayBuffer, trackId = "default") {
603
- if (typeof trackId !== "string") {
604
- throw new Error(`trackId must be a string`);
605
- } else if (this.interruptedTrackIds[trackId]) {
606
- return;
607
- }
608
- if (!this.stream) {
609
- this._start();
610
- }
611
- let buffer;
612
- if (arrayBuffer instanceof Int16Array) {
613
- buffer = arrayBuffer;
614
- } else if (arrayBuffer instanceof ArrayBuffer) {
615
- buffer = new Int16Array(arrayBuffer);
616
- } else {
617
- throw new Error(`argument must be Int16Array or ArrayBuffer`);
618
- }
619
- this.stream.port.postMessage({ event: "write", buffer, trackId });
620
- return buffer;
621
- }
622
-
623
- /**
624
- * Gets the offset (sample count) of the currently playing stream
625
- * @param {boolean} [interrupt]
626
- * @returns {{trackId: string|null, offset: number, currentTime: number}}
627
- */
628
- async getTrackSampleOffset(interrupt = false) {
629
- if (!this.stream) {
630
- return null;
631
- }
632
- const requestId = crypto.randomUUID();
633
- this.stream.port.postMessage({
634
- event: interrupt ? "interrupt" : "offset",
635
- requestId,
636
- });
637
- let trackSampleOffset;
638
- while (!trackSampleOffset) {
639
- trackSampleOffset = this.trackSampleOffsets[requestId];
640
- await new Promise((r) => setTimeout(() => r(), 1));
641
- }
642
- const { trackId } = trackSampleOffset;
643
- if (interrupt && trackId) {
644
- this.interruptedTrackIds[trackId] = true;
645
- }
646
- return trackSampleOffset;
647
- }
648
-
649
- /**
650
- * Strips the current stream and returns the sample offset of the audio
651
- * @param {boolean} [interrupt]
652
- * @returns {{trackId: string|null, offset: number, currentTime: number}}
653
- */
654
- async interrupt() {
655
- return this.getTrackSampleOffset(true);
656
- }
657
-
658
- /**
659
- * Disconnects the audio context and cleans up resources
660
- * @returns {void}
661
- */
662
- disconnect() {
663
- if (this.stream) {
664
- this.stream.disconnect();
665
- this.stream = null;
666
- }
667
-
668
- if (this.analyser) {
669
- this.analyser.disconnect();
670
- }
671
-
672
- if (this.context) {
673
- this.context.close().catch((err) => console.error("Error closing audio context:", err));
674
- }
675
- }
676
- }
677
-
678
- globalThis.WavStreamPlayer = WavStreamPlayer;
679
-
680
- const AudioProcessorWorklet = `
681
- class AudioProcessor extends AudioWorkletProcessor {
682
-
683
- constructor() {
684
- super();
685
- this.port.onmessage = this.receive.bind(this);
686
- this.initialize();
687
- }
688
-
689
- initialize() {
690
- this.foundAudio = false;
691
- this.recording = false;
692
- this.chunks = [];
693
- }
694
-
695
- /**
696
- * Concatenates sampled chunks into channels
697
- * Format is chunk[Left[], Right[]]
698
- */
699
- readChannelData(chunks, channel = -1, maxChannels = 9) {
700
- let channelLimit;
701
- if (channel !== -1) {
702
- if (chunks[0] && chunks[0].length - 1 < channel) {
703
- throw new Error(
704
- \`Channel \${channel} out of range: max \${chunks[0].length}\`
705
- );
706
- }
707
- channelLimit = channel + 1;
708
- } else {
709
- channel = 0;
710
- channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels);
711
- }
712
- const channels = [];
713
- for (let n = channel; n < channelLimit; n++) {
714
- const length = chunks.reduce((sum, chunk) => {
715
- return sum + chunk[n].length;
716
- }, 0);
717
- const buffers = chunks.map((chunk) => chunk[n]);
718
- const result = new Float32Array(length);
719
- let offset = 0;
720
- for (let i = 0; i < buffers.length; i++) {
721
- result.set(buffers[i], offset);
722
- offset += buffers[i].length;
723
- }
724
- channels[n] = result;
725
- }
726
- return channels;
727
- }
728
-
729
- /**
730
- * Combines parallel audio data into correct format,
731
- * channels[Left[], Right[]] to float32Array[LRLRLRLR...]
732
- */
733
- formatAudioData(channels) {
734
- if (channels.length === 1) {
735
- // Simple case is only one channel
736
- const float32Array = channels[0].slice();
737
- const meanValues = channels[0].slice();
738
- return { float32Array, meanValues };
739
- } else {
740
- const float32Array = new Float32Array(
741
- channels[0].length * channels.length
742
- );
743
- const meanValues = new Float32Array(channels[0].length);
744
- for (let i = 0; i < channels[0].length; i++) {
745
- const offset = i * channels.length;
746
- let meanValue = 0;
747
- for (let n = 0; n < channels.length; n++) {
748
- float32Array[offset + n] = channels[n][i];
749
- meanValue += channels[n][i];
750
- }
751
- meanValues[i] = meanValue / channels.length;
752
- }
753
- return { float32Array, meanValues };
754
- }
755
- }
756
-
757
- /**
758
- * Converts 32-bit float data to 16-bit integers
759
- */
760
- floatTo16BitPCM(float32Array) {
761
- const buffer = new ArrayBuffer(float32Array.length * 2);
762
- const view = new DataView(buffer);
763
- let offset = 0;
764
- for (let i = 0; i < float32Array.length; i++, offset += 2) {
765
- let s = Math.max(-1, Math.min(1, float32Array[i]));
766
- view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
767
- }
768
- return buffer;
769
- }
770
-
771
- /**
772
- * Retrieves the most recent amplitude values from the audio stream
773
- * @param {number} channel
774
- */
775
- getValues(channel = -1) {
776
- const channels = this.readChannelData(this.chunks, channel);
777
- const { meanValues } = this.formatAudioData(channels);
778
- return { meanValues, channels };
779
- }
780
-
781
- /**
782
- * Exports chunks as an audio/wav file
783
- */
784
- export() {
785
- const channels = this.readChannelData(this.chunks);
786
- const { float32Array, meanValues } = this.formatAudioData(channels);
787
- const audioData = this.floatTo16BitPCM(float32Array);
788
- return {
789
- meanValues: meanValues,
790
- audio: {
791
- bitsPerSample: 16,
792
- channels: channels,
793
- data: audioData,
794
- },
795
- };
796
- }
797
-
798
- receive(e) {
799
- const { event, id } = e.data;
800
- let receiptData = {};
801
- switch (event) {
802
- case 'start':
803
- this.recording = true;
804
- break;
805
- case 'stop':
806
- this.recording = false;
807
- break;
808
- case 'clear':
809
- this.initialize();
810
- break;
811
- case 'export':
812
- receiptData = this.export();
813
- break;
814
- case 'read':
815
- receiptData = this.getValues();
816
- break;
817
- default:
818
- break;
819
- }
820
- // Always send back receipt
821
- this.port.postMessage({ event: 'receipt', id, data: receiptData });
822
- }
823
-
824
- sendChunk(chunk) {
825
- const channels = this.readChannelData([chunk]);
826
- const { float32Array, meanValues } = this.formatAudioData(channels);
827
- const rawAudioData = this.floatTo16BitPCM(float32Array);
828
- const monoAudioData = this.floatTo16BitPCM(meanValues);
829
- this.port.postMessage({
830
- event: 'chunk',
831
- data: {
832
- mono: monoAudioData,
833
- raw: rawAudioData,
834
- },
835
- });
836
- }
837
-
838
- process(inputList, outputList, parameters) {
839
- // Copy input to output (e.g. speakers)
840
- // Note that this creates choppy sounds with Mac products
841
- const sourceLimit = Math.min(inputList.length, outputList.length);
842
- for (let inputNum = 0; inputNum < sourceLimit; inputNum++) {
843
- const input = inputList[inputNum];
844
- const output = outputList[inputNum];
845
- const channelCount = Math.min(input.length, output.length);
846
- for (let channelNum = 0; channelNum < channelCount; channelNum++) {
847
- input[channelNum].forEach((sample, i) => {
848
- output[channelNum][i] = sample;
849
- });
850
- }
851
- }
852
- const inputs = inputList[0];
853
- // There's latency at the beginning of a stream before recording starts
854
- // Make sure we actually receive audio data before we start storing chunks
855
- let sliceIndex = 0;
856
- if (!this.foundAudio) {
857
- for (const channel of inputs) {
858
- sliceIndex = 0; // reset for each channel
859
- if (this.foundAudio) {
860
- break;
861
- }
862
- if (channel) {
863
- for (const value of channel) {
864
- if (value !== 0) {
865
- // find only one non-zero entry in any channel
866
- this.foundAudio = true;
867
- break;
868
- } else {
869
- sliceIndex++;
870
- }
871
- }
872
- }
873
- }
874
- }
875
- if (inputs && inputs[0] && this.foundAudio && this.recording) {
876
- // We need to copy the TypedArray, because the \`process\`
877
- // internals will reuse the same buffer to hold each input
878
- const chunk = inputs.map((input) => input.slice(sliceIndex));
879
- this.chunks.push(chunk);
880
- this.sendChunk(chunk);
881
- }
882
- return true;
883
- }
884
- }
885
-
886
- registerProcessor('audio_processor', AudioProcessor);
887
- `;
888
-
889
- const script = new Blob([AudioProcessorWorklet], {
890
- type: 'application/javascript',
891
- });
892
- const src = URL.createObjectURL(script);
893
- const AudioProcessorSrc = src;
894
-
895
- /**
896
- * Decodes audio into a wav file
897
- * @typedef {Object} DecodedAudioType
898
- * @property {Blob} blob
899
- * @property {string} url
900
- * @property {Float32Array} values
901
- * @property {AudioBuffer} audioBuffer
902
- */
903
-
904
- /**
905
- * Records live stream of user audio as PCM16 "audio/wav" data
906
- * @class
907
- */
908
- class WavRecorder {
909
- /**
910
- * Create a new WavRecorder instance
911
- * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
912
- * @returns {WavRecorder}
913
- */
914
- constructor({
915
- sampleRate = 24000,
916
- outputToSpeakers = false,
917
- debug = false,
918
- } = {}) {
919
- // Script source
920
- this.scriptSrc = AudioProcessorSrc;
921
- // Config
922
- this.sampleRate = sampleRate;
923
- this.outputToSpeakers = outputToSpeakers;
924
- this.debug = !!debug;
925
- this._deviceChangeCallback = null;
926
- this._devices = [];
927
- // State variables
928
- this.stream = null;
929
- this.processor = null;
930
- this.source = null;
931
- this.node = null;
932
- this.recording = false;
933
- // Event handling with AudioWorklet
934
- this._lastEventId = 0;
935
- this.eventReceipts = {};
936
- this.eventTimeout = 5000;
937
- // Process chunks of audio
938
- this._chunkProcessor = () => {};
939
- this._chunkProcessorSize = void 0;
940
- this._chunkProcessorBuffer = {
941
- raw: new ArrayBuffer(0),
942
- mono: new ArrayBuffer(0),
943
- };
944
- }
945
-
946
- /**
947
- * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
948
- * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
949
- * @param {number} sampleRate
950
- * @param {number} fromSampleRate
951
- * @returns {Promise<DecodedAudioType>}
952
- */
953
- static async decode(audioData, sampleRate = 24000, fromSampleRate = -1) {
954
- const context = new AudioContext({ sampleRate });
955
- let arrayBuffer;
956
- let blob;
957
- if (audioData instanceof Blob) {
958
- if (fromSampleRate !== -1) {
959
- throw new Error(
960
- `Can not specify "fromSampleRate" when reading from Blob`,
961
- );
962
- }
963
- blob = audioData;
964
- arrayBuffer = await blob.arrayBuffer();
965
- } else if (audioData instanceof ArrayBuffer) {
966
- if (fromSampleRate !== -1) {
967
- throw new Error(
968
- `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
969
- );
970
- }
971
- arrayBuffer = audioData;
972
- blob = new Blob([arrayBuffer], { type: 'audio/wav' });
973
- } else {
974
- let float32Array;
975
- let data;
976
- if (audioData instanceof Int16Array) {
977
- data = audioData;
978
- float32Array = new Float32Array(audioData.length);
979
- for (let i = 0; i < audioData.length; i++) {
980
- float32Array[i] = audioData[i] / 0x8000;
981
- }
982
- } else if (audioData instanceof Float32Array) {
983
- float32Array = audioData;
984
- } else if (audioData instanceof Array) {
985
- float32Array = new Float32Array(audioData);
986
- } else {
987
- throw new Error(
988
- `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
989
- );
990
- }
991
- if (fromSampleRate === -1) {
992
- throw new Error(
993
- `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
994
- );
995
- } else if (fromSampleRate < 3000) {
996
- throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
997
- }
998
- if (!data) {
999
- data = WavPacker.floatTo16BitPCM(float32Array);
1000
- }
1001
- const audio = {
1002
- bitsPerSample: 16,
1003
- channels: [float32Array],
1004
- data,
1005
- };
1006
- const packer = new WavPacker();
1007
- const result = packer.pack(fromSampleRate, audio);
1008
- blob = result.blob;
1009
- arrayBuffer = await blob.arrayBuffer();
1010
- }
1011
- const audioBuffer = await context.decodeAudioData(arrayBuffer);
1012
- const values = audioBuffer.getChannelData(0);
1013
- const url = URL.createObjectURL(blob);
1014
- return {
1015
- blob,
1016
- url,
1017
- values,
1018
- audioBuffer,
1019
- };
1020
- }
1021
-
1022
- /**
1023
- * Logs data in debug mode
1024
- * @param {...any} arguments
1025
- * @returns {true}
1026
- */
1027
- log() {
1028
- if (this.debug) {
1029
- console.log(...arguments);
1030
- }
1031
- return true;
1032
- }
1033
-
1034
- /**
1035
- * Retrieves the current sampleRate for the recorder
1036
- * @returns {number}
1037
- */
1038
- getSampleRate() {
1039
- return this.sampleRate;
1040
- }
1041
-
1042
- /**
1043
- * Retrieves the current status of the recording
1044
- * @returns {"ended"|"paused"|"recording"}
1045
- */
1046
- getStatus() {
1047
- if (!this.processor) {
1048
- return 'ended';
1049
- } else if (!this.recording) {
1050
- return 'paused';
1051
- } else {
1052
- return 'recording';
1053
- }
1054
- }
1055
-
1056
- /**
1057
- * Sends an event to the AudioWorklet
1058
- * @private
1059
- * @param {string} name
1060
- * @param {{[key: string]: any}} data
1061
- * @param {AudioWorkletNode} [_processor]
1062
- * @returns {Promise<{[key: string]: any}>}
1063
- */
1064
- async _event(name, data = {}, _processor = null) {
1065
- _processor = _processor || this.processor;
1066
- if (!_processor) {
1067
- throw new Error('Can not send events without recording first');
1068
- }
1069
- const message = {
1070
- event: name,
1071
- id: this._lastEventId++,
1072
- data,
1073
- };
1074
- _processor.port.postMessage(message);
1075
- const t0 = new Date().valueOf();
1076
- while (!this.eventReceipts[message.id]) {
1077
- if (new Date().valueOf() - t0 > this.eventTimeout) {
1078
- throw new Error(`Timeout waiting for "${name}" event`);
1079
- }
1080
- await new Promise((res) => setTimeout(() => res(true), 1));
1081
- }
1082
- const payload = this.eventReceipts[message.id];
1083
- delete this.eventReceipts[message.id];
1084
- return payload;
1085
- }
1086
-
1087
- /**
1088
- * Sets device change callback, remove if callback provided is `null`
1089
- * @param {((devices: Array<MediaDeviceInfo & {default: boolean}>) => void)|null} callback
1090
- * @returns {true}
1091
- */
1092
- listenForDeviceChange(callback) {
1093
- if (callback === null && this._deviceChangeCallback) {
1094
- navigator.mediaDevices.removeEventListener(
1095
- 'devicechange',
1096
- this._deviceChangeCallback,
1097
- );
1098
- this._deviceChangeCallback = null;
1099
- } else if (callback !== null) {
1100
- // Basically a debounce; we only want this called once when devices change
1101
- // And we only want the most recent callback() to be executed
1102
- // if a few are operating at the same time
1103
- let lastId = 0;
1104
- let lastDevices = [];
1105
- const serializeDevices = (devices) =>
1106
- devices
1107
- .map((d) => d.deviceId)
1108
- .sort()
1109
- .join(',');
1110
- const cb = async () => {
1111
- let id = ++lastId;
1112
- const devices = await this.listDevices();
1113
- if (id === lastId) {
1114
- if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
1115
- lastDevices = devices;
1116
- callback(devices.slice());
1117
- }
1118
- }
1119
- };
1120
- navigator.mediaDevices.addEventListener('devicechange', cb);
1121
- cb();
1122
- this._deviceChangeCallback = cb;
1123
- }
1124
- return true;
1125
- }
1126
-
1127
- /**
1128
- * Manually request permission to use the microphone
1129
- * @returns {Promise<true>}
1130
- */
1131
- async requestPermission() {
1132
- const permissionStatus = await navigator.permissions.query({
1133
- name: 'microphone',
1134
- });
1135
- if (permissionStatus.state === 'denied') {
1136
- window.alert('You must grant microphone access to use this feature.');
1137
- } else if (permissionStatus.state === 'prompt') {
1138
- try {
1139
- const stream = await navigator.mediaDevices.getUserMedia({
1140
- audio: true,
1141
- });
1142
- const tracks = stream.getTracks();
1143
- tracks.forEach((track) => track.stop());
1144
- } catch (e) {
1145
- window.alert('You must grant microphone access to use this feature.');
1146
- }
1147
- }
1148
- return true;
1149
- }
1150
-
1151
- /**
1152
- * List all eligible devices for recording, will request permission to use microphone
1153
- * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
1154
- */
1155
- async listDevices() {
1156
- if (
1157
- !navigator.mediaDevices ||
1158
- !('enumerateDevices' in navigator.mediaDevices)
1159
- ) {
1160
- throw new Error('Could not request user devices');
1161
- }
1162
- await this.requestPermission();
1163
- const devices = await navigator.mediaDevices.enumerateDevices();
1164
- const audioDevices = devices.filter(
1165
- (device) => device.kind === 'audioinput',
1166
- );
1167
- const defaultDeviceIndex = audioDevices.findIndex(
1168
- (device) => device.deviceId === 'default',
1169
- );
1170
- const deviceList = [];
1171
- if (defaultDeviceIndex !== -1) {
1172
- let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
1173
- let existingIndex = audioDevices.findIndex(
1174
- (device) => device.groupId === defaultDevice.groupId,
1175
- );
1176
- if (existingIndex !== -1) {
1177
- defaultDevice = audioDevices.splice(existingIndex, 1)[0];
1178
- }
1179
- defaultDevice.default = true;
1180
- deviceList.push(defaultDevice);
1181
- }
1182
- return deviceList.concat(audioDevices);
1183
- }
1184
-
1185
- /**
1186
- * Begins a recording session and requests microphone permissions if not already granted
1187
- * Microphone recording indicator will appear on browser tab but status will be "paused"
1188
- * @param {string} [deviceId] if no device provided, default device will be used
1189
- * @returns {Promise<true>}
1190
- */
1191
- async begin(deviceId) {
1192
- if (this.processor) {
1193
- throw new Error(
1194
- `Already connected: please call .end() to start a new session`,
1195
- );
1196
- }
1197
-
1198
- if (
1199
- !navigator.mediaDevices ||
1200
- !('getUserMedia' in navigator.mediaDevices)
1201
- ) {
1202
- throw new Error('Could not request user media');
1203
- }
1204
- try {
1205
- const config = { audio: true };
1206
- if (deviceId) {
1207
- config.audio = { deviceId: { exact: deviceId } };
1208
- }
1209
- this.stream = await navigator.mediaDevices.getUserMedia(config);
1210
- } catch (err) {
1211
- throw new Error('Could not start media stream');
1212
- }
1213
-
1214
- const context = new AudioContext({ sampleRate: this.sampleRate });
1215
- const source = context.createMediaStreamSource(this.stream);
1216
- // Load and execute the module script.
1217
- try {
1218
- await context.audioWorklet.addModule(this.scriptSrc);
1219
- } catch (e) {
1220
- console.error(e);
1221
- throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
1222
- }
1223
- const processor = new AudioWorkletNode(context, 'audio_processor');
1224
- processor.port.onmessage = (e) => {
1225
- const { event, id, data } = e.data;
1226
- if (event === 'receipt') {
1227
- this.eventReceipts[id] = data;
1228
- } else if (event === 'chunk') {
1229
- if (this._chunkProcessorSize) {
1230
- const buffer = this._chunkProcessorBuffer;
1231
- this._chunkProcessorBuffer = {
1232
- raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
1233
- mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
1234
- };
1235
- if (
1236
- this._chunkProcessorBuffer.mono.byteLength >=
1237
- this._chunkProcessorSize
1238
- ) {
1239
- this._chunkProcessor(this._chunkProcessorBuffer);
1240
- this._chunkProcessorBuffer = {
1241
- raw: new ArrayBuffer(0),
1242
- mono: new ArrayBuffer(0),
1243
- };
1244
- }
1245
- } else {
1246
- this._chunkProcessor(data);
1247
- }
1248
- }
1249
- };
1250
-
1251
- const node = source.connect(processor);
1252
- const analyser = context.createAnalyser();
1253
- analyser.fftSize = 8192;
1254
- analyser.smoothingTimeConstant = 0.1;
1255
- node.connect(analyser);
1256
- if (this.outputToSpeakers) {
1257
- // eslint-disable-next-line no-console
1258
- console.warn(
1259
- 'Warning: Output to speakers may affect sound quality,\n' +
1260
- 'especially due to system audio feedback preventative measures.\n' +
1261
- 'use only for debugging',
1262
- );
1263
- analyser.connect(context.destination);
1264
- }
1265
-
1266
- this.source = source;
1267
- this.node = node;
1268
- this.analyser = analyser;
1269
- this.processor = processor;
1270
- return true;
1271
- }
1272
-
1273
- /**
1274
- * Gets the current frequency domain data from the recording track
1275
- * @param {"frequency"|"music"|"voice"} [analysisType]
1276
- * @param {number} [minDecibels] default -100
1277
- * @param {number} [maxDecibels] default -30
1278
- * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
1279
- */
1280
- getFrequencies(
1281
- analysisType = 'frequency',
1282
- minDecibels = -100,
1283
- maxDecibels = -30,
1284
- ) {
1285
- if (!this.processor) {
1286
- throw new Error('Session ended: please call .begin() first');
1287
- }
1288
- return AudioAnalysis.getFrequencies(
1289
- this.analyser,
1290
- this.sampleRate,
1291
- null,
1292
- analysisType,
1293
- minDecibels,
1294
- maxDecibels,
1295
- );
1296
- }
1297
-
1298
-
1299
- /**
1300
- * Gets the real-time amplitude of the audio signal
1301
- * @returns {number} Amplitude value between 0 and 1
1302
- */
1303
- getAmplitude() {
1304
- if (!this.analyser) {
1305
- throw new Error('AnalyserNode is not initialized. Please call connect() first.');
1306
- }
1307
-
1308
- const bufferLength = this.analyser.fftSize;
1309
- const dataArray = new Uint8Array(bufferLength);
1310
- this.analyser.getByteTimeDomainData(dataArray);
1311
-
1312
- // Calculate RMS (Root Mean Square) to get amplitude
1313
- let sumSquares = 0;
1314
- for (let i = 0; i < bufferLength; i++) {
1315
- const normalized = (dataArray[i] - 128) / 128; // Normalize between -1 and 1
1316
- sumSquares += normalized * normalized;
1317
- }
1318
- const rms = Math.sqrt(sumSquares / bufferLength);
1319
- return rms;
1320
- }
1321
-
1322
- /**
1323
- * Starts amplitude monitoring
1324
- * @param {function} callback - Function to call with amplitude value
1325
- */
1326
- startAmplitudeMonitoring(callback) {
1327
- const monitor = () => {
1328
- const amplitude = this.getAmplitude();
1329
- callback(amplitude);
1330
- requestAnimationFrame(monitor);
1331
- };
1332
- monitor();
1333
- }
1334
-
1335
- /**
1336
- * Pauses the recording
1337
- * Keeps microphone stream open but halts storage of audio
1338
- * @returns {Promise<true>}
1339
- */
1340
- async pause() {
1341
- if (!this.processor) {
1342
- throw new Error('Session ended: please call .begin() first');
1343
- } else if (!this.recording) {
1344
- throw new Error('Already paused: please call .record() first');
1345
- }
1346
- if (this._chunkProcessorBuffer.raw.byteLength) {
1347
- this._chunkProcessor(this._chunkProcessorBuffer);
1348
- }
1349
- this.log('Pausing ...');
1350
- await this._event('stop');
1351
- this.recording = false;
1352
- return true;
1353
- }
1354
-
1355
- /**
1356
- * Start recording stream and storing to memory from the connected audio source
1357
- * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
1358
- * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio
1359
- * @returns {Promise<true>}
1360
- */
1361
- async record(chunkProcessor = () => {}, chunkSize = 8192) {
1362
- if (!this.processor) {
1363
- throw new Error('Session ended: please call .begin() first');
1364
- } else if (this.recording) {
1365
- throw new Error('Already recording: please call .pause() first');
1366
- } else if (typeof chunkProcessor !== 'function') {
1367
- throw new Error(`chunkProcessor must be a function`);
1368
- }
1369
- this._chunkProcessor = chunkProcessor;
1370
- this._chunkProcessorSize = chunkSize;
1371
- this._chunkProcessorBuffer = {
1372
- raw: new ArrayBuffer(0),
1373
- mono: new ArrayBuffer(0),
1374
- };
1375
- this.log('Recording ...');
1376
- await this._event('start');
1377
- this.recording = true;
1378
- return true;
1379
- }
1380
-
1381
- /**
1382
- * Clears the audio buffer, empties stored recording
1383
- * @returns {Promise<true>}
1384
- */
1385
- async clear() {
1386
- if (!this.processor) {
1387
- throw new Error('Session ended: please call .begin() first');
1388
- }
1389
- await this._event('clear');
1390
- return true;
1391
- }
1392
-
1393
- /**
1394
- * Reads the current audio stream data
1395
- * @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
1396
- */
1397
- async read() {
1398
- if (!this.processor) {
1399
- throw new Error('Session ended: please call .begin() first');
1400
- }
1401
- this.log('Reading ...');
1402
- const result = await this._event('read');
1403
- return result;
1404
- }
1405
-
1406
- /**
1407
- * Saves the current audio stream to a file
1408
- * @param {boolean} [force] Force saving while still recording
1409
- * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
1410
- */
1411
- async save(force = false) {
1412
- if (!this.processor) {
1413
- throw new Error('Session ended: please call .begin() first');
1414
- }
1415
- if (!force && this.recording) {
1416
- throw new Error(
1417
- 'Currently recording: please call .pause() first, or call .save(true) to force',
1418
- );
1419
- }
1420
- this.log('Exporting ...');
1421
- const exportData = await this._event('export');
1422
- const packer = new WavPacker();
1423
- const result = packer.pack(this.sampleRate, exportData.audio);
1424
- return result;
1425
- }
1426
-
1427
- /**
1428
- * Ends the current recording session and saves the result
1429
- * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
1430
- */
1431
- async end() {
1432
- if (!this.processor) {
1433
- throw new Error('Session ended: please call .begin() first');
1434
- }
1435
-
1436
- const _processor = this.processor;
1437
-
1438
- this.log('Stopping ...');
1439
- await this._event('stop');
1440
- this.recording = false;
1441
- const tracks = this.stream.getTracks();
1442
- tracks.forEach((track) => track.stop());
1443
-
1444
- this.log('Exporting ...');
1445
- const exportData = await this._event('export', {}, _processor);
1446
-
1447
- this.processor.disconnect();
1448
- this.source.disconnect();
1449
- this.node.disconnect();
1450
- this.analyser.disconnect();
1451
- this.stream = null;
1452
- this.processor = null;
1453
- this.source = null;
1454
- this.node = null;
1455
-
1456
- const packer = new WavPacker();
1457
- const result = packer.pack(this.sampleRate, exportData.audio);
1458
- return result;
1459
- }
1460
-
1461
- /**
1462
- * Performs a full cleanup of WavRecorder instance
1463
- * Stops actively listening via microphone and removes existing listeners
1464
- * @returns {Promise<true>}
1465
- */
1466
- async quit() {
1467
- this.listenForDeviceChange(null);
1468
- if (this.processor) {
1469
- await this.end();
1470
- }
1471
- return true;
1472
- }
1473
- }
1474
-
1475
- globalThis.WavRecorder = WavRecorder;
1476
-
1477
- /**
1478
- * Converts a base64 string to an ArrayBuffer.
1479
- * @param {string} base64 - The base64 string to convert.
1480
- * @returns {ArrayBuffer} The resulting ArrayBuffer.
1481
- */
1482
- function base64ToArrayBuffer(base64) {
1483
- const binaryString = atob(base64);
1484
- const len = binaryString.length;
1485
- const bytes = new Uint8Array(len);
1486
- for (let i = 0; i < len; i++) {
1487
- bytes[i] = binaryString.charCodeAt(i);
1488
- }
1489
- return bytes.buffer;
1490
- }
1491
-
1492
- /**
1493
- * Converts an ArrayBuffer to a base64 string.
1494
- * @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
1495
- * @returns {string} The resulting base64 string.
1496
- */
1497
- function arrayBufferToBase64(arrayBuffer) {
1498
- if (arrayBuffer instanceof Float32Array) {
1499
- arrayBuffer = WavPacker.floatTo16BitPCM(arrayBuffer);
1500
- } else if (arrayBuffer instanceof Int16Array) {
1501
- arrayBuffer = arrayBuffer.buffer;
1502
- }
1503
- let binary = '';
1504
- let bytes = new Uint8Array(arrayBuffer);
1505
- const chunkSize = 0x8000; // 32KB chunk size
1506
- for (let i = 0; i < bytes.length; i += chunkSize) {
1507
- let chunk = bytes.subarray(i, i + chunkSize);
1508
- binary += String.fromCharCode.apply(null, chunk);
1509
- }
1510
- return btoa(binary);
1511
- }
1512
-
1513
- /* eslint-env browser */
1514
- /**
1515
- * @class LayercodeClient
1516
- * @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
1517
- */
1518
- class LayercodeClient {
1519
- /**
1520
- * Creates an instance of LayercodeClient.
1521
- * @param {Object} options - Configuration options
1522
- */
1523
- constructor(options) {
1524
- this.options = {
1525
- pipelineId: options.pipelineId,
1526
- sessionId: options.sessionId || null,
1527
- authorizeSessionEndpoint: options.authorizeSessionEndpoint,
1528
- metadata: options.metadata || {},
1529
- onConnect: options.onConnect || (() => { }),
1530
- onDisconnect: options.onDisconnect || (() => { }),
1531
- onError: options.onError || (() => { }),
1532
- onDataMessage: options.onDataMessage || (() => { }),
1533
- onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
1534
- onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
1535
- onStatusChange: options.onStatusChange || (() => { }),
1536
- };
1537
- this.AMPLITUDE_MONITORING_SAMPLE_RATE = 10;
1538
- this._websocketUrl = 'wss://api.layercode.com/v1/pipelines/websocket';
1539
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO should be set by the fetched pipeline config
1540
- this.wavPlayer = new WavStreamPlayer({
1541
- finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
1542
- sampleRate: 16000, // TODO should be set by the fetched pipeline config
1543
- });
1544
- this.ws = null;
1545
- this.status = 'disconnected';
1546
- this.userAudioAmplitude = 0;
1547
- this.agentAudioAmplitude = 0;
1548
- this.sessionId = options.sessionId || null;
1549
- this.pushToTalkActive = false;
1550
- // Bind event handlers
1551
- this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
1552
- this._handleDataAvailable = this._handleDataAvailable.bind(this);
1553
- }
1554
- /**
1555
- * Updates the connection status and triggers the callback
1556
- * @param {string} status - New status value
1557
- * @private
1558
- */
1559
- _setStatus(status) {
1560
- this.status = status;
1561
- this.options.onStatusChange(status);
1562
- }
1563
- /**
1564
- * Handles when agent audio finishes playing
1565
- * @private
1566
- */
1567
- _clientResponseAudioReplayFinished() {
1568
- console.log('clientResponseAudioReplayFinished');
1569
- this._wsSend({
1570
- type: 'trigger.response.audio.replay_finished',
1571
- reason: 'completed',
1572
- });
1573
- }
1574
- async _clientInterruptAssistantReplay() {
1575
- await this.wavPlayer.interrupt();
1576
- // TODO: Use in voice pipeline to know how much of the audio has been played and how much to truncate transcript
1577
- // this._wsSend({
1578
- // type: 'trigger.response.audio.replay_finished',
1579
- // reason: 'interrupted',
1580
- // delta_id: 'TODO'
1581
- // });
1582
- }
1583
- async triggerUserTurnStarted() {
1584
- if (!this.pushToTalkActive) {
1585
- this.pushToTalkActive = true;
1586
- this._wsSend({ type: 'trigger.turn.start', role: 'user' });
1587
- await this._clientInterruptAssistantReplay();
1588
- }
1589
- }
1590
- async triggerUserTurnFinished() {
1591
- if (this.pushToTalkActive) {
1592
- this.pushToTalkActive = false;
1593
- this._wsSend({ type: 'trigger.turn.end', role: 'user' });
1594
- }
1595
- }
1596
- /**
1597
- * Handles incoming WebSocket messages
1598
- * @param {MessageEvent} event - The WebSocket message event
1599
- * @private
1600
- */
1601
- async _handleWebSocketMessage(event) {
1602
- try {
1603
- const message = JSON.parse(event.data);
1604
- if (message.type !== 'response.audio') {
1605
- console.log('received ws msg:', message);
1606
- }
1607
- switch (message.type) {
1608
- case 'turn.start':
1609
- // Sent from the server to this client when a new user turn is detected
1610
- console.log('received turn.start from server');
1611
- console.log(message);
1612
- // if (message.role === 'user' && !this.pushToTalkActive) {
1613
- if (message.role === 'user') {
1614
- // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
1615
- console.log('interrupting assistant audio, as user turn has started and pushToTalkActive is false');
1616
- await this._clientInterruptAssistantReplay();
1617
- }
1618
- break;
1619
- case 'response.audio':
1620
- const audioBuffer = base64ToArrayBuffer(message.content);
1621
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
1622
- break;
1623
- // case 'response.end':
1624
- // console.log('received response.end');
1625
- // break;
1626
- case 'response.data':
1627
- console.log('received response.data', message);
1628
- this.options.onDataMessage(message);
1629
- break;
1630
- default:
1631
- console.error('Unknown message type received:', message);
1632
- break;
1633
- }
1634
- }
1635
- catch (error) {
1636
- console.error('Error processing WebSocket message:', error);
1637
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
1638
- }
1639
- }
1640
- /**
1641
- * Handles available client browser microphone audio data and sends it over the WebSocket
1642
- * @param {{raw: ArrayBuffer, mono: ArrayBuffer}} data - The audio chunk buffers from the recorder
1643
- * @private
1644
- */
1645
- _handleDataAvailable(data) {
1646
- try {
1647
- const base64 = arrayBufferToBase64(data.mono);
1648
- this._wsSend({
1649
- type: 'client.audio',
1650
- content: base64,
1651
- });
1652
- }
1653
- catch (error) {
1654
- console.error('Error processing audio:', error);
1655
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
1656
- }
1657
- }
1658
- _wsSend(message) {
1659
- var _a;
1660
- if (message.type !== 'client.audio') {
1661
- console.log('sent ws msg:', message);
1662
- }
1663
- const messageString = JSON.stringify(message);
1664
- if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
1665
- this.ws.send(messageString);
1666
- }
1667
- }
1668
- /**
1669
- * Sets up amplitude monitoring for a given audio source.
1670
- * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
1671
- * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
1672
- * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
1673
- * @private
1674
- */
1675
- _setupAmplitudeMonitoring(source, callback, updateInternalState) {
1676
- // Set up amplitude monitoring only if a callback is provided
1677
- // Check against the default no-op function defined in the constructor options
1678
- if (callback !== (() => { })) {
1679
- let updateCounter = 0;
1680
- source.startAmplitudeMonitoring((amplitude) => {
1681
- // Only update and call callback at the specified sample rate
1682
- if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
1683
- updateInternalState(amplitude);
1684
- callback(amplitude);
1685
- updateCounter = 0; // Reset counter after sampling
1686
- }
1687
- updateCounter++;
1688
- });
1689
- }
1690
- }
1691
- /**
1692
- * Connects to the Layercode pipeline and starts the audio session
1693
- * @async
1694
- * @returns {Promise<void>}
1695
- */
1696
- async connect() {
1697
- try {
1698
- this._setStatus('connecting');
1699
- // Get session key from server
1700
- let authorizeSessionRequestBody = {
1701
- pipeline_id: this.options.pipelineId,
1702
- metadata: this.options.metadata,
1703
- };
1704
- // If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
1705
- if (this.options.sessionId) {
1706
- authorizeSessionRequestBody.session_id = this.options.sessionId;
1707
- }
1708
- const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
1709
- method: 'POST',
1710
- headers: {
1711
- 'Content-Type': 'application/json',
1712
- },
1713
- body: JSON.stringify(authorizeSessionRequestBody),
1714
- });
1715
- if (!authorizeSessionResponse.ok) {
1716
- throw new Error(`Failed to authorize session: ${authorizeSessionResponse.statusText}`);
1717
- }
1718
- const authorizeSessionResponseBody = await authorizeSessionResponse.json();
1719
- this.sessionId = authorizeSessionResponseBody.session_id; // Save the session_id for use in future reconnects
1720
- // Connect WebSocket
1721
- this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
1722
- client_session_key: authorizeSessionResponseBody.client_session_key,
1723
- })}`);
1724
- // Bind the websocket message callbacks
1725
- this.ws.onmessage = this._handleWebSocketMessage;
1726
- this.ws.onopen = () => {
1727
- console.log('WebSocket connection established');
1728
- this._setStatus('connected');
1729
- this.options.onConnect({ sessionId: this.sessionId });
1730
- };
1731
- this.ws.onclose = () => {
1732
- console.log('WebSocket connection closed');
1733
- this._setStatus('disconnected');
1734
- this.options.onDisconnect();
1735
- };
1736
- this.ws.onerror = (error) => {
1737
- console.error('WebSocket error:', error);
1738
- this._setStatus('error');
1739
- this.options.onError(new Error('WebSocket connection error'));
1740
- };
1741
- // Initialize microphone audio capture
1742
- await this.wavRecorder.begin();
1743
- await this.wavRecorder.record(this._handleDataAvailable);
1744
- // Set up microphone amplitude monitoring
1745
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
1746
- // Initialize audio player
1747
- await this.wavPlayer.connect();
1748
- // Set up audio player amplitude monitoring
1749
- this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
1750
- }
1751
- catch (error) {
1752
- console.error('Error connecting to Layercode pipeline:', error);
1753
- this._setStatus('error');
1754
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
1755
- throw error;
1756
- }
1757
- }
1758
- async disconnect() {
1759
- var _a;
1760
- this.wavRecorder.quit();
1761
- this.wavPlayer.disconnect();
1762
- (_a = this.ws) === null || _a === void 0 ? void 0 : _a.close();
1763
- }
1764
- }
1765
-
1766
- return LayercodeClient;
1767
-
1768
- }));
1769
- //# sourceMappingURL=layercode-js-sdk.min.js.map
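For reference, the file removed above is the UMD build of the SDK: it attaches WavPacker, AudioAnalysis, WavStreamPlayer and WavRecorder to globalThis and returns LayercodeClient as the module export. A minimal usage sketch of the client follows (illustrative only, assuming the bundle is loaded in a browser and the calls run inside an async handler; the pipeline id and the /api/authorize endpoint are placeholder assumptions for an app-side route that returns the client_session_key):

const client = new LayercodeClient({
  pipelineId: 'YOUR_PIPELINE_ID',             // assumed placeholder
  authorizeSessionEndpoint: '/api/authorize', // assumed app endpoint proxying Layercode session authorization
  onConnect: ({ sessionId }) => console.log('connected, session:', sessionId),
  onStatusChange: (status) => console.log('status:', status),
  onDataMessage: (msg) => console.log('data message:', msg),
  onUserAmplitudeChange: (amp) => { /* drive a microphone level meter */ },
  onAgentAmplitudeChange: (amp) => { /* drive an agent level meter */ },
  onError: (err) => console.error(err),
});

await client.connect();                 // authorizes the session, opens the WebSocket, starts mic capture and playback
await client.triggerUserTurnStarted();  // optional push-to-talk: starts a user turn and interrupts agent audio
await client.triggerUserTurnFinished(); // ends the user turn
await client.disconnect();              // stops recording and playback, closes the WebSocket

The wavtools classes that make up most of the bundle can also be driven directly; a loopback sketch under the same assumptions (sample rates are illustrative) is:

const recorder = new WavRecorder({ sampleRate: 24000 });
const player = new WavStreamPlayer({ sampleRate: 24000 });
await player.connect();                  // creates the AudioContext and loads the stream_processor worklet
await recorder.begin();                  // requests microphone access
await recorder.record(({ mono }) => {
  player.add16BitPCM(mono, 'loopback');  // queue each mono PCM16 chunk for playback
});
// later:
await recorder.end();                    // stops capture and returns the packed WAV result
player.disconnect();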