@layercode/js-sdk 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1761 +0,0 @@
1
- /**
2
- * Raw wav audio file contents
3
- * @typedef {Object} WavPackerAudioType
4
- * @property {Blob} blob
5
- * @property {string} url
6
- * @property {number} channelCount
7
- * @property {number} sampleRate
8
- * @property {number} duration
9
- */
10
-
11
- /**
12
- * Utility class for assembling PCM16 "audio/wav" data
13
- * @class
14
- */
15
- class WavPacker {
16
- /**
17
- * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
18
- * @param {Float32Array} float32Array
19
- * @returns {ArrayBuffer}
20
- */
21
- static floatTo16BitPCM(float32Array) {
22
- const buffer = new ArrayBuffer(float32Array.length * 2);
23
- const view = new DataView(buffer);
24
- let offset = 0;
25
- for (let i = 0; i < float32Array.length; i++, offset += 2) {
26
- let s = Math.max(-1, Math.min(1, float32Array[i]));
27
- view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
28
- }
29
- return buffer;
30
- }
31
-
32
- /**
33
- * Concatenates two ArrayBuffers
34
- * @param {ArrayBuffer} leftBuffer
35
- * @param {ArrayBuffer} rightBuffer
36
- * @returns {ArrayBuffer}
37
- */
38
- static mergeBuffers(leftBuffer, rightBuffer) {
39
- const tmpArray = new Uint8Array(
40
- leftBuffer.byteLength + rightBuffer.byteLength
41
- );
42
- tmpArray.set(new Uint8Array(leftBuffer), 0);
43
- tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength);
44
- return tmpArray.buffer;
45
- }
46
-
47
- /**
48
- * Packs data into an Int16 format
49
- * @private
50
- * @param {number} size 0 = 1x Int16, 1 = 2x Int16
51
- * @param {number} arg value to pack
52
- * @returns {Uint8Array}
53
- */
54
- _packData(size, arg) {
55
- return [
56
- new Uint8Array([arg, arg >> 8]),
57
- new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]),
58
- ][size];
59
- }
60
-
61
- /**
62
- * Packs audio into "audio/wav" Blob
63
- * @param {number} sampleRate
64
- * @param {{bitsPerSample: number, channels: Array<Float32Array>, data: Int16Array}} audio
65
- * @returns {WavPackerAudioType}
66
- */
67
- pack(sampleRate, audio) {
68
- if (!audio?.bitsPerSample) {
69
- throw new Error(`Missing "bitsPerSample"`);
70
- } else if (!audio?.channels) {
71
- throw new Error(`Missing "channels"`);
72
- } else if (!audio?.data) {
73
- throw new Error(`Missing "data"`);
74
- }
75
- const { bitsPerSample, channels, data } = audio;
76
- const output = [
77
- // Header
78
- 'RIFF',
79
- this._packData(
80
- 1,
81
- 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */
82
- ), // Length
83
- 'WAVE',
84
- // chunk 1
85
- 'fmt ', // Sub-chunk identifier
86
- this._packData(1, 16), // Chunk length
87
- this._packData(0, 1), // Audio format (1 is linear quantization)
88
- this._packData(0, channels.length),
89
- this._packData(1, sampleRate),
90
- this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate
91
- this._packData(0, (channels.length * bitsPerSample) / 8),
92
- this._packData(0, bitsPerSample),
93
- // chunk 2
94
- 'data', // Sub-chunk identifier
95
- this._packData(
96
- 1,
97
- (channels[0].length * channels.length * bitsPerSample) / 8
98
- ), // Chunk length
99
- data,
100
- ];
101
- const blob = new Blob(output, { type: 'audio/wav' });
102
- const url = URL.createObjectURL(blob);
103
- return {
104
- blob,
105
- url,
106
- channelCount: channels.length,
107
- sampleRate,
108
- duration: data.byteLength / (channels.length * sampleRate * 2),
109
- };
110
- }
111
- }
112
-
113
- globalThis.WavPacker = WavPacker;
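
A minimal usage sketch for the WavPacker class above, assuming a browser context; the one-second 440 Hz test tone is invented purely for illustration:

// Hypothetical input: one second of a 440 Hz tone at 24 kHz, mono.
const toneSampleRate = 24000;
const tone = new Float32Array(toneSampleRate);
for (let i = 0; i < tone.length; i++) {
  tone[i] = Math.sin((2 * Math.PI * 440 * i) / toneSampleRate);
}
// Convert to 16-bit PCM and pack into a WAV blob/URL.
const pcmBuffer = WavPacker.floatTo16BitPCM(tone);
const packer = new WavPacker();
const wav = packer.pack(toneSampleRate, {
  bitsPerSample: 16,
  channels: [tone],
  data: new Int16Array(pcmBuffer),
});
console.log(wav.url, wav.duration); // duration should be ~1 second
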
114
-
115
- /**
116
- * Constants for help with visualization
117
- * Helps map frequency ranges from Fast Fourier Transform
118
- * to human-interpretable ranges, notably music ranges and
119
- * human vocal ranges.
120
- */
121
-
122
- // Eighth octave frequencies
123
- const octave8Frequencies = [
124
- 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
125
- 6644.88, 7040.0, 7458.62, 7902.13,
126
- ];
127
-
128
- // Labels for each of the above frequencies
129
- const octave8FrequencyLabels = [
130
- 'C',
131
- 'C#',
132
- 'D',
133
- 'D#',
134
- 'E',
135
- 'F',
136
- 'F#',
137
- 'G',
138
- 'G#',
139
- 'A',
140
- 'A#',
141
- 'B',
142
- ];
143
-
144
- /**
145
- * All note frequencies from 1st to 8th octave
146
- * in format "A#8" (A#, 8th octave)
147
- */
148
- const noteFrequencies = [];
149
- const noteFrequencyLabels = [];
150
- for (let i = 1; i <= 8; i++) {
151
- for (let f = 0; f < octave8Frequencies.length; f++) {
152
- const freq = octave8Frequencies[f];
153
- noteFrequencies.push(freq / Math.pow(2, 8 - i));
154
- noteFrequencyLabels.push(octave8FrequencyLabels[f] + i);
155
- }
156
- }
157
-
158
- /**
159
- * Subset of the note frequencies between 32 and 2000 Hz
160
- * 6 octave range: C1 to B6
161
- */
162
- const voiceFrequencyRange = [32.0, 2000.0];
163
- const voiceFrequencies = noteFrequencies.filter((_, i) => {
164
- return (
165
- noteFrequencies[i] > voiceFrequencyRange[0] &&
166
- noteFrequencies[i] < voiceFrequencyRange[1]
167
- );
168
- });
169
- const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => {
170
- return (
171
- noteFrequencies[i] > voiceFrequencyRange[0] &&
172
- noteFrequencies[i] < voiceFrequencyRange[1]
173
- );
174
- });
175
-
176
- /**
177
- * Output of AudioAnalysis for the frequency domain of the audio
178
- * @typedef {Object} AudioAnalysisOutputType
179
- * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
180
- * @property {number[]} frequencies Raw frequency bucket values
181
- * @property {string[]} labels Labels for the frequency bucket values
182
- */
183
-
184
- /**
185
- * Analyzes audio for visual output
186
- * @class
187
- */
188
- class AudioAnalysis {
189
- /**
190
- * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
191
- * returns human-readable formatting and labels
192
- * @param {AnalyserNode} analyser
193
- * @param {number} sampleRate
194
- * @param {Float32Array} [fftResult]
195
- * @param {"frequency"|"music"|"voice"} [analysisType]
196
- * @param {number} [minDecibels] default -100
197
- * @param {number} [maxDecibels] default -30
198
- * @returns {AudioAnalysisOutputType}
199
- */
200
- static getFrequencies(
201
- analyser,
202
- sampleRate,
203
- fftResult,
204
- analysisType = 'frequency',
205
- minDecibels = -100,
206
- maxDecibels = -30,
207
- ) {
208
- if (!fftResult) {
209
- fftResult = new Float32Array(analyser.frequencyBinCount);
210
- analyser.getFloatFrequencyData(fftResult);
211
- }
212
- const nyquistFrequency = sampleRate / 2;
213
- const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
214
- let outputValues;
215
- let frequencies;
216
- let labels;
217
- if (analysisType === 'music' || analysisType === 'voice') {
218
- const useFrequencies =
219
- analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
220
- const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
221
- for (let i = 0; i < fftResult.length; i++) {
222
- const frequency = i * frequencyStep;
223
- const amplitude = fftResult[i];
224
- for (let n = useFrequencies.length - 1; n >= 0; n--) {
225
- if (frequency > useFrequencies[n]) {
226
- aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
227
- break;
228
- }
229
- }
230
- }
231
- outputValues = aggregateOutput;
232
- frequencies =
233
- analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
234
- labels =
235
- analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
236
- } else {
237
- outputValues = Array.from(fftResult);
238
- frequencies = outputValues.map((_, i) => frequencyStep * i);
239
- labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
240
- }
241
- // We normalize to {0, 1}
242
- const normalizedOutput = outputValues.map((v) => {
243
- return Math.max(
244
- 0,
245
- Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
246
- );
247
- });
248
- const values = new Float32Array(normalizedOutput);
249
- return {
250
- values,
251
- frequencies,
252
- labels,
253
- };
254
- }
255
-
256
- /**
257
- * Creates a new AudioAnalysis instance for an HTMLAudioElement
258
- * @param {HTMLAudioElement} audioElement
259
- * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
260
- * @returns {AudioAnalysis}
261
- */
262
- constructor(audioElement, audioBuffer = null) {
263
- this.fftResults = [];
264
- if (audioBuffer) {
265
- /**
266
- * Modified from
267
- * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
268
- *
269
- * We do this to populate FFT values for the audio if provided an `audioBuffer`
270
- * The reason to do this is that Safari fails when using `createMediaElementSource`
271
- * This has a non-zero RAM cost, so we only opt in to it when an audioBuffer is provided (i.e. for Safari); Chrome handles createMediaElementSource without it
272
- */
273
- const { length, sampleRate } = audioBuffer;
274
- const offlineAudioContext = new OfflineAudioContext({
275
- length,
276
- sampleRate,
277
- });
278
- const source = offlineAudioContext.createBufferSource();
279
- source.buffer = audioBuffer;
280
- const analyser = offlineAudioContext.createAnalyser();
281
- analyser.fftSize = 8192;
282
- analyser.smoothingTimeConstant = 0.1;
283
- source.connect(analyser);
284
- // limit is :: 128 / sampleRate;
285
- // but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM
286
- const renderQuantumInSeconds = 1 / 60;
287
- const durationInSeconds = length / sampleRate;
288
- const analyze = (index) => {
289
- const suspendTime = renderQuantumInSeconds * index;
290
- if (suspendTime < durationInSeconds) {
291
- offlineAudioContext.suspend(suspendTime).then(() => {
292
- const fftResult = new Float32Array(analyser.frequencyBinCount);
293
- analyser.getFloatFrequencyData(fftResult);
294
- this.fftResults.push(fftResult);
295
- analyze(index + 1);
296
- });
297
- }
298
- if (index === 1) {
299
- offlineAudioContext.startRendering();
300
- } else {
301
- offlineAudioContext.resume();
302
- }
303
- };
304
- source.start(0);
305
- analyze(1);
306
- this.audio = audioElement;
307
- this.context = offlineAudioContext;
308
- this.analyser = analyser;
309
- this.sampleRate = sampleRate;
310
- this.audioBuffer = audioBuffer;
311
- } else {
312
- const audioContext = new AudioContext();
313
- const track = audioContext.createMediaElementSource(audioElement);
314
- const analyser = audioContext.createAnalyser();
315
- analyser.fftSize = 8192;
316
- analyser.smoothingTimeConstant = 0.1;
317
- track.connect(analyser);
318
- analyser.connect(audioContext.destination);
319
- this.audio = audioElement;
320
- this.context = audioContext;
321
- this.analyser = analyser;
322
- this.sampleRate = this.context.sampleRate;
323
- this.audioBuffer = null;
324
- }
325
- }
326
-
327
- /**
328
- * Gets the current frequency domain data from the playing audio track
329
- * @param {"frequency"|"music"|"voice"} [analysisType]
330
- * @param {number} [minDecibels] default -100
331
- * @param {number} [maxDecibels] default -30
332
- * @returns {AudioAnalysisOutputType}
333
- */
334
- getFrequencies(
335
- analysisType = 'frequency',
336
- minDecibels = -100,
337
- maxDecibels = -30,
338
- ) {
339
- let fftResult = null;
340
- if (this.audioBuffer && this.fftResults.length) {
341
- const pct = this.audio.currentTime / this.audio.duration;
342
- const index = Math.min(
343
- (pct * this.fftResults.length) | 0,
344
- this.fftResults.length - 1,
345
- );
346
- fftResult = this.fftResults[index];
347
- }
348
- return AudioAnalysis.getFrequencies(
349
- this.analyser,
350
- this.sampleRate,
351
- fftResult,
352
- analysisType,
353
- minDecibels,
354
- maxDecibels,
355
- );
356
- }
357
-
358
- /**
359
- * Resume the internal AudioContext if it was suspended due to the lack of
360
- * user interaction when the AudioAnalysis was instantiated.
361
- * @returns {Promise<true>}
362
- */
363
- async resumeIfSuspended() {
364
- if (this.context.state === 'suspended') {
365
- await this.context.resume();
366
- }
367
- return true;
368
- }
369
- }
370
-
371
- globalThis.AudioAnalysis = AudioAnalysis;
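
A brief sketch of driving AudioAnalysis from an <audio> element; the element id and the requestAnimationFrame render loop are illustrative assumptions:

// Hypothetical <audio id="player" src="..."> element on the page.
const audioEl = document.getElementById('player');
const analysis = new AudioAnalysis(audioEl);
audioEl.addEventListener('play', async () => {
  await analysis.resumeIfSuspended(); // the AudioContext may start suspended
  const draw = () => {
    // Normalized {0, 1} amplitudes per note bucket in the vocal range
    const { values, labels } = analysis.getFrequencies('voice');
    // ...render `values`, e.g. as bars labelled by `labels`...
    if (!audioEl.paused) requestAnimationFrame(draw);
  };
  draw();
});
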
372
-
373
- const StreamProcessorWorklet = `
374
- class StreamProcessor extends AudioWorkletProcessor {
375
- constructor() {
376
- super();
377
- this.hasStarted = false;
378
- this.hasInterrupted = false;
379
- this.outputBuffers = [];
380
- this.bufferLength = 128;
381
- this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };
382
- this.writeOffset = 0;
383
- this.trackSampleOffsets = {};
384
- this.port.onmessage = (event) => {
385
- if (event.data) {
386
- const payload = event.data;
387
- if (payload.event === 'write') {
388
- const int16Array = payload.buffer;
389
- const float32Array = new Float32Array(int16Array.length);
390
- for (let i = 0; i < int16Array.length; i++) {
391
- float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32
392
- }
393
- this.writeData(float32Array, payload.trackId);
394
- } else if (
395
- payload.event === 'offset' ||
396
- payload.event === 'interrupt'
397
- ) {
398
- const requestId = payload.requestId;
399
- const trackId = this.write.trackId;
400
- const offset = this.trackSampleOffsets[trackId] || 0;
401
- this.port.postMessage({
402
- event: 'offset',
403
- requestId,
404
- trackId,
405
- offset,
406
- });
407
- if (payload.event === 'interrupt') {
408
- this.hasInterrupted = true;
409
- }
410
- } else {
411
- throw new Error(\`Unhandled event "\${payload.event}"\`);
412
- }
413
- }
414
- };
415
- }
416
-
417
- writeData(float32Array, trackId = null) {
418
- let { buffer } = this.write;
419
- let offset = this.writeOffset;
420
- for (let i = 0; i < float32Array.length; i++) {
421
- buffer[offset++] = float32Array[i];
422
- if (offset >= buffer.length) {
423
- this.outputBuffers.push(this.write);
424
- this.write = { buffer: new Float32Array(this.bufferLength), trackId };
425
- buffer = this.write.buffer;
426
- offset = 0;
427
- }
428
- }
429
- this.writeOffset = offset;
430
- return true;
431
- }
432
-
433
- process(inputs, outputs, parameters) {
434
- const output = outputs[0];
435
- const outputChannelData = output[0];
436
- const outputBuffers = this.outputBuffers;
437
- if (this.hasInterrupted) {
438
- this.port.postMessage({ event: 'stop' });
439
- return false;
440
- } else if (outputBuffers.length) {
441
- this.hasStarted = true;
442
- const { buffer, trackId } = outputBuffers.shift();
443
- for (let i = 0; i < outputChannelData.length; i++) {
444
- outputChannelData[i] = buffer[i] || 0;
445
- }
446
- if (trackId) {
447
- this.trackSampleOffsets[trackId] =
448
- this.trackSampleOffsets[trackId] || 0;
449
- this.trackSampleOffsets[trackId] += buffer.length;
450
- }
451
- return true;
452
- } else if (this.hasStarted) {
453
- this.port.postMessage({ event: 'stop' });
454
- return false;
455
- } else {
456
- return true;
457
- }
458
- }
459
- }
460
-
461
- registerProcessor('stream_processor', StreamProcessor);
462
- `;
463
-
464
- const script$1 = new Blob([StreamProcessorWorklet], {
465
- type: 'application/javascript',
466
- });
467
- const src$1 = URL.createObjectURL(script$1);
468
- const StreamProcessorSrc = src$1;
469
-
470
- /**
471
- * Plays audio streams received in raw PCM16 chunks from the browser
472
- * @class
473
- */
474
- class WavStreamPlayer {
475
- /**
476
- * Creates a new WavStreamPlayer instance
477
- * @param {{finishedPlayingCallback?: () => void, sampleRate?: number}} options
478
- * @returns {WavStreamPlayer}
479
- */
480
- constructor({ finishedPlayingCallback = () => {}, sampleRate = 24000 } = {}) {
481
- this.scriptSrc = StreamProcessorSrc;
482
- this.sampleRate = sampleRate;
483
- this.context = null;
484
- this.stream = null;
485
- this.analyser = null;
486
- this.trackSampleOffsets = {};
487
- this.interruptedTrackIds = {};
488
- this.finishedPlayingCallback = finishedPlayingCallback;
489
- }
490
-
491
- /**
492
- * Connects the audio context and enables output to speakers
493
- * @returns {Promise<true>}
494
- */
495
- async connect() {
496
- this.context = new AudioContext({ sampleRate: this.sampleRate });
497
- if (this.context.state === "suspended") {
498
- await this.context.resume();
499
- }
500
- try {
501
- await this.context.audioWorklet.addModule(this.scriptSrc);
502
- } catch (e) {
503
- console.error(e);
504
- throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
505
- }
506
- const analyser = this.context.createAnalyser();
507
- analyser.fftSize = 8192;
508
- analyser.smoothingTimeConstant = 0.1;
509
- this.analyser = analyser;
510
- return true;
511
- }
512
-
513
- /**
514
- * Gets the current frequency domain data from the playing track
515
- * @param {"frequency"|"music"|"voice"} [analysisType]
516
- * @param {number} [minDecibels] default -100
517
- * @param {number} [maxDecibels] default -30
518
- * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
519
- */
520
- getFrequencies(analysisType = "frequency", minDecibels = -100, maxDecibels = -30) {
521
- if (!this.analyser) {
522
- throw new Error("Not connected, please call .connect() first");
523
- }
524
- return AudioAnalysis.getFrequencies(this.analyser, this.sampleRate, null, analysisType, minDecibels, maxDecibels);
525
- }
526
-
527
- /**
528
- * Gets the real-time amplitude of the audio signal
529
- * @returns {number} Amplitude value between 0 and 1
530
- */
531
- getAmplitude() {
532
- if (!this.analyser) {
533
- throw new Error("AnalyserNode is not initialized. Please call connect() first.");
534
- }
535
-
536
- const bufferLength = this.analyser.fftSize;
537
- const dataArray = new Uint8Array(bufferLength);
538
- this.analyser.getByteTimeDomainData(dataArray);
539
-
540
- // Calculate RMS (Root Mean Square) to get amplitude
541
- let sumSquares = 0;
542
- for (let i = 0; i < bufferLength; i++) {
543
- const normalized = (dataArray[i] - 128) / 128; // Normalize between -1 and 1
544
- sumSquares += normalized * normalized;
545
- }
546
- const rms = Math.sqrt(sumSquares / bufferLength);
547
- return rms;
548
- }
549
-
550
- /**
551
- * Starts amplitude monitoring
552
- * @param {function} callback - Function to call with amplitude value
553
- */
554
- startAmplitudeMonitoring(callback) {
555
- const monitor = () => {
556
- const amplitude = this.getAmplitude();
557
- callback(amplitude);
558
- requestAnimationFrame(monitor);
559
- };
560
- monitor();
561
- }
562
-
563
- /**
564
- * Starts audio streaming
565
- * @private
566
- * @returns {Promise<true>}
567
- */
568
- _start() {
569
- const streamNode = new AudioWorkletNode(this.context, "stream_processor");
570
- streamNode.connect(this.context.destination);
571
- streamNode.port.onmessage = (e) => {
572
- const { event } = e.data;
573
- if (event === "stop") {
574
- streamNode.disconnect();
575
- this.stream = null;
576
- this.finishedPlayingCallback();
577
- } else if (event === "offset") {
578
- const { requestId, trackId, offset } = e.data;
579
- const currentTime = offset / this.sampleRate;
580
- this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
581
- }
582
- };
583
- this.analyser.disconnect();
584
- streamNode.connect(this.analyser);
585
- this.stream = streamNode;
586
- return true;
587
- }
588
-
589
- /**
590
- * Adds 16BitPCM data to the currently playing audio stream
591
- * You can add chunks beyond the current play point and they will be queued for play
592
- * @param {ArrayBuffer|Int16Array} arrayBuffer
593
- * @param {string} [trackId]
594
- * @returns {Int16Array}
595
- */
596
- add16BitPCM(arrayBuffer, trackId = "default") {
597
- if (typeof trackId !== "string") {
598
- throw new Error(`trackId must be a string`);
599
- } else if (this.interruptedTrackIds[trackId]) {
600
- return;
601
- }
602
- if (!this.stream) {
603
- this._start();
604
- }
605
- let buffer;
606
- if (arrayBuffer instanceof Int16Array) {
607
- buffer = arrayBuffer;
608
- } else if (arrayBuffer instanceof ArrayBuffer) {
609
- buffer = new Int16Array(arrayBuffer);
610
- } else {
611
- throw new Error(`argument must be Int16Array or ArrayBuffer`);
612
- }
613
- this.stream.port.postMessage({ event: "write", buffer, trackId });
614
- return buffer;
615
- }
616
-
617
- /**
618
- * Gets the offset (sample count) of the currently playing stream
619
- * @param {boolean} [interrupt]
620
- * @returns {Promise<{trackId: string|null, offset: number, currentTime: number}|null>}
621
- */
622
- async getTrackSampleOffset(interrupt = false) {
623
- if (!this.stream) {
624
- return null;
625
- }
626
- const requestId = crypto.randomUUID();
627
- this.stream.port.postMessage({
628
- event: interrupt ? "interrupt" : "offset",
629
- requestId,
630
- });
631
- let trackSampleOffset;
632
- while (!trackSampleOffset) {
633
- trackSampleOffset = this.trackSampleOffsets[requestId];
634
- await new Promise((r) => setTimeout(() => r(), 1));
635
- }
636
- const { trackId } = trackSampleOffset;
637
- if (interrupt && trackId) {
638
- this.interruptedTrackIds[trackId] = true;
639
- }
640
- return trackSampleOffset;
641
- }
642
-
643
- /**
644
- * Interrupts the current stream and returns the sample offset of the audio
645
- * @param {boolean} [interrupt]
646
- * @returns {Promise<{trackId: string|null, offset: number, currentTime: number}|null>}
647
- */
648
- async interrupt() {
649
- return this.getTrackSampleOffset(true);
650
- }
651
-
652
- /**
653
- * Disconnects the audio context and cleans up resources
654
- * @returns {void}
655
- */
656
- disconnect() {
657
- if (this.stream) {
658
- this.stream.disconnect();
659
- this.stream = null;
660
- }
661
-
662
- if (this.analyser) {
663
- this.analyser.disconnect();
664
- }
665
-
666
- if (this.context) {
667
- this.context.close().catch((err) => console.error("Error closing audio context:", err));
668
- }
669
- }
670
- }
671
-
672
- globalThis.WavStreamPlayer = WavStreamPlayer;
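
A sketch of feeding PCM16 chunks into the WavStreamPlayer above; where the chunks come from (e.g. a WebSocket) is assumed and not part of the class:

const player = new WavStreamPlayer({
  sampleRate: 24000,
  finishedPlayingCallback: () => console.log('playback finished'),
});
await player.connect(); // call after a user gesture so the AudioContext can start

// Hypothetical handler for incoming PCM16 chunks (ArrayBuffer or Int16Array)
function onAudioChunk(chunk, trackId) {
  player.add16BitPCM(chunk, trackId); // chunks are queued and played in order
}

// To barge in: stop playback and mark the current track as interrupted
const offsetInfo = await player.interrupt(); // { trackId, offset, currentTime } or null
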
673
-
674
- const AudioProcessorWorklet = `
675
- class AudioProcessor extends AudioWorkletProcessor {
676
-
677
- constructor() {
678
- super();
679
- this.port.onmessage = this.receive.bind(this);
680
- this.initialize();
681
- }
682
-
683
- initialize() {
684
- this.foundAudio = false;
685
- this.recording = false;
686
- this.chunks = [];
687
- }
688
-
689
- /**
690
- * Concatenates sampled chunks into channels
691
- * Format is chunk[Left[], Right[]]
692
- */
693
- readChannelData(chunks, channel = -1, maxChannels = 9) {
694
- let channelLimit;
695
- if (channel !== -1) {
696
- if (chunks[0] && chunks[0].length - 1 < channel) {
697
- throw new Error(
698
- \`Channel \${channel} out of range: max \${chunks[0].length}\`
699
- );
700
- }
701
- channelLimit = channel + 1;
702
- } else {
703
- channel = 0;
704
- channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels);
705
- }
706
- const channels = [];
707
- for (let n = channel; n < channelLimit; n++) {
708
- const length = chunks.reduce((sum, chunk) => {
709
- return sum + chunk[n].length;
710
- }, 0);
711
- const buffers = chunks.map((chunk) => chunk[n]);
712
- const result = new Float32Array(length);
713
- let offset = 0;
714
- for (let i = 0; i < buffers.length; i++) {
715
- result.set(buffers[i], offset);
716
- offset += buffers[i].length;
717
- }
718
- channels[n] = result;
719
- }
720
- return channels;
721
- }
722
-
723
- /**
724
- * Combines parallel audio data into correct format,
725
- * channels[Left[], Right[]] to float32Array[LRLRLRLR...]
726
- */
727
- formatAudioData(channels) {
728
- if (channels.length === 1) {
729
- // Simple case is only one channel
730
- const float32Array = channels[0].slice();
731
- const meanValues = channels[0].slice();
732
- return { float32Array, meanValues };
733
- } else {
734
- const float32Array = new Float32Array(
735
- channels[0].length * channels.length
736
- );
737
- const meanValues = new Float32Array(channels[0].length);
738
- for (let i = 0; i < channels[0].length; i++) {
739
- const offset = i * channels.length;
740
- let meanValue = 0;
741
- for (let n = 0; n < channels.length; n++) {
742
- float32Array[offset + n] = channels[n][i];
743
- meanValue += channels[n][i];
744
- }
745
- meanValues[i] = meanValue / channels.length;
746
- }
747
- return { float32Array, meanValues };
748
- }
749
- }
750
-
751
- /**
752
- * Converts 32-bit float data to 16-bit integers
753
- */
754
- floatTo16BitPCM(float32Array) {
755
- const buffer = new ArrayBuffer(float32Array.length * 2);
756
- const view = new DataView(buffer);
757
- let offset = 0;
758
- for (let i = 0; i < float32Array.length; i++, offset += 2) {
759
- let s = Math.max(-1, Math.min(1, float32Array[i]));
760
- view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
761
- }
762
- return buffer;
763
- }
764
-
765
- /**
766
- * Retrieves the most recent amplitude values from the audio stream
767
- * @param {number} channel
768
- */
769
- getValues(channel = -1) {
770
- const channels = this.readChannelData(this.chunks, channel);
771
- const { meanValues } = this.formatAudioData(channels);
772
- return { meanValues, channels };
773
- }
774
-
775
- /**
776
- * Exports chunks as an audio/wav file
777
- */
778
- export() {
779
- const channels = this.readChannelData(this.chunks);
780
- const { float32Array, meanValues } = this.formatAudioData(channels);
781
- const audioData = this.floatTo16BitPCM(float32Array);
782
- return {
783
- meanValues: meanValues,
784
- audio: {
785
- bitsPerSample: 16,
786
- channels: channels,
787
- data: audioData,
788
- },
789
- };
790
- }
791
-
792
- receive(e) {
793
- const { event, id } = e.data;
794
- let receiptData = {};
795
- switch (event) {
796
- case 'start':
797
- this.recording = true;
798
- break;
799
- case 'stop':
800
- this.recording = false;
801
- break;
802
- case 'clear':
803
- this.initialize();
804
- break;
805
- case 'export':
806
- receiptData = this.export();
807
- break;
808
- case 'read':
809
- receiptData = this.getValues();
810
- break;
811
- default:
812
- break;
813
- }
814
- // Always send back receipt
815
- this.port.postMessage({ event: 'receipt', id, data: receiptData });
816
- }
817
-
818
- sendChunk(chunk) {
819
- const channels = this.readChannelData([chunk]);
820
- const { float32Array, meanValues } = this.formatAudioData(channels);
821
- const rawAudioData = this.floatTo16BitPCM(float32Array);
822
- const monoAudioData = this.floatTo16BitPCM(meanValues);
823
- this.port.postMessage({
824
- event: 'chunk',
825
- data: {
826
- mono: monoAudioData,
827
- raw: rawAudioData,
828
- },
829
- });
830
- }
831
-
832
- process(inputList, outputList, parameters) {
833
- // Copy input to output (e.g. speakers)
834
- // Note that this creates choppy sounds with Mac products
835
- const sourceLimit = Math.min(inputList.length, outputList.length);
836
- for (let inputNum = 0; inputNum < sourceLimit; inputNum++) {
837
- const input = inputList[inputNum];
838
- const output = outputList[inputNum];
839
- const channelCount = Math.min(input.length, output.length);
840
- for (let channelNum = 0; channelNum < channelCount; channelNum++) {
841
- input[channelNum].forEach((sample, i) => {
842
- output[channelNum][i] = sample;
843
- });
844
- }
845
- }
846
- const inputs = inputList[0];
847
- // There's latency at the beginning of a stream before recording starts
848
- // Make sure we actually receive audio data before we start storing chunks
849
- let sliceIndex = 0;
850
- if (!this.foundAudio) {
851
- for (const channel of inputs) {
852
- sliceIndex = 0; // reset for each channel
853
- if (this.foundAudio) {
854
- break;
855
- }
856
- if (channel) {
857
- for (const value of channel) {
858
- if (value !== 0) {
859
- // find only one non-zero entry in any channel
860
- this.foundAudio = true;
861
- break;
862
- } else {
863
- sliceIndex++;
864
- }
865
- }
866
- }
867
- }
868
- }
869
- if (inputs && inputs[0] && this.foundAudio && this.recording) {
870
- // We need to copy the TypedArray, because the \`process\`
871
- // internals will reuse the same buffer to hold each input
872
- const chunk = inputs.map((input) => input.slice(sliceIndex));
873
- this.chunks.push(chunk);
874
- this.sendChunk(chunk);
875
- }
876
- return true;
877
- }
878
- }
879
-
880
- registerProcessor('audio_processor', AudioProcessor);
881
- `;
882
-
883
- const script = new Blob([AudioProcessorWorklet], {
884
- type: 'application/javascript',
885
- });
886
- const src = URL.createObjectURL(script);
887
- const AudioProcessorSrc = src;
888
-
889
- /**
890
- * Decodes audio into a wav file
891
- * @typedef {Object} DecodedAudioType
892
- * @property {Blob} blob
893
- * @property {string} url
894
- * @property {Float32Array} values
895
- * @property {AudioBuffer} audioBuffer
896
- */
897
-
898
- /**
899
- * Records live stream of user audio as PCM16 "audio/wav" data
900
- * @class
901
- */
902
- class WavRecorder {
903
- /**
904
- * Create a new WavRecorder instance
905
- * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
906
- * @returns {WavRecorder}
907
- */
908
- constructor({
909
- sampleRate = 24000,
910
- outputToSpeakers = false,
911
- debug = false,
912
- } = {}) {
913
- // Script source
914
- this.scriptSrc = AudioProcessorSrc;
915
- // Config
916
- this.sampleRate = sampleRate;
917
- this.outputToSpeakers = outputToSpeakers;
918
- this.debug = !!debug;
919
- this._deviceChangeCallback = null;
920
- this._devices = [];
921
- // State variables
922
- this.stream = null;
923
- this.processor = null;
924
- this.source = null;
925
- this.node = null;
926
- this.recording = false;
927
- // Event handling with AudioWorklet
928
- this._lastEventId = 0;
929
- this.eventReceipts = {};
930
- this.eventTimeout = 5000;
931
- // Process chunks of audio
932
- this._chunkProcessor = () => {};
933
- this._chunkProcessorSize = void 0;
934
- this._chunkProcessorBuffer = {
935
- raw: new ArrayBuffer(0),
936
- mono: new ArrayBuffer(0),
937
- };
938
- }
939
-
940
- /**
941
- * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
942
- * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
943
- * @param {number} sampleRate
944
- * @param {number} fromSampleRate
945
- * @returns {Promise<DecodedAudioType>}
946
- */
947
- static async decode(audioData, sampleRate = 24000, fromSampleRate = -1) {
948
- const context = new AudioContext({ sampleRate });
949
- let arrayBuffer;
950
- let blob;
951
- if (audioData instanceof Blob) {
952
- if (fromSampleRate !== -1) {
953
- throw new Error(
954
- `Can not specify "fromSampleRate" when reading from Blob`,
955
- );
956
- }
957
- blob = audioData;
958
- arrayBuffer = await blob.arrayBuffer();
959
- } else if (audioData instanceof ArrayBuffer) {
960
- if (fromSampleRate !== -1) {
961
- throw new Error(
962
- `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
963
- );
964
- }
965
- arrayBuffer = audioData;
966
- blob = new Blob([arrayBuffer], { type: 'audio/wav' });
967
- } else {
968
- let float32Array;
969
- let data;
970
- if (audioData instanceof Int16Array) {
971
- data = audioData;
972
- float32Array = new Float32Array(audioData.length);
973
- for (let i = 0; i < audioData.length; i++) {
974
- float32Array[i] = audioData[i] / 0x8000;
975
- }
976
- } else if (audioData instanceof Float32Array) {
977
- float32Array = audioData;
978
- } else if (audioData instanceof Array) {
979
- float32Array = new Float32Array(audioData);
980
- } else {
981
- throw new Error(
982
- `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array<number>`,
983
- );
984
- }
985
- if (fromSampleRate === -1) {
986
- throw new Error(
987
- `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`,
988
- );
989
- } else if (fromSampleRate < 3000) {
990
- throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
991
- }
992
- if (!data) {
993
- data = WavPacker.floatTo16BitPCM(float32Array);
994
- }
995
- const audio = {
996
- bitsPerSample: 16,
997
- channels: [float32Array],
998
- data,
999
- };
1000
- const packer = new WavPacker();
1001
- const result = packer.pack(fromSampleRate, audio);
1002
- blob = result.blob;
1003
- arrayBuffer = await blob.arrayBuffer();
1004
- }
1005
- const audioBuffer = await context.decodeAudioData(arrayBuffer);
1006
- const values = audioBuffer.getChannelData(0);
1007
- const url = URL.createObjectURL(blob);
1008
- return {
1009
- blob,
1010
- url,
1011
- values,
1012
- audioBuffer,
1013
- };
1014
- }
1015
-
1016
- /**
1017
- * Logs data in debug mode
1018
- * @param {...any} arguments
1019
- * @returns {true}
1020
- */
1021
- log() {
1022
- if (this.debug) {
1023
- console.log(...arguments);
1024
- }
1025
- return true;
1026
- }
1027
-
1028
- /**
1029
- * Retrieves the current sampleRate for the recorder
1030
- * @returns {number}
1031
- */
1032
- getSampleRate() {
1033
- return this.sampleRate;
1034
- }
1035
-
1036
- /**
1037
- * Retrieves the current status of the recording
1038
- * @returns {"ended"|"paused"|"recording"}
1039
- */
1040
- getStatus() {
1041
- if (!this.processor) {
1042
- return 'ended';
1043
- } else if (!this.recording) {
1044
- return 'paused';
1045
- } else {
1046
- return 'recording';
1047
- }
1048
- }
1049
-
1050
- /**
1051
- * Sends an event to the AudioWorklet
1052
- * @private
1053
- * @param {string} name
1054
- * @param {{[key: string]: any}} data
1055
- * @param {AudioWorkletNode} [_processor]
1056
- * @returns {Promise<{[key: string]: any}>}
1057
- */
1058
- async _event(name, data = {}, _processor = null) {
1059
- _processor = _processor || this.processor;
1060
- if (!_processor) {
1061
- throw new Error('Can not send events without recording first');
1062
- }
1063
- const message = {
1064
- event: name,
1065
- id: this._lastEventId++,
1066
- data,
1067
- };
1068
- _processor.port.postMessage(message);
1069
- const t0 = new Date().valueOf();
1070
- while (!this.eventReceipts[message.id]) {
1071
- if (new Date().valueOf() - t0 > this.eventTimeout) {
1072
- throw new Error(`Timeout waiting for "${name}" event`);
1073
- }
1074
- await new Promise((res) => setTimeout(() => res(true), 1));
1075
- }
1076
- const payload = this.eventReceipts[message.id];
1077
- delete this.eventReceipts[message.id];
1078
- return payload;
1079
- }
1080
-
1081
- /**
1082
- * Sets device change callback, remove if callback provided is `null`
1083
- * @param {((devices: Array<MediaDeviceInfo & {default: boolean}>) => void)|null} callback
1084
- * @returns {true}
1085
- */
1086
- listenForDeviceChange(callback) {
1087
- if (callback === null && this._deviceChangeCallback) {
1088
- navigator.mediaDevices.removeEventListener(
1089
- 'devicechange',
1090
- this._deviceChangeCallback,
1091
- );
1092
- this._deviceChangeCallback = null;
1093
- } else if (callback !== null) {
1094
- // Basically a debounce; we only want this called once when devices change
1095
- // And we only want the most recent callback() to be executed
1096
- // if a few are operating at the same time
1097
- let lastId = 0;
1098
- let lastDevices = [];
1099
- const serializeDevices = (devices) =>
1100
- devices
1101
- .map((d) => d.deviceId)
1102
- .sort()
1103
- .join(',');
1104
- const cb = async () => {
1105
- let id = ++lastId;
1106
- const devices = await this.listDevices();
1107
- if (id === lastId) {
1108
- if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
1109
- lastDevices = devices;
1110
- callback(devices.slice());
1111
- }
1112
- }
1113
- };
1114
- navigator.mediaDevices.addEventListener('devicechange', cb);
1115
- cb();
1116
- this._deviceChangeCallback = cb;
1117
- }
1118
- return true;
1119
- }
1120
-
1121
- /**
1122
- * Manually request permission to use the microphone
1123
- * @returns {Promise<true>}
1124
- */
1125
- async requestPermission() {
1126
- const permissionStatus = await navigator.permissions.query({
1127
- name: 'microphone',
1128
- });
1129
- if (permissionStatus.state === 'denied') {
1130
- window.alert('You must grant microphone access to use this feature.');
1131
- } else if (permissionStatus.state === 'prompt') {
1132
- try {
1133
- const stream = await navigator.mediaDevices.getUserMedia({
1134
- audio: true,
1135
- });
1136
- const tracks = stream.getTracks();
1137
- tracks.forEach((track) => track.stop());
1138
- } catch (e) {
1139
- window.alert('You must grant microphone access to use this feature.');
1140
- }
1141
- }
1142
- return true;
1143
- }
1144
-
1145
- /**
1146
- * List all eligible devices for recording, will request permission to use microphone
1147
- * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
1148
- */
1149
- async listDevices() {
1150
- if (
1151
- !navigator.mediaDevices ||
1152
- !('enumerateDevices' in navigator.mediaDevices)
1153
- ) {
1154
- throw new Error('Could not request user devices');
1155
- }
1156
- await this.requestPermission();
1157
- const devices = await navigator.mediaDevices.enumerateDevices();
1158
- const audioDevices = devices.filter(
1159
- (device) => device.kind === 'audioinput',
1160
- );
1161
- const defaultDeviceIndex = audioDevices.findIndex(
1162
- (device) => device.deviceId === 'default',
1163
- );
1164
- const deviceList = [];
1165
- if (defaultDeviceIndex !== -1) {
1166
- let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
1167
- let existingIndex = audioDevices.findIndex(
1168
- (device) => device.groupId === defaultDevice.groupId,
1169
- );
1170
- if (existingIndex !== -1) {
1171
- defaultDevice = audioDevices.splice(existingIndex, 1)[0];
1172
- }
1173
- defaultDevice.default = true;
1174
- deviceList.push(defaultDevice);
1175
- }
1176
- return deviceList.concat(audioDevices);
1177
- }
1178
-
1179
- /**
1180
- * Begins a recording session and requests microphone permissions if not already granted
1181
- * Microphone recording indicator will appear on browser tab but status will be "paused"
1182
- * @param {string} [deviceId] if no device provided, default device will be used
1183
- * @returns {Promise<true>}
1184
- */
1185
- async begin(deviceId) {
1186
- if (this.processor) {
1187
- throw new Error(
1188
- `Already connected: please call .end() to start a new session`,
1189
- );
1190
- }
1191
-
1192
- if (
1193
- !navigator.mediaDevices ||
1194
- !('getUserMedia' in navigator.mediaDevices)
1195
- ) {
1196
- throw new Error('Could not request user media');
1197
- }
1198
- try {
1199
- const config = { audio: true };
1200
- if (deviceId) {
1201
- config.audio = { deviceId: { exact: deviceId } };
1202
- }
1203
- this.stream = await navigator.mediaDevices.getUserMedia(config);
1204
- } catch (err) {
1205
- throw new Error('Could not start media stream');
1206
- }
1207
-
1208
- const context = new AudioContext({ sampleRate: this.sampleRate });
1209
- const source = context.createMediaStreamSource(this.stream);
1210
- // Load and execute the module script.
1211
- try {
1212
- await context.audioWorklet.addModule(this.scriptSrc);
1213
- } catch (e) {
1214
- console.error(e);
1215
- throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
1216
- }
1217
- const processor = new AudioWorkletNode(context, 'audio_processor');
1218
- processor.port.onmessage = (e) => {
1219
- const { event, id, data } = e.data;
1220
- if (event === 'receipt') {
1221
- this.eventReceipts[id] = data;
1222
- } else if (event === 'chunk') {
1223
- if (this._chunkProcessorSize) {
1224
- const buffer = this._chunkProcessorBuffer;
1225
- this._chunkProcessorBuffer = {
1226
- raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
1227
- mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
1228
- };
1229
- if (
1230
- this._chunkProcessorBuffer.mono.byteLength >=
1231
- this._chunkProcessorSize
1232
- ) {
1233
- this._chunkProcessor(this._chunkProcessorBuffer);
1234
- this._chunkProcessorBuffer = {
1235
- raw: new ArrayBuffer(0),
1236
- mono: new ArrayBuffer(0),
1237
- };
1238
- }
1239
- } else {
1240
- this._chunkProcessor(data);
1241
- }
1242
- }
1243
- };
1244
-
1245
- const node = source.connect(processor);
1246
- const analyser = context.createAnalyser();
1247
- analyser.fftSize = 8192;
1248
- analyser.smoothingTimeConstant = 0.1;
1249
- node.connect(analyser);
1250
- if (this.outputToSpeakers) {
1251
- // eslint-disable-next-line no-console
1252
- console.warn(
1253
- 'Warning: Output to speakers may affect sound quality,\n' +
1254
- 'especially due to system audio feedback preventative measures.\n' +
1255
- 'use only for debugging',
1256
- );
1257
- analyser.connect(context.destination);
1258
- }
1259
-
1260
- this.source = source;
1261
- this.node = node;
1262
- this.analyser = analyser;
1263
- this.processor = processor;
1264
- return true;
1265
- }
1266
-
1267
- /**
1268
- * Gets the current frequency domain data from the recording track
1269
- * @param {"frequency"|"music"|"voice"} [analysisType]
1270
- * @param {number} [minDecibels] default -100
1271
- * @param {number} [maxDecibels] default -30
1272
- * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
1273
- */
1274
- getFrequencies(
1275
- analysisType = 'frequency',
1276
- minDecibels = -100,
1277
- maxDecibels = -30,
1278
- ) {
1279
- if (!this.processor) {
1280
- throw new Error('Session ended: please call .begin() first');
1281
- }
1282
- return AudioAnalysis.getFrequencies(
1283
- this.analyser,
1284
- this.sampleRate,
1285
- null,
1286
- analysisType,
1287
- minDecibels,
1288
- maxDecibels,
1289
- );
1290
- }
1291
-
1292
-
1293
- /**
1294
- * Gets the real-time amplitude of the audio signal
1295
- * @returns {number} Amplitude value between 0 and 1
1296
- */
1297
- getAmplitude() {
1298
- if (!this.analyser) {
1299
- throw new Error('AnalyserNode is not initialized. Please call .begin() first.');
1300
- }
1301
-
1302
- const bufferLength = this.analyser.fftSize;
1303
- const dataArray = new Uint8Array(bufferLength);
1304
- this.analyser.getByteTimeDomainData(dataArray);
1305
-
1306
- // Calculate RMS (Root Mean Square) to get amplitude
1307
- let sumSquares = 0;
1308
- for (let i = 0; i < bufferLength; i++) {
1309
- const normalized = (dataArray[i] - 128) / 128; // Normalize between -1 and 1
1310
- sumSquares += normalized * normalized;
1311
- }
1312
- const rms = Math.sqrt(sumSquares / bufferLength);
1313
- return rms;
1314
- }
1315
-
1316
- /**
1317
- * Starts amplitude monitoring
1318
- * @param {function} callback - Function to call with amplitude value
1319
- */
1320
- startAmplitudeMonitoring(callback) {
1321
- const monitor = () => {
1322
- const amplitude = this.getAmplitude();
1323
- callback(amplitude);
1324
- requestAnimationFrame(monitor);
1325
- };
1326
- monitor();
1327
- }
1328
-
1329
- /**
1330
- * Pauses the recording
1331
- * Keeps microphone stream open but halts storage of audio
1332
- * @returns {Promise<true>}
1333
- */
1334
- async pause() {
1335
- if (!this.processor) {
1336
- throw new Error('Session ended: please call .begin() first');
1337
- } else if (!this.recording) {
1338
- throw new Error('Already paused: please call .record() first');
1339
- }
1340
- if (this._chunkProcessorBuffer.raw.byteLength) {
1341
- this._chunkProcessor(this._chunkProcessorBuffer);
1342
- }
1343
- this.log('Pausing ...');
1344
- await this._event('stop');
1345
- this.recording = false;
1346
- return true;
1347
- }
1348
-
1349
- /**
1350
- * Start recording stream and storing to memory from the connected audio source
1351
- * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
1352
- * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio
1353
- * @returns {Promise<true>}
1354
- */
1355
- async record(chunkProcessor = () => {}, chunkSize = 8192) {
1356
- if (!this.processor) {
1357
- throw new Error('Session ended: please call .begin() first');
1358
- } else if (this.recording) {
1359
- throw new Error('Already recording: please call .pause() first');
1360
- } else if (typeof chunkProcessor !== 'function') {
1361
- throw new Error(`chunkProcessor must be a function`);
1362
- }
1363
- this._chunkProcessor = chunkProcessor;
1364
- this._chunkProcessorSize = chunkSize;
1365
- this._chunkProcessorBuffer = {
1366
- raw: new ArrayBuffer(0),
1367
- mono: new ArrayBuffer(0),
1368
- };
1369
- this.log('Recording ...');
1370
- await this._event('start');
1371
- this.recording = true;
1372
- return true;
1373
- }
1374
-
1375
- /**
1376
- * Clears the audio buffer, empties stored recording
1377
- * @returns {Promise<true>}
1378
- */
1379
- async clear() {
1380
- if (!this.processor) {
1381
- throw new Error('Session ended: please call .begin() first');
1382
- }
1383
- await this._event('clear');
1384
- return true;
1385
- }
1386
-
1387
- /**
1388
- * Reads the current audio stream data
1389
- * @returns {Promise<{meanValues: Float32Array, channels: Array<Float32Array>}>}
1390
- */
1391
- async read() {
1392
- if (!this.processor) {
1393
- throw new Error('Session ended: please call .begin() first');
1394
- }
1395
- this.log('Reading ...');
1396
- const result = await this._event('read');
1397
- return result;
1398
- }
1399
-
1400
- /**
1401
- * Saves the current audio stream to a file
1402
- * @param {boolean} [force] Force saving while still recording
1403
- * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
1404
- */
1405
- async save(force = false) {
1406
- if (!this.processor) {
1407
- throw new Error('Session ended: please call .begin() first');
1408
- }
1409
- if (!force && this.recording) {
1410
- throw new Error(
1411
- 'Currently recording: please call .pause() first, or call .save(true) to force',
1412
- );
1413
- }
1414
- this.log('Exporting ...');
1415
- const exportData = await this._event('export');
1416
- const packer = new WavPacker();
1417
- const result = packer.pack(this.sampleRate, exportData.audio);
1418
- return result;
1419
- }
1420
-
1421
- /**
1422
- * Ends the current recording session and saves the result
1423
- * @returns {Promise<import('./wav_packer.js').WavPackerAudioType>}
1424
- */
1425
- async end() {
1426
- if (!this.processor) {
1427
- throw new Error('Session ended: please call .begin() first');
1428
- }
1429
-
1430
- const _processor = this.processor;
1431
-
1432
- this.log('Stopping ...');
1433
- await this._event('stop');
1434
- this.recording = false;
1435
- const tracks = this.stream.getTracks();
1436
- tracks.forEach((track) => track.stop());
1437
-
1438
- this.log('Exporting ...');
1439
- const exportData = await this._event('export', {}, _processor);
1440
-
1441
- this.processor.disconnect();
1442
- this.source.disconnect();
1443
- this.node.disconnect();
1444
- this.analyser.disconnect();
1445
- this.stream = null;
1446
- this.processor = null;
1447
- this.source = null;
1448
- this.node = null;
1449
-
1450
- const packer = new WavPacker();
1451
- const result = packer.pack(this.sampleRate, exportData.audio);
1452
- return result;
1453
- }
1454
-
1455
- /**
1456
- * Performs a full cleanup of WavRecorder instance
1457
- * Stops actively listening via microphone and removes existing listeners
1458
- * @returns {Promise<true>}
1459
- */
1460
- async quit() {
1461
- this.listenForDeviceChange(null);
1462
- if (this.processor) {
1463
- await this.end();
1464
- }
1465
- return true;
1466
- }
1467
- }
1468
-
1469
- globalThis.WavRecorder = WavRecorder;
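
A sketch of a typical WavRecorder session using the class above; forwarding chunks to a server is an assumed use case, not something the class does itself:

const recorder = new WavRecorder({ sampleRate: 24000 });
await recorder.begin();                 // requests microphone access
await recorder.record((chunk) => {
  // chunk.mono / chunk.raw are PCM16 ArrayBuffers, delivered once 8192 bytes
  // of mono audio have accumulated; forward them over your own transport here
}, 8192);

// ...later...
await recorder.pause();                 // keep the microphone open, stop storing audio
const wav = await recorder.save();      // { blob, url, channelCount, sampleRate, duration }
await recorder.end();                   // stop the microphone and finalize the session
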
1470
-
1471
- /**
1472
- * Converts a base64 string to an ArrayBuffer.
1473
- * @param {string} base64 - The base64 string to convert.
1474
- * @returns {ArrayBuffer} The resulting ArrayBuffer.
1475
- */
1476
- function base64ToArrayBuffer(base64) {
1477
- const binaryString = atob(base64);
1478
- const len = binaryString.length;
1479
- const bytes = new Uint8Array(len);
1480
- for (let i = 0; i < len; i++) {
1481
- bytes[i] = binaryString.charCodeAt(i);
1482
- }
1483
- return bytes.buffer;
1484
- }
1485
-
1486
- /**
1487
- * Converts an ArrayBuffer to a base64 string.
1488
- * @param {ArrayBuffer|Float32Array|Int16Array} arrayBuffer - The ArrayBuffer to convert.
1489
- * @returns {string} The resulting base64 string.
1490
- */
1491
- function arrayBufferToBase64(arrayBuffer) {
1492
- if (arrayBuffer instanceof Float32Array) {
1493
- arrayBuffer = WavPacker.floatTo16BitPCM(arrayBuffer);
1494
- } else if (arrayBuffer instanceof Int16Array) {
1495
- arrayBuffer = arrayBuffer.buffer;
1496
- }
1497
- let binary = '';
1498
- let bytes = new Uint8Array(arrayBuffer);
1499
- const chunkSize = 0x8000; // 32KB chunk size
1500
- for (let i = 0; i < bytes.length; i += chunkSize) {
1501
- let chunk = bytes.subarray(i, i + chunkSize);
1502
- binary += String.fromCharCode.apply(null, chunk);
1503
- }
1504
- return btoa(binary);
1505
- }
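
A quick round-trip check of the two helpers above (browser atob/btoa assumed):

const pcmSamples = new Int16Array([0, 1024, -1024, 32767]);
const b64 = arrayBufferToBase64(pcmSamples);                // Int16Array -> base64
const restored = new Int16Array(base64ToArrayBuffer(b64));  // base64 -> Int16Array
console.log(restored); // Int16Array [0, 1024, -1024, 32767]
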
1506
-
1507
- /* eslint-env browser */
1508
- /**
1509
- * @class LayercodeClient
1510
- * @classdesc Core client for Layercode audio pipeline that manages audio recording, WebSocket communication, and speech processing.
1511
- */
1512
- class LayercodeClient {
1513
- /**
1514
- * Creates an instance of LayercodeClient.
1515
- * @param {Object} options - Configuration options
1516
- */
1517
- constructor(options) {
1518
- this.options = {
1519
- pipelineId: options.pipelineId,
1520
- sessionId: options.sessionId || null,
1521
- authorizeSessionEndpoint: options.authorizeSessionEndpoint,
1522
- metadata: options.metadata || {},
1523
- onConnect: options.onConnect || (() => { }),
1524
- onDisconnect: options.onDisconnect || (() => { }),
1525
- onError: options.onError || (() => { }),
1526
- onDataMessage: options.onDataMessage || (() => { }),
1527
- onUserAmplitudeChange: options.onUserAmplitudeChange || (() => { }),
1528
- onAgentAmplitudeChange: options.onAgentAmplitudeChange || (() => { }),
1529
- onStatusChange: options.onStatusChange || (() => { }),
1530
- };
1531
- this.AMPLITUDE_MONITORING_SAMPLE_RATE = 10;
1532
- this._websocketUrl = 'wss://api.layercode.com/v1/pipelines/websocket';
1533
- this.wavRecorder = new WavRecorder({ sampleRate: 8000 }); // TODO: should be set by the fetched pipeline config
1534
- this.wavPlayer = new WavStreamPlayer({
1535
- finishedPlayingCallback: this._clientResponseAudioReplayFinished.bind(this),
1536
- sampleRate: 16000, // TODO: should be set by the fetched pipeline config
1537
- });
1538
- this.ws = null;
1539
- this.status = 'disconnected';
1540
- this.userAudioAmplitude = 0;
1541
- this.agentAudioAmplitude = 0;
1542
- this.sessionId = options.sessionId || null;
1543
- this.pushToTalkActive = false;
1544
- // Bind event handlers
1545
- this._handleWebSocketMessage = this._handleWebSocketMessage.bind(this);
1546
- this._handleDataAvailable = this._handleDataAvailable.bind(this);
1547
- }
1548
- /**
1549
- * Updates the connection status and triggers the callback
1550
- * @param {string} status - New status value
1551
- * @private
1552
- */
1553
- _setStatus(status) {
1554
- this.status = status;
1555
- this.options.onStatusChange(status);
1556
- }
1557
- /**
1558
- * Handles when agent audio finishes playing
1559
- * @private
1560
- */
1561
- _clientResponseAudioReplayFinished() {
1562
- console.log('clientResponseAudioReplayFinished');
1563
- this._wsSend({
1564
- type: 'trigger.response.audio.replay_finished',
1565
- reason: 'completed',
1566
- });
1567
- }
1568
- async _clientInterruptAssistantReplay() {
1569
- await this.wavPlayer.interrupt();
1570
- // TODO: Use in voice pipeline to know how much of the audio has been played and how much to truncate transcript
1571
- // this._wsSend({
1572
- // type: 'trigger.response.audio.replay_finished',
1573
- // reason: 'interrupted',
1574
- // delta_id: 'TODO'
1575
- // });
1576
- }
1577
- async triggerUserTurnStarted() {
1578
- if (!this.pushToTalkActive) {
1579
- this.pushToTalkActive = true;
1580
- this._wsSend({ type: 'trigger.turn.start', role: 'user' });
1581
- await this._clientInterruptAssistantReplay();
1582
- }
1583
- }
1584
- async triggerUserTurnFinished() {
1585
- if (this.pushToTalkActive) {
1586
- this.pushToTalkActive = false;
1587
- this._wsSend({ type: 'trigger.turn.end', role: 'user' });
1588
- }
1589
- }
1590
- /**
1591
- * Handles incoming WebSocket messages
1592
- * @param {MessageEvent} event - The WebSocket message event
1593
- * @private
1594
- */
1595
- async _handleWebSocketMessage(event) {
1596
- try {
1597
- const message = JSON.parse(event.data);
1598
- if (message.type !== 'response.audio') {
1599
- console.log('received ws msg:', message);
1600
- }
1601
- switch (message.type) {
1602
- case 'turn.start':
1603
- // Sent from the server to this client when a new user turn is detected
1604
- console.log('received turn.start from server');
1605
- console.log(message);
1606
- // if (message.role === 'user' && !this.pushToTalkActive) {
1607
- if (message.role === 'user') {
1608
- // Interrupt any playing assistant audio if this is a turn triggered by the server (and not push to talk, which will have already called interrupt)
1609
- console.log('interrupting assistant audio, as user turn has started and pushToTalkActive is false');
1610
- await this._clientInterruptAssistantReplay();
1611
- }
1612
- break;
1613
- case 'response.audio':
1614
- const audioBuffer = base64ToArrayBuffer(message.content);
1615
- this.wavPlayer.add16BitPCM(audioBuffer, message.turn_id);
1616
- break;
1617
- // case 'response.end':
1618
- // console.log('received response.end');
1619
- // break;
1620
- case 'response.data':
1621
- console.log('received response.data', message);
1622
- this.options.onDataMessage(message);
1623
- break;
1624
- default:
1625
- console.error('Unknown message type received:', message);
1626
- break;
1627
- }
1628
- }
1629
- catch (error) {
1630
- console.error('Error processing WebSocket message:', error);
1631
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
1632
- }
1633
- }
1634
- /**
1635
- * Handles available client browser microphone audio data and sends it over the WebSocket
1636
- * @param {ArrayBuffer} data - The audio data buffer
1637
- * @private
1638
- */
1639
- _handleDataAvailable(data) {
1640
- try {
1641
- const base64 = arrayBufferToBase64(data.mono);
1642
- this._wsSend({
1643
- type: 'client.audio',
1644
- content: base64,
1645
- });
1646
- }
1647
- catch (error) {
1648
- console.error('Error processing audio:', error);
1649
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
1650
- }
1651
- }
1652
- _wsSend(message) {
1653
- var _a;
1654
- if (message.type !== 'client.audio') {
1655
- console.log('sent ws msg:', message);
1656
- }
1657
- const messageString = JSON.stringify(message);
1658
- if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
1659
- this.ws.send(messageString);
1660
- }
1661
- }
1662
- /**
1663
- * Sets up amplitude monitoring for a given audio source.
1664
- * @param {WavRecorder | WavStreamPlayer} source - The audio source (recorder or player).
1665
- * @param {(amplitude: number) => void} callback - The callback function to invoke on amplitude change.
1666
- * @param {(amplitude: number) => void} updateInternalState - Function to update the internal amplitude state.
1667
- * @private
1668
- */
1669
- _setupAmplitudeMonitoring(source, callback, updateInternalState) {
1670
- // Set up amplitude monitoring only if a callback is provided
1671
- // Check against the default no-op function defined in the constructor options
1672
- if (callback !== (() => { })) {
1673
- let updateCounter = 0;
1674
- source.startAmplitudeMonitoring((amplitude) => {
1675
- // Only update and call callback at the specified sample rate
1676
- if (updateCounter >= this.AMPLITUDE_MONITORING_SAMPLE_RATE) {
1677
- updateInternalState(amplitude);
1678
- callback(amplitude);
1679
- updateCounter = 0; // Reset counter after sampling
1680
- }
1681
- updateCounter++;
1682
- });
1683
- }
1684
- }
1685
- /**
1686
- * Connects to the Layercode pipeline and starts the audio session
1687
- * @async
1688
- * @returns {Promise<void>}
1689
- */
1690
- async connect() {
1691
- try {
1692
- this._setStatus('connecting');
1693
- // Get session key from server
1694
- let authorizeSessionRequestBody = {
1695
- pipeline_id: this.options.pipelineId,
1696
- metadata: this.options.metadata,
1697
- };
1698
- // If we're reconnecting to a previous session, we need to include the session_id in the request. Otherwise we don't send session_id, and a new session will be created and the session_id will be returned in the response.
1699
- if (this.options.sessionId) {
1700
- authorizeSessionRequestBody.session_id = this.options.sessionId;
1701
- }
1702
- const authorizeSessionResponse = await fetch(this.options.authorizeSessionEndpoint, {
1703
- method: 'POST',
1704
- headers: {
1705
- 'Content-Type': 'application/json',
1706
- },
1707
- body: JSON.stringify(authorizeSessionRequestBody),
1708
- });
1709
- if (!authorizeSessionResponse.ok) {
1710
- throw new Error(`Failed to authorize session: ${authorizeSessionResponse.statusText}`);
1711
- }
1712
- const authorizeSessionResponseBody = await authorizeSessionResponse.json();
1713
- this.sessionId = authorizeSessionResponseBody.session_id; // Save the session_id for use in future reconnects
1714
- // Connect WebSocket
1715
- this.ws = new WebSocket(`${this._websocketUrl}?${new URLSearchParams({
1716
- client_session_key: authorizeSessionResponseBody.client_session_key,
1717
- })}`);
1718
- // Bind the websocket message callbacks
1719
- this.ws.onmessage = this._handleWebSocketMessage;
1720
- this.ws.onopen = () => {
1721
- console.log('WebSocket connection established');
1722
- this._setStatus('connected');
1723
- this.options.onConnect({ sessionId: this.sessionId });
1724
- };
1725
- this.ws.onclose = () => {
1726
- console.log('WebSocket connection closed');
1727
- this._setStatus('disconnected');
1728
- this.options.onDisconnect();
1729
- };
1730
- this.ws.onerror = (error) => {
1731
- console.error('WebSocket error:', error);
1732
- this._setStatus('error');
1733
- this.options.onError(new Error('WebSocket connection error'));
1734
- };
1735
- // Initialize microphone audio capture
1736
- await this.wavRecorder.begin();
1737
- await this.wavRecorder.record(this._handleDataAvailable);
1738
- // Set up microphone amplitude monitoring
1739
- this._setupAmplitudeMonitoring(this.wavRecorder, this.options.onUserAmplitudeChange, (amp) => (this.userAudioAmplitude = amp));
1740
- // Initialize audio player
1741
- await this.wavPlayer.connect();
1742
- // Set up audio player amplitude monitoring
1743
- this._setupAmplitudeMonitoring(this.wavPlayer, this.options.onAgentAmplitudeChange, (amp) => (this.agentAudioAmplitude = amp));
1744
- }
1745
- catch (error) {
1746
- console.error('Error connecting to Layercode pipeline:', error);
1747
- this._setStatus('error');
1748
- this.options.onError(error instanceof Error ? error : new Error(String(error)));
1749
- throw error;
1750
- }
1751
- }
1752
- async disconnect() {
1753
- var _a;
1754
- this.wavRecorder.quit();
1755
- this.wavPlayer.disconnect();
1756
- (_a = this.ws) === null || _a === void 0 ? void 0 : _a.close();
1757
- }
1758
- }
1759
-
1760
- export { LayercodeClient as default };
1761
- //# sourceMappingURL=layercode-js-sdk.esm.js.map
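
For context, a hedged sketch of wiring up the default export; the endpoint path, pipeline id and handlers below are placeholders, not values defined by the SDK:

import LayercodeClient from '@layercode/js-sdk';

const client = new LayercodeClient({
  pipelineId: 'your-pipeline-id',                       // placeholder
  authorizeSessionEndpoint: '/api/authorize-session',   // your backend route (placeholder)
  metadata: { userId: '123' },                          // optional, forwarded to the authorize call
  onConnect: ({ sessionId }) => console.log('connected', sessionId),
  onDataMessage: (msg) => console.log('data message', msg),
  onAgentAmplitudeChange: (amp) => { /* drive a speaking indicator */ },
  onError: (err) => console.error(err),
});

await client.connect();    // authorizes the session, opens the WebSocket, starts the microphone

// Optional push-to-talk controls:
// await client.triggerUserTurnStarted();
// await client.triggerUserTurnFinished();

await client.disconnect(); // stop the microphone, player and WebSocket
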