@pompeii-labs/audio 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,522 @@
1
// src/decoders/wav.ts
/**
 * Decode a PCM WAV file into normalized Float32 samples.
 *
 * @param bytes Uint8Array (or Node Buffer) containing the complete WAV file.
 * @returns { sampleRate, channels, bitsPerSample, samples, duration } where
 *          `samples` holds interleaved per-channel values normalized to
 *          [-1, 1], or null when the RIFF/WAVE header is missing.
 * @throws Error when the fmt chunk reports an unsupported audio format, or a
 *         sample read would run past the end of the buffer.
 */
function decodeWAV(bytes) {
  // Fix: honor the view's byteOffset/byteLength. The original built the
  // DataView over the entire backing ArrayBuffer, silently misreading WAVs
  // passed as a subarray or Node Buffer slice.
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  if (String.fromCharCode(...bytes.slice(0, 4)) !== "RIFF" || String.fromCharCode(...bytes.slice(8, 12)) !== "WAVE") {
    console.log("Invalid WAV header detected");
    return null;
  }
  let offset = 12;
  let audioFormat = 0;
  let channels = 0;
  let sampleRate = 0;
  let bitsPerSample = 0;
  let dataOffset = 0;
  let dataSize = 0;
  // Walk the RIFF chunk list looking for the "fmt " and "data" chunks.
  while (offset < bytes.length - 8) {
    const chunkId = String.fromCharCode(...bytes.slice(offset, offset + 4));
    const chunkSize = view.getUint32(offset + 4, true);
    if (chunkId === "fmt ") {
      audioFormat = view.getUint16(offset + 8, true);
      channels = view.getUint16(offset + 10, true);
      sampleRate = view.getUint32(offset + 12, true);
      bitsPerSample = view.getUint16(offset + 22, true);
    } else if (chunkId === "data") {
      dataOffset = offset + 8;
      // Tolerate truncated files: never trust chunkSize past the buffer end.
      dataSize = Math.min(chunkSize, bytes.length - dataOffset);
      break;
    }
    // Fix: RIFF chunks are word-aligned — an odd-sized chunk carries a pad
    // byte the original skipped, derailing the rest of the chunk walk.
    offset += 8 + chunkSize + (chunkSize & 1);
  }
  if (audioFormat !== 1) {
    throw new Error(`Unsupported WAV format: ${audioFormat}`);
  }
  const bytesPerSample = bitsPerSample / 8;
  const numSamples = Math.floor(dataSize / bytesPerSample);
  const samples = new Float32Array(numSamples);
  for (let i = 0; i < numSamples; i++) {
    const byteOffset = dataOffset + i * bytesPerSample;
    if (byteOffset + bytesPerSample > bytes.length) {
      console.error("Buffer overflow detected:", {
        byteOffset,
        bytesPerSample,
        bufferLength: bytes.length,
        sampleIndex: i
      });
      throw new Error("Buffer overflow while reading samples");
    }
    let sample = 0;
    if (bitsPerSample === 8) {
      // 8-bit WAV is unsigned; recenter around zero.
      sample = (bytes[byteOffset] - 128) / 128;
    } else if (bitsPerSample === 16) {
      sample = view.getInt16(byteOffset, true) / 32768;
    } else if (bitsPerSample === 24) {
      // Assemble a little-endian 24-bit value, then sign-extend manually.
      const value = bytes[byteOffset + 2] << 16 | bytes[byteOffset + 1] << 8 | bytes[byteOffset];
      sample = (value > 8388607 ? value - 16777216 : value) / 8388608;
    } else if (bitsPerSample === 32) {
      // NOTE(review): 32-bit data is read as IEEE float even though
      // audioFormat 1 nominally means integer PCM — confirm intended.
      sample = view.getFloat32(byteOffset, true);
    }
    samples[i] = Math.max(-1, Math.min(1, sample));
  }
  return {
    sampleRate,
    channels,
    bitsPerSample,
    samples, // interleaved across channels
    // Frames = numSamples / channels; duration (seconds) = frames / sampleRate.
    duration: numSamples / (sampleRate * channels)
  };
}
75
+
76
// src/decoders/mulaw.ts
/**
 * Decode a buffer of G.711 mu-law bytes into signed 16-bit linear PCM.
 *
 * @param mulawData Uint8Array of mu-law encoded bytes.
 * @returns Int16Array of linear PCM samples, one per input byte.
 */
function mulawToPcm16(mulawData) {
  const pcmData = new Int16Array(mulawData.length);
  for (let i = 0; i < mulawData.length; i++) {
    pcmData[i] = mulawToLinear(mulawData[i]);
  }
  return pcmData;
}
/**
 * Expand one mu-law byte to its linear PCM value (ITU-T G.711).
 *
 * Fix: the original used a nonstandard expansion — a special-cased segment 0
 * and a final bias of 33 instead of 132 — so even a byte produced by this
 * package's own encoder for silence (0xFF) decoded to -32 instead of 0.
 * The standard formula ((2*step + 33) << (segment + 2)) - 132 matches the
 * G.711 tables (0xFF -> 0, 0x80 -> 32124, 0x00 -> -32124) and round-trips
 * with encodeSample in src/encoders/mulaw.ts.
 */
function mulawToLinear(mulawByte) {
  const inverted = mulawByte ^ 255; // mu-law bytes are stored complemented
  const sign = inverted & 128;
  const segment = (inverted & 112) >> 4; // 3-bit exponent
  const step = inverted & 15; // 4-bit mantissa
  const linear = ((step << 1) + 33 << segment + 2) - 132;
  return sign ? -linear : linear;
}
98
+
99
// src/encoders/mulaw.ts
// G.711 mu-law encoder: 16-bit linear PCM -> 8-bit mu-law bytes.
var BIAS = 132;   // 0x84, standard mu-law bias added before the segment search
var CLIP = 32635; // maximum encodable magnitude before biasing overflows

// encodeTable[i] = floor(log2(i)) for 1 <= i <= 255 (and 0 for i === 0): the
// segment/exponent for the top magnitude bits. Computed instead of the
// original 256-entry literal; the values are identical.
var encodeTable = new Array(256).fill(0).map((_, i) => i === 0 ? 0 : 31 - Math.clz32(i));

/**
 * Encode one signed 16-bit PCM sample as a mu-law byte.
 *
 * Fix: the original returned the raw bitwise NOT (a negative number such as
 * -1) and only worked because Uint8Array assignment wraps modulo 256; mask
 * with 0xff so the function itself returns the actual byte value (0-255).
 */
function encodeSample(sample) {
  const sign = sample >> 8 & 128;           // capture the sign bit
  if (sign !== 0) sample = -sample;         // work with the magnitude
  sample = sample + BIAS;
  if (sample > CLIP) sample = CLIP;
  const exponent = encodeTable[sample >> 7 & 255];
  const mantissa = sample >> exponent + 3 & 15;
  return ~(sign | exponent << 4 | mantissa) & 255; // mu-law bytes are stored complemented
}

/**
 * Encode a buffer of 16-bit PCM samples into G.711 mu-law bytes.
 *
 * @param pcmData Int16Array of linear PCM samples.
 * @returns Uint8Array of mu-law bytes, one per input sample.
 */
function pcm16ToMulaw(pcmData) {
  const mulawData = new Uint8Array(pcmData.length);
  for (let i = 0; i < pcmData.length; i++) {
    mulawData[i] = encodeSample(pcmData[i]);
  }
  return mulawData;
}
376
+
377
// src/helpers/bufferToInt16Array.ts
/**
 * Reinterpret a Node Buffer's bytes as 16-bit samples without copying — the
 * returned Int16Array aliases the same memory. Assumes the buffer starts at
 * an even byteOffset and holds a whole number of samples (Int16Array throws
 * otherwise) — TODO(review): confirm callers guarantee this.
 */
function bufferToInt16Array(buffer) {
  const { byteOffset, byteLength } = buffer;
  return new Int16Array(buffer.buffer, byteOffset, byteLength / 2);
}
381
+
382
// src/helpers/int16ArrayToBuffer.ts
/**
 * Wrap an Int16Array's underlying bytes in a Node Buffer without copying —
 * the Buffer aliases the same memory as the array.
 */
function int16ArrayToBuffer(int16Array) {
  const { buffer, byteOffset, byteLength } = int16Array;
  return Buffer.from(buffer, byteOffset, byteLength);
}
386
+
387
// src/helpers/convertAudioFormat.ts
/**
 * Serialize 16-bit PCM samples into a Buffer in the requested wire encoding.
 *
 * @param audio Int16Array of linear PCM samples.
 * @param encoding "mulaw" (G.711 companded) or "pcm" (raw 16-bit bytes).
 * @throws Error for any other encoding value.
 */
function encodePcm(audio, encoding) {
  if (encoding === "mulaw") {
    return Buffer.from(pcm16ToMulaw(audio));
  }
  if (encoding === "pcm") {
    return int16ArrayToBuffer(audio);
  }
  throw new Error(`Could not encode audio: Unsupported encoding: ${encoding}`);
}
398
/**
 * Deserialize an audio Buffer in the given wire encoding back into 16-bit
 * PCM samples.
 *
 * @param audio Buffer of encoded audio bytes.
 * @param encoding "mulaw" (G.711 companded) or "pcm" (raw 16-bit bytes).
 * @throws Error for any other encoding value.
 */
function decodeToPcm(audio, encoding) {
  if (encoding === "mulaw") {
    return mulawToPcm16(audio);
  }
  if (encoding === "pcm") {
    return bufferToInt16Array(audio);
  }
  throw new Error(`Could not decode audio: Unsupported encoding: ${encoding}`);
}
408
+
409
// src/helpers/detectFormat.ts
/**
 * Sniff the container format of an audio file from its magic bytes.
 *
 * @param bytes Leading bytes of the file (a full file also works).
 * @returns { format, mimeType, description } for a recognized signature,
 *          or null when nothing matches.
 */
function identifyAudioFormat(bytes) {
  // True when every byte of `expected` appears at `offset` in `bytes`.
  const matches = (offset, expected) => {
    if (offset + expected.length > bytes.length) return false;
    for (let i = 0; i < expected.length; i++) {
      if (bytes[offset + i] !== expected[i]) return false;
    }
    return true;
  };
  // "RIFF" .... "WAVE"
  if (matches(0, [0x52, 0x49, 0x46, 0x46]) && matches(8, [0x57, 0x41, 0x56, 0x45])) {
    return { format: "WAV", mimeType: "audio/wav", description: "Waveform Audio File Format" };
  }
  // "ID3" tag or a bare MPEG frame sync
  if (matches(0, [0x49, 0x44, 0x33]) || matches(0, [0xff, 0xfb]) || matches(0, [0xff, 0xf3]) || matches(0, [0xff, 0xf2])) {
    return { format: "MP3", mimeType: "audio/mpeg", description: "MPEG Audio Layer III" };
  }
  // "fLaC"
  if (matches(0, [0x66, 0x4c, 0x61, 0x43])) {
    return { format: "FLAC", mimeType: "audio/flac", description: "Free Lossless Audio Codec" };
  }
  // "OggS"
  if (matches(0, [0x4f, 0x67, 0x67, 0x53])) {
    return { format: "OGG", mimeType: "audio/ogg", description: "Ogg Vorbis" };
  }
  // "ftyp" box with an "M4A " or "isom" brand
  if (matches(4, [0x66, 0x74, 0x79, 0x70]) && (matches(8, [0x4d, 0x34, 0x41, 0x20]) || matches(8, [0x69, 0x73, 0x6f, 0x6d]))) {
    return { format: "M4A", mimeType: "audio/mp4", description: "MPEG-4 Audio" };
  }
  // "FORM" .... "AIFF"
  if (matches(0, [0x46, 0x4f, 0x52, 0x4d]) && matches(8, [0x41, 0x49, 0x46, 0x46])) {
    return { format: "AIFF", mimeType: "audio/aiff", description: "Audio Interchange File Format" };
  }
  // ASF header object GUID (Windows Media container)
  const ASF_GUID = [0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf, 0x11, 0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62, 0xce, 0x6c];
  if (matches(0, ASF_GUID)) {
    return { format: "WMA", mimeType: "audio/x-ms-wma", description: "Windows Media Audio" };
  }
  return null;
}
493
+
494
// src/helpers/generateFadeOutSamples.ts
/**
 * Build a linear fade-out ramp tapering lastSampleValue down to zero,
 * returned as the byte view of platform-endian 16-bit PCM samples.
 *
 * @param lastSampleValue Final PCM sample of the preceding audio (Int16 range).
 * @param fadeDurationMs  Length of the fade in milliseconds.
 * @param sampleRate      Samples per second.
 * @returns Uint8Array view over the generated Int16 samples' buffer.
 */
function generateFadeOutSamples(lastSampleValue, fadeDurationMs, sampleRate) {
  const fadeNumSamples = Math.ceil(fadeDurationMs / 1e3 * sampleRate);
  const fadeSamples = new Int16Array(fadeNumSamples);
  for (let i = 0; i < fadeNumSamples; i++) {
    // Fix: with exactly one fade sample the original computed 0/0 = NaN and
    // only produced 0 through Int16Array's NaN coercion; make that explicit.
    const progress = fadeNumSamples > 1 ? 1 - i / (fadeNumSamples - 1) : 0;
    fadeSamples[i] = Math.round(lastSampleValue * progress);
  }
  return new Uint8Array(fadeSamples.buffer);
}
504
+
505
// src/helpers/resamplePcm.ts
/**
 * Resample 16-bit PCM between sample rates using linear interpolation.
 *
 * @param pcm Int16Array of source samples (single channel assumed —
 *        TODO(review): confirm callers never pass interleaved stereo).
 * @param originalSampleRate Source rate in Hz.
 * @param targetSampleRate   Desired rate in Hz.
 * @returns New Int16Array at the target rate.
 */
function resamplePcm(pcm, originalSampleRate, targetSampleRate) {
  const step = originalSampleRate / targetSampleRate;
  const outLength = Math.floor(pcm.length / step);
  const out = new Int16Array(outLength);
  for (let i = 0; i < outLength; i++) {
    const position = i * step;
    const left = Math.floor(position);
    const right = Math.min(left + 1, pcm.length - 1);
    const t = position - left;
    // Same formula as before (base + delta * fraction) so rounding matches.
    out[i] = Math.round(pcm[left] + (pcm[right] - pcm[left]) * t);
  }
  return out;
}
521
+
522
+ export { bufferToInt16Array, decodeToPcm, decodeWAV, encodePcm, generateFadeOutSamples, identifyAudioFormat, int16ArrayToBuffer, mulawToPcm16, pcm16ToMulaw, resamplePcm };
@@ -0,0 +1,146 @@
1
+ import { A as AudioFormat } from './index-o4B-ThOL.mjs';
2
+ import { DeepgramClient, LiveSchema } from '@deepgram/sdk';
3
+ import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js';
4
+ import { StreamTextToSpeechRequest } from '@elevenlabs/elevenlabs-js/api/index.js';
5
+ import { HumeClient } from 'hume';
6
+ import OpenAI from 'openai';
7
+
8
/**
 * Abstract base class for speech-to-text engines used by MagmaFlow.
 * Concrete engines implement input/flush/kill; onSpeechDetected and onOutput
 * are hooks (their default behavior is not visible in this declaration file).
 */
declare abstract class MagmaFlowSpeechToText {
    /** Push a chunk of audio to be transcribed. */
    abstract input(audio: Buffer): void;
    /** Flush buffered state — presumably finalizes a pending transcript; confirm in implementation. */
    abstract flush(): void;
    /** Tear down the engine and release its resources. */
    abstract kill(): void;
    /** Hook invoked when speech is detected — assigned/overridden by the consumer. */
    onSpeechDetected(): void;
    /** Hook invoked with transcribed text — assigned/overridden by the consumer. */
    onOutput(text: string): void;
    constructor();
}
16
+
17
/**
 * Abstract base class for text-to-speech engines used by MagmaFlow.
 * Concrete engines implement input/kill; onOutput is the hook through which
 * generated audio is delivered.
 */
declare abstract class MagmaFlowTextToSpeech {
    /** Push a piece of text to be synthesized. */
    abstract input(text: string): void;
    /** Tear down the engine and release its resources. */
    abstract kill(): void;
    /** Hook invoked with synthesized audio; null presumably signals end of stream — confirm in implementation. */
    onOutput(audio: Buffer | null): void;
    constructor();
}
23
+
24
/**
 * Constructor arguments for {@link MagmaFlow}: the STT/TTS engines to wire
 * together, the audio format on each side of the pipeline, and the callbacks
 * fired as speech, text, and audio are produced.
 */
type MagmaFlowArgs = {
    stt: MagmaFlowSpeechToText;
    tts: MagmaFlowTextToSpeech;
    inputFormat: AudioFormat;
    outputFormat: AudioFormat;
    onSpeechDetected: () => void;
    onTextOutput: (text: string) => void;
    onAudioOutput: (audio: Buffer) => void;
};
/**
 * Orchestrates a speech pipeline around an STT and a TTS engine. The private
 * textBuffer/textQueue/generatingAudio/audioBuffer fields suggest text is
 * queued and synthesized asynchronously before audio is emitted — the exact
 * flow lives in the implementation, not this declaration file.
 */
declare class MagmaFlow {
    private stt;
    private tts;
    private inputFormat;
    private outputFormat;
    private onAudioOutput;
    private textBuffer;
    private textQueue;
    private generatingAudio;
    private audioBuffer;
    constructor(args: MagmaFlowArgs);
    /** Push a chunk of input audio into the flow. */
    inputAudio(audio: Buffer): void;
    /** Push text into the flow directly (no transcription step needed). */
    inputText(text: string): void;
    private generateAudio;
    private sendAudio;
    /** Tear down the flow and its underlying engines. */
    kill(): void;
}
50
+
51
/** Deepgram transcription models exposed by this package. */
declare enum DeepgramModel {
    NOVA_3 = "nova-3"
}
/** Transcription languages exposed by this package. */
declare enum DeepgramLanguage {
    EN_US = "en-US"
}
/**
 * User-tunable live-transcription options: everything from Deepgram's
 * LiveSchema except the keys the integration manages itself (model, VAD
 * events, interim results, encoding, sample rate, channels, endpointing,
 * utterance end).
 */
type DeepgramConfig = Omit<LiveSchema, 'model' | 'vad_events' | 'interim_results' | 'encoding' | 'sample_rate' | 'channels' | 'endpointing' | 'utterance_end_ms'>;
/** Constructor arguments for {@link DeepgramSTT}. */
type DeepgramSTTArgs = {
    /** Optional pre-configured client — presumably one is created internally when omitted; confirm in implementation. */
    client?: DeepgramClient;
    model: DeepgramModel;
    config?: DeepgramConfig;
};
/** Speech-to-text engine backed by Deepgram's live transcription API. */
declare class DeepgramSTT extends MagmaFlowSpeechToText {
    private client;
    private connection;
    private config;
    private textBuffer;
    constructor(args: DeepgramSTTArgs);
    private setup;
    /** Forward an audio chunk over the live connection. */
    input(audio: Buffer): void;
    flush(): void;
    kill(): void;
    private handleTranscriptionEvent;
    private onOpen;
    /** Presumably pings the live connection to keep it from timing out — confirm. */
    private keepAlive;
}
77
+
78
/** Constructor arguments for {@link DeepgramTTS}. */
type DeepgramTTSArgs = {
    /** Optional pre-configured client — presumably one is created internally when omitted; confirm in implementation. */
    client?: DeepgramClient;
};
/** Text-to-speech engine backed by Deepgram. */
declare class DeepgramTTS extends MagmaFlowTextToSpeech {
    private client;
    constructor(args: DeepgramTTSArgs);
    /** Asynchronous initialization, separate from the constructor. */
    setup(): Promise<void>;
    /** Accepts text to synthesize; null widens the base signature — likely an end-of-input marker, confirm. */
    input(text: string | null): void;
    private output;
    kill(): void;
    /** Reset internal state (not part of the abstract base class). */
    reset(): void;
}
90
+
91
/** ElevenLabs voice IDs bundled with the package, keyed by a friendly name. */
declare enum ElevenVoice {
    chris = "iP95p4xoKVk53GoZ742B",
    josh = "TxGEqnHWrfWFTfGW9XjX",
    rachel = "21m00Tcm4TlvDq8ikWAM",
    laura = "FGY2WhTYpPnrIDTdsKH5",
    felicity = "aTbnroHRGIomiKpqAQR8"
}
/**
 * User-tunable streaming-synthesis options: everything from ElevenLabs'
 * StreamTextToSpeechRequest except the keys the integration manages itself
 * (outputFormat, text, modelId).
 */
type ElevenLabsConfig = Omit<StreamTextToSpeechRequest, 'outputFormat' | 'text' | 'modelId'>;
/** Constructor arguments for {@link ElevenLabsTTS}. */
type ElevenLabsTTSArgs = {
    /** Optional pre-configured client — presumably one is created internally when omitted; confirm in implementation. */
    client?: ElevenLabsClient;
    model: string;
    voice: ElevenVoice;
    config?: ElevenLabsConfig;
};
/** Text-to-speech engine backed by ElevenLabs streaming synthesis. */
declare class ElevenLabsTTS extends MagmaFlowTextToSpeech {
    private client;
    private model;
    private voice;
    private config;
    constructor(args: ElevenLabsTTSArgs);
    /** Asynchronous initialization, separate from the constructor. */
    setup(): Promise<void>;
    /** Accepts text to synthesize; null widens the base signature — likely an end-of-input marker, confirm. */
    input(text: string | null): void;
    private output;
    kill(): void;
    /** Reset internal state (not part of the abstract base class). */
    reset(): void;
}
117
+
118
/** Constructor arguments for {@link HumeTTS}. */
type HumeTTSArgs = {
    /** Optional pre-configured client — presumably one is created internally when omitted; confirm in implementation. */
    client?: HumeClient;
};
/** Text-to-speech engine backed by Hume. */
declare class HumeTTS extends MagmaFlowTextToSpeech {
    private client;
    constructor(args: HumeTTSArgs);
    /** Asynchronous initialization, separate from the constructor. */
    setup(): Promise<void>;
    /** Accepts text to synthesize; null widens the base signature — likely an end-of-input marker, confirm. */
    input(text: string | null): void;
    private output;
    kill(): void;
    /** Reset internal state (not part of the abstract base class). */
    reset(): void;
}
130
+
131
/** Constructor arguments for {@link WhisperTTS}. */
type WhisperTTSArgs = {
    /** Optional pre-configured client — presumably one is created internally when omitted; confirm in implementation. */
    client?: OpenAI;
};
/**
 * Text-to-speech engine backed by the OpenAI client.
 * NOTE(review): named "Whisper" (OpenAI's speech-to-text model) yet it
 * extends MagmaFlowTextToSpeech — presumably it calls OpenAI's speech
 * synthesis endpoint; verify against the implementation.
 */
declare class WhisperTTS extends MagmaFlowTextToSpeech {
    private client;
    constructor(args: WhisperTTSArgs);
    /** Asynchronous initialization, separate from the constructor. */
    setup(): Promise<void>;
    /** Accepts text to synthesize; null widens the base signature — likely an end-of-input marker, confirm. */
    input(text: string | null): void;
    private output;
    kill(): void;
    /** Reset internal state (not part of the abstract base class). */
    reset(): void;
}
143
+
144
/**
 * Split text into chunks of roughly targetLength characters (the default
 * lives in the implementation, not visible here) — presumably to feed TTS
 * engines incrementally; confirm chunking behavior at word boundaries.
 */
declare function splitTextIntoChunks(text: string, targetLength?: number): string[];
145
+
146
+ export { DeepgramLanguage, DeepgramModel, DeepgramSTT, type DeepgramSTTArgs, DeepgramTTS, type DeepgramTTSArgs, ElevenLabsTTS, type ElevenLabsTTSArgs, ElevenVoice, HumeTTS, type HumeTTSArgs, MagmaFlow, type MagmaFlowArgs, MagmaFlowSpeechToText, MagmaFlowTextToSpeech, WhisperTTS, type WhisperTTSArgs, splitTextIntoChunks };