sera-ai 1.0.25 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -46,13 +46,10 @@ var InvalidSampleRateError = class extends Error {
46
46
  };
47
47
 
48
48
  // src/hooks/useFFmpegConverter.ts
49
- var createFFmpegWorker = () => {
49
+ var createWavConversionWorker = () => {
50
50
  const minSampleRate = MIN_VALID_SAMPLE_RATE;
51
51
  const maxSampleRate = MAX_VALID_SAMPLE_RATE;
52
52
  const workerCode = `
53
- let ffmpegModule = null;
54
-
55
- // Injected constants from audio.ts
56
53
  const MIN_SAMPLE_RATE = ${minSampleRate};
57
54
  const MAX_SAMPLE_RATE = ${maxSampleRate};
58
55
 
@@ -62,254 +59,80 @@ var createFFmpegWorker = () => {
62
59
  }
63
60
  };
64
61
 
65
- const helperFunctions = {
66
- float32ToWavFile: function(left, sampleRate) {
67
- validateSampleRate(sampleRate);
68
- const length = left.length;
69
- const buffer = new ArrayBuffer(44 + length * 2);
70
- const view = new DataView(buffer);
71
-
72
- const writeString = (offset, string) => {
73
- for (let i = 0; i < string.length; i++) {
74
- view.setUint8(offset + i, string.charCodeAt(i));
75
- }
76
- };
77
-
78
- const floatTo16BitPCM = (output, offset, input) => {
79
- for (let i = 0; i < input.length; i++, offset += 2) {
80
- const s = Math.max(-1, Math.min(1, input[i]));
81
- output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
82
- }
83
- };
84
-
85
- writeString(0, 'RIFF');
86
- view.setUint32(4, 36 + length * 2, true);
87
- writeString(8, 'WAVE');
88
- writeString(12, 'fmt ');
89
- view.setUint32(16, 16, true);
90
- view.setUint16(20, 1, true);
91
- view.setUint16(22, 1, true);
92
- view.setUint32(24, sampleRate, true);
93
- view.setUint32(28, sampleRate * 2, true);
94
- view.setUint16(32, 2, true);
95
- view.setUint16(34, 16, true);
96
- writeString(36, 'data');
97
- view.setUint32(40, length * 2, true);
98
-
99
- floatTo16BitPCM(view, 44, left);
100
-
101
- return buffer;
102
- },
103
-
104
- processAudioData: function(audioBuffer, options = {}) {
105
- try {
106
- const { quality = 1, bitRate = 128000, sampleRate } = options;
107
- validateSampleRate(sampleRate);
108
- const float32Array = new Float32Array(audioBuffer);
109
- const wavBuffer = this.float32ToWavFile(float32Array, sampleRate);
62
+ const float32ToWavFile = (left, sampleRate) => {
63
+ validateSampleRate(sampleRate);
64
+ const length = left.length;
65
+ const buffer = new ArrayBuffer(44 + length * 2);
66
+ const view = new DataView(buffer);
110
67
 
111
- return {
112
- buffer: wavBuffer,
113
- size: wavBuffer.byteLength,
114
- duration: float32Array.length / sampleRate
115
- };
116
- } catch (error) {
117
- throw new Error('Failed to process audio data: ' + error.message);
68
+ const writeString = (offset, string) => {
69
+ for (let i = 0; i < string.length; i++) {
70
+ view.setUint8(offset + i, string.charCodeAt(i));
118
71
  }
119
- },
120
-
121
- removeSilenceFromAudio: function(audioBuffer, options = {}) {
122
- try {
123
- const {
124
- silenceThreshold = 0.005, // Low threshold to only detect true silence
125
- minSilenceDuration = 1.5, // Only remove silences longer than 1.5 seconds
126
- sampleRate
127
- } = options;
128
- validateSampleRate(sampleRate);
129
-
130
- const float32Array = new Float32Array(audioBuffer);
131
- const minSilenceSamples = Math.floor(minSilenceDuration * sampleRate);
132
- const result = [];
133
-
134
- let silenceStart = -1;
135
- let silenceLength = 0;
136
- let totalAudioSamples = 0;
137
- let totalSilentSamples = 0;
138
-
139
- // First pass: analyze audio content
140
- for (let i = 0; i < float32Array.length; i++) {
141
- const sample = Math.abs(float32Array[i]);
142
- if (sample > silenceThreshold) {
143
- totalAudioSamples++;
144
- } else {
145
- totalSilentSamples++;
146
- }
147
- }
148
-
149
- const audioPercentage = totalAudioSamples / float32Array.length;
150
- console.log(\`[AUDIO] Audio content: \${(audioPercentage * 100).toFixed(2)}%\`);
151
-
152
- // If less than 5% is audio, skip silence removal to prevent over-processing
153
- if (audioPercentage < 0.05) {
154
- console.warn('[WARN] Audio content too low, skipping silence removal to preserve speech');
155
- const wavBuffer = this.float32ToWavFile(float32Array, sampleRate);
156
- return {
157
- buffer: wavBuffer,
158
- size: wavBuffer.byteLength,
159
- duration: float32Array.length / sampleRate,
160
- originalDuration: float32Array.length / sampleRate,
161
- reductionPercentage: 0
162
- };
163
- }
164
-
165
- // Second pass: remove long silences
166
- for (let i = 0; i < float32Array.length; i++) {
167
- const sample = Math.abs(float32Array[i]);
168
-
169
- if (sample < silenceThreshold) {
170
- if (silenceStart === -1) {
171
- silenceStart = i;
172
- }
173
- silenceLength++;
174
- } else {
175
- // End of silence detected
176
- if (silenceStart !== -1) {
177
- if (silenceLength < minSilenceSamples) {
178
- // Keep short silences (natural pauses)
179
- for (let j = silenceStart; j < i; j++) {
180
- result.push(float32Array[j]);
181
- }
182
- } else {
183
- // Replace long silences with a brief pause (0.15 seconds)
184
- const shortSilenceSamples = Math.floor(0.15 * sampleRate);
185
- for (let j = 0; j < shortSilenceSamples; j++) {
186
- result.push(0);
187
- }
188
- }
189
- silenceStart = -1;
190
- silenceLength = 0;
191
- }
192
-
193
- // Add non-silent sample
194
- result.push(float32Array[i]);
195
- }
196
- }
197
-
198
- // Handle trailing silence
199
- if (silenceStart !== -1 && silenceLength >= minSilenceSamples) {
200
- const shortSilenceSamples = Math.floor(0.15 * sampleRate);
201
- for (let j = 0; j < shortSilenceSamples; j++) {
202
- result.push(0);
203
- }
204
- } else if (silenceStart !== -1) {
205
- for (let j = silenceStart; j < float32Array.length; j++) {
206
- result.push(float32Array[j]);
207
- }
208
- }
209
-
210
- const processedArray = new Float32Array(result);
211
-
212
- // Safety check: ensure we haven't removed too much content
213
- const reductionPercentage = Math.round((1 - processedArray.length / float32Array.length) * 100);
214
- if (reductionPercentage > 80) {
215
- console.warn('[WARN] Excessive silence removal detected, skipping to preserve speech naturalness');
216
- const wavBuffer = this.float32ToWavFile(float32Array, sampleRate);
217
- return {
218
- buffer: wavBuffer,
219
- size: wavBuffer.byteLength,
220
- duration: float32Array.length / sampleRate,
221
- originalDuration: float32Array.length / sampleRate,
222
- reductionPercentage: 0
223
- };
224
- }
225
-
226
- const wavBuffer = this.float32ToWavFile(processedArray, sampleRate);
227
-
228
- console.log(\`[SUCCESS] Silence removal complete. Original Duration: \${(float32Array.length / sampleRate).toFixed(2)}s, Processed Duration: \${(processedArray.length / sampleRate).toFixed(2)}s, Reduction: \${reductionPercentage}%\`);
229
-
230
- return {
231
- buffer: wavBuffer,
232
- size: wavBuffer.byteLength,
233
- duration: processedArray.length / sampleRate,
234
- originalDuration: float32Array.length / sampleRate,
235
- reductionPercentage: reductionPercentage
236
- };
237
- } catch (error) {
238
- throw new Error('Failed to remove silence: ' + error.message);
72
+ };
73
+
74
+ const floatTo16BitPCM = (output, offset, input) => {
75
+ for (let i = 0; i < input.length; i++, offset += 2) {
76
+ const s = Math.max(-1, Math.min(1, input[i]));
77
+ output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
239
78
  }
240
- }
79
+ };
80
+
81
+ writeString(0, 'RIFF');
82
+ view.setUint32(4, 36 + length * 2, true);
83
+ writeString(8, 'WAVE');
84
+ writeString(12, 'fmt ');
85
+ view.setUint32(16, 16, true);
86
+ view.setUint16(20, 1, true);
87
+ view.setUint16(22, 1, true);
88
+ view.setUint32(24, sampleRate, true);
89
+ view.setUint32(28, sampleRate * 2, true);
90
+ view.setUint16(32, 2, true);
91
+ view.setUint16(34, 16, true);
92
+ writeString(36, 'data');
93
+ view.setUint32(40, length * 2, true);
94
+
95
+ floatTo16BitPCM(view, 44, left);
96
+
97
+ return buffer;
241
98
  };
242
99
 
243
100
  self.onmessage = function(e) {
244
101
  const { type, audioBuffer, options } = e.data;
245
-
102
+
246
103
  if (type === 'convertWav') {
247
104
  try {
248
105
  self.postMessage({ type: 'progress', data: { progress: 10, message: 'Starting conversion...' } });
249
-
250
- const result = helperFunctions.processAudioData(audioBuffer, options);
251
-
106
+
107
+ const { sampleRate } = options;
108
+ validateSampleRate(sampleRate);
109
+ const float32Array = new Float32Array(audioBuffer);
110
+ const wavBuffer = float32ToWavFile(float32Array, sampleRate);
111
+
252
112
  self.postMessage({ type: 'progress', data: { progress: 50, message: 'Processing audio...' } });
253
-
113
+
254
114
  setTimeout(() => {
255
115
  self.postMessage({ type: 'progress', data: { progress: 90, message: 'Finalizing...' } });
256
-
257
- setTimeout(() => {
258
- self.postMessage({
259
- type: 'complete',
260
- data: {
261
- buffer: result.buffer,
262
- size: result.size,
263
- duration: result.duration
264
- }
265
- });
266
- }, 100);
267
- }, 100);
268
-
269
- } catch (error) {
270
- self.postMessage({
271
- type: 'error',
272
- error: error.message || 'Unknown conversion error'
273
- });
274
- }
275
- } else if (type === 'removeSilence') {
276
- try {
277
- self.postMessage({ type: 'progress', progress: 10, message: 'Analyzing audio...' });
278
-
279
- const result = helperFunctions.removeSilenceFromAudio(audioBuffer, options);
280
-
281
- self.postMessage({ type: 'progress', progress: 70, message: 'Removing silence...' });
282
-
283
- setTimeout(() => {
284
- self.postMessage({ type: 'progress', progress: 90, message: 'Finalizing...' });
285
-
116
+
286
117
  setTimeout(() => {
287
- self.postMessage({
288
- type: 'complete',
289
- result: {
290
- data: result.buffer,
291
- name: options.fileName || 'processed_audio.wav',
292
- type: options.fileType || 'audio/wav',
293
- stats: {
294
- originalDuration: result.originalDuration,
295
- processedDuration: result.duration,
296
- reductionPercentage: result.reductionPercentage,
297
- originalSize: options.originalSize || 0,
298
- processedSize: result.size
299
- }
118
+ self.postMessage({
119
+ type: 'complete',
120
+ data: {
121
+ buffer: wavBuffer,
122
+ size: wavBuffer.byteLength,
123
+ duration: float32Array.length / sampleRate
300
124
  }
301
125
  });
302
126
  }, 100);
303
127
  }, 100);
304
-
128
+
305
129
  } catch (error) {
306
- self.postMessage({
307
- type: 'error',
308
- error: error.message || 'Unknown silence removal error'
130
+ self.postMessage({
131
+ type: 'error',
132
+ error: error.message || 'Unknown conversion error'
309
133
  });
310
134
  }
311
135
  } else if (type === 'init') {
312
- // For compatibility with existing code
313
136
  self.postMessage({ type: 'ready' });
314
137
  }
315
138
  };
@@ -376,7 +199,7 @@ var useFFmpegConverter = () => {
376
199
  setError(null);
377
200
  setStatusMessage("Converting audio...");
378
201
  try {
379
- const workerUrl = createFFmpegWorker();
202
+ const workerUrl = createWavConversionWorker();
380
203
  const worker = new Worker(workerUrl);
381
204
  return new Promise((resolve, reject) => {
382
205
  worker.onmessage = (e) => {
@@ -495,164 +318,79 @@ var useFFmpegConverter = () => {
495
318
  }
496
319
  const maxFileSize = 50 * 1024 * 1024;
497
320
  if (file.size > maxFileSize) {
498
- console.warn(`File too large (${file.size} bytes), skipping silence removal`);
321
+ console.warn(`[SILENCE] File too large (${file.size} bytes), skipping silence removal`);
499
322
  return file;
500
323
  }
501
324
  try {
502
325
  setIsConverting(true);
503
326
  setError(null);
504
327
  setProgress(0);
505
- setStatusMessage("Starting audio processing...");
506
- const arrayBuffer = await file.arrayBuffer();
507
- console.log(`[INFO] Processing WAV file: ${file.size} bytes, name: ${file.name}`);
508
- const dataView = new DataView(arrayBuffer);
509
- const riffSignature = String.fromCharCode(
510
- dataView.getUint8(0),
511
- dataView.getUint8(1),
512
- dataView.getUint8(2),
513
- dataView.getUint8(3)
514
- );
515
- if (riffSignature !== "RIFF") {
516
- console.error("Invalid WAV file: Missing RIFF header");
517
- return file;
518
- }
519
- const waveSignature = String.fromCharCode(
520
- dataView.getUint8(8),
521
- dataView.getUint8(9),
522
- dataView.getUint8(10),
523
- dataView.getUint8(11)
524
- );
525
- if (waveSignature !== "WAVE") {
526
- console.error("Invalid WAV file: Missing WAVE signature");
527
- return file;
528
- }
529
- let dataChunkOffset = 12;
530
- let audioDataStart = -1;
531
- let audioDataLength = 0;
532
- while (dataChunkOffset < arrayBuffer.byteLength - 8) {
533
- const chunkId = String.fromCharCode(
534
- dataView.getUint8(dataChunkOffset),
535
- dataView.getUint8(dataChunkOffset + 1),
536
- dataView.getUint8(dataChunkOffset + 2),
537
- dataView.getUint8(dataChunkOffset + 3)
538
- );
539
- const chunkSize = dataView.getUint32(dataChunkOffset + 4, true);
540
- if (chunkId === "data") {
541
- audioDataStart = dataChunkOffset + 8;
542
- audioDataLength = chunkSize;
543
- break;
544
- }
545
- dataChunkOffset += 8 + chunkSize;
546
- }
547
- if (audioDataStart === -1) {
548
- console.error("No audio data chunk found in WAV file");
549
- return file;
550
- }
551
- const originalSampleRate = dataView.getUint32(24, true);
552
- console.log(`[AUDIO] Found audio data: start=${audioDataStart}, length=${audioDataLength} bytes, sampleRate=${originalSampleRate}Hz`);
553
- const audioData = new Int16Array(arrayBuffer, audioDataStart, audioDataLength / 2);
554
- if (audioData.length === 0) {
555
- console.error("No audio data extracted from WAV file");
556
- return file;
557
- }
558
- const float32Data = new Float32Array(audioData.length);
559
- let nonZeroCount = 0;
560
- for (let i = 0; i < audioData.length; i++) {
561
- float32Data[i] = audioData[i] / 32768;
562
- if (Math.abs(float32Data[i]) > 1e-3) nonZeroCount++;
563
- }
564
- const audioPercentage = nonZeroCount / float32Data.length;
565
- console.log(`[INFO] Audio validation: ${audioData.length} samples, ${nonZeroCount} non-zero samples (${(audioPercentage * 100).toFixed(2)}%)`);
566
- if (audioPercentage < 0.01) {
567
- console.warn(`[WARN] Very little audio content (${(audioPercentage * 100).toFixed(2)}%), skipping silence removal`);
568
- return file;
569
- }
570
- const workerUrl = createFFmpegWorker();
571
- const worker = new Worker(workerUrl);
572
- return new Promise((resolve, reject) => {
573
- const messageHandler = (e) => {
574
- const { type, progress: workerProgress, message, result, error: error2 } = e.data;
575
- switch (type) {
576
- case "progress":
577
- setProgress(workerProgress);
578
- setStatusMessage(message);
579
- break;
580
- case "complete":
581
- worker.removeEventListener("message", messageHandler);
582
- worker.terminate();
583
- URL.revokeObjectURL(workerUrl);
584
- try {
585
- const processedFile = new File([new Uint8Array(result.data)], result.name, {
586
- type: result.type
587
- });
588
- console.log("[INFO] Silence removal + audio compression results:", result.stats);
589
- setProgress(100);
590
- setStatusMessage("Processing complete!");
591
- setTimeout(() => {
592
- setIsConverting(false);
593
- setProgress(0);
594
- setStatusMessage("");
595
- }, 1e3);
596
- resolve(processedFile);
597
- } catch (fileError) {
598
- console.error("Error creating processed file:", fileError);
599
- setIsConverting(false);
600
- setProgress(0);
601
- setStatusMessage("");
602
- resolve(file);
603
- }
604
- break;
605
- case "error":
606
- worker.removeEventListener("message", messageHandler);
607
- worker.terminate();
608
- URL.revokeObjectURL(workerUrl);
609
- console.error("Worker processing error:", error2);
610
- setError(`Processing failed: ${error2}`);
611
- setIsConverting(false);
612
- setProgress(0);
613
- setStatusMessage("");
614
- resolve(file);
615
- break;
616
- }
617
- };
618
- worker.addEventListener("message", messageHandler);
619
- worker.onerror = (workerError) => {
620
- worker.removeEventListener("message", messageHandler);
621
- worker.terminate();
622
- URL.revokeObjectURL(workerUrl);
623
- console.error("Worker error during processing:", workerError);
624
- setError("Worker processing failed");
328
+ setStatusMessage("Removing silence...");
329
+ console.log(`[SILENCE] Processing file: ${file.size} bytes, name: ${file.name}`);
330
+ if (!ffmpegInstance) {
331
+ setStatusMessage("Loading FFmpeg for silence removal...");
332
+ const loaded = await loadFFmpeg();
333
+ if (!loaded || !ffmpegInstance) {
334
+ console.error("[SILENCE] Failed to load FFmpeg");
625
335
  setIsConverting(false);
626
336
  setProgress(0);
627
337
  setStatusMessage("");
628
- resolve(file);
629
- };
630
- worker.postMessage({
631
- type: "removeSilence",
632
- audioBuffer: float32Data.buffer,
633
- options: {
634
- silenceThreshold: 5e-3,
635
- // Lower threshold to only detect true silence
636
- minSilenceDuration: 1.5,
637
- // Only remove silences longer than 1.5 seconds
638
- sampleRate: originalSampleRate,
639
- // Preserve original sample rate to avoid speed changes
640
- fileName: file.name,
641
- fileType: file.type,
642
- originalSize: file.size
643
- }
644
- });
645
- console.log(`\u{1F680} Sent ${float32Data.length} samples to silence removal worker`);
646
- });
338
+ return file;
339
+ }
340
+ }
341
+ setProgress(10);
342
+ setStatusMessage("Writing audio to FFmpeg...");
343
+ const inputFileName = "input-silence.wav";
344
+ const outputFileName = "output-nosilence.wav";
345
+ const wavData = await ffmpeg.fetchFile(file);
346
+ ffmpegInstance.FS("writeFile", inputFileName, wavData);
347
+ const originalSize = file.size;
348
+ console.log(`[SILENCE] Input file written: ${originalSize} bytes`);
349
+ setProgress(30);
350
+ setStatusMessage("Analyzing and removing silence...");
351
+ await ffmpegInstance.run(
352
+ "-i",
353
+ inputFileName,
354
+ "-af",
355
+ "silenceremove=stop_periods=-1:stop_threshold=-35dB:stop_duration=0.5:detection=peak,apad=pad_dur=0.5",
356
+ "-acodec",
357
+ "pcm_s16le",
358
+ outputFileName
359
+ );
360
+ setProgress(80);
361
+ setStatusMessage("Reading processed audio...");
362
+ const outputData = ffmpegInstance.FS("readFile", outputFileName);
363
+ try {
364
+ ffmpegInstance.FS("unlink", inputFileName);
365
+ ffmpegInstance.FS("unlink", outputFileName);
366
+ } catch (cleanupErr) {
367
+ console.warn("[SILENCE] Cleanup warning:", cleanupErr);
368
+ }
369
+ const processedSize = outputData.length;
370
+ const reductionPercent = Math.round((1 - processedSize / originalSize) * 100);
371
+ console.log(`[SILENCE] Complete: ${originalSize} bytes -> ${processedSize} bytes (${reductionPercent}% reduction)`);
372
+ const processedFile = new File(
373
+ [new Uint8Array(outputData)],
374
+ file.name,
375
+ { type: "audio/wav" }
376
+ );
377
+ setProgress(100);
378
+ setStatusMessage("Silence removal complete");
379
+ setTimeout(() => {
380
+ setIsConverting(false);
381
+ setProgress(0);
382
+ setStatusMessage("");
383
+ }, 500);
384
+ return processedFile;
647
385
  } catch (err) {
648
- console.error("Worker removeSilence failed:", err);
649
- setError("Audio processing failed");
386
+ console.error("[SILENCE] FFmpeg silence removal failed:", err);
387
+ setError("Silence removal failed");
650
388
  setIsConverting(false);
651
389
  setProgress(0);
652
390
  setStatusMessage("");
653
391
  return file;
654
392
  }
655
- }, []);
393
+ }, [loadFFmpeg]);
656
394
  const reset = React3.useCallback(() => {
657
395
  setIsConverting(false);
658
396
  setProgress(0);