n8n-nodes-tts-bigboss 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -73,6 +73,16 @@ const PIPER_MODELS = [
73
73
  { name: 'German - Thorsten (Male) - Low', value: 'de_DE-thorsten-low' },
74
74
  ];
75
75
  const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
76
+ const EDGE_HEADERS = {
77
+ 'Authority': 'speech.platform.bing.com',
78
+ 'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
79
+ 'Sec-CH-UA-Mobile': '?0',
80
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
81
+ 'Sec-CH-UA-Platform': '"Windows"',
82
+ 'Accept-Encoding': 'gzip, deflate, br',
83
+ 'Accept-Language': 'en-US,en;q=0.9',
84
+ 'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
85
+ };
76
86
  const EDGE_VOICES = [
77
87
  { name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
78
88
  { name: 'Arabic (Egypt) - Shakir', value: 'ar-EG-ShakirNeural' },
@@ -127,6 +137,11 @@ class TTSBigBoss {
127
137
  value: 'coqui',
128
138
  description: 'Connect to a running Coqui TTS/XTTS server.',
129
139
  },
140
+ {
141
+ name: 'Kokoro TTS (Local OpenAI API)',
142
+ value: 'kokoro',
143
+ description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
144
+ },
130
145
  {
131
146
  name: 'System Command (Custom)',
132
147
  value: 'system',
@@ -276,12 +291,47 @@ class TTSBigBoss {
276
291
  },
277
292
  description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
278
293
  },
294
+ {
295
+ displayName: 'API URL',
296
+ name: 'kokoroUrl',
297
+ type: 'string',
298
+ default: 'http://localhost:8880/v1/audio/speech',
299
+ description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
300
+ displayOptions: {
301
+ show: {
302
+ engine: ['kokoro'],
303
+ },
304
+ },
305
+ },
306
+ {
307
+ displayName: 'Voice / Model',
308
+ name: 'kokoroVoice',
309
+ type: 'string',
310
+ default: 'af_bella',
311
+ description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
312
+ displayOptions: {
313
+ show: {
314
+ engine: ['kokoro'],
315
+ },
316
+ },
317
+ },
318
+ {
319
+ displayName: 'Speed',
320
+ name: 'kokoroSpeed',
321
+ type: 'number',
322
+ default: 1.0,
323
+ displayOptions: {
324
+ show: {
325
+ engine: ['kokoro'],
326
+ },
327
+ },
328
+ },
279
329
  {
280
330
  displayName: 'Base Server URL',
281
331
  name: 'coquiUrl',
282
332
  type: 'string',
283
- default: 'http://host.docker.internal:5002',
284
- description: 'Base URL of Coqui server (e.g. http://172.17.0.1:5002 if in Docker). Do not include /api/tts.',
333
+ default: 'http://localhost:5002',
334
+ description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
285
335
  displayOptions: {
286
336
  show: {
287
337
  engine: ['coqui'],
@@ -434,6 +484,21 @@ class TTSBigBoss {
434
484
  srtBuffer = Buffer.from(result.srt, 'utf8');
435
485
  }
436
486
  }
487
+ else if (engine === 'kokoro') {
488
+ const url = this.getNodeParameter('kokoroUrl', i);
489
+ const voice = this.getNodeParameter('kokoroVoice', i);
490
+ const speed = this.getNodeParameter('kokoroSpeed', i);
491
+ const payload = {
492
+ model: 'kokoro',
493
+ input: text,
494
+ voice: voice,
495
+ speed: speed,
496
+ response_format: 'mp3'
497
+ };
498
+ audioBuffer = await httpRequest(url, 'POST', payload);
499
+ const duration = getAudioDuration(audioBuffer, 'mp3');
500
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
501
+ }
437
502
  else if (engine === 'piper_local') {
438
503
  let piperModel = this.getNodeParameter('piperModel', i);
439
504
  if (piperModel === 'custom') {
@@ -456,7 +521,7 @@ class TTSBigBoss {
456
521
  if (code === 0)
457
522
  resolve();
458
523
  if (errData.includes('json.exception.parse_error')) {
459
- reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted (HTML instead of JSON?). Try deleting the file at ${configPath} and running again.`));
524
+ reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
460
525
  }
461
526
  else {
462
527
  reject(new Error(`Piper failed (exit ${code}): ${errData}`));
@@ -467,7 +532,8 @@ class TTSBigBoss {
467
532
  if (!fs.existsSync(outFile))
468
533
  throw new Error('Piper did not produce output file');
469
534
  audioBuffer = fs.readFileSync(outFile);
470
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
535
+ const duration = getAudioDuration(audioBuffer, 'wav');
536
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
471
537
  fs.unlinkSync(outFile);
472
538
  }
473
539
  else if (engine === 'coqui') {
@@ -488,7 +554,8 @@ class TTSBigBoss {
488
554
  payload.speaker_id = speakerSelection;
489
555
  }
490
556
  audioBuffer = await httpRequest(url, 'POST', payload);
491
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
557
+ const duration = getAudioDuration(audioBuffer, 'wav');
558
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
492
559
  }
493
560
  else {
494
561
  const commandTpl = this.getNodeParameter('systemCommand', i);
@@ -522,7 +589,8 @@ class TTSBigBoss {
522
589
  throw new Error('System command did not produce output file at expected path');
523
590
  }
524
591
  audioBuffer = fs.readFileSync(outFile);
525
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
592
+ const duration = getAudioDuration(audioBuffer);
593
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
526
594
  if (fs.existsSync(outFile))
527
595
  fs.unlinkSync(outFile);
528
596
  }
@@ -667,23 +735,41 @@ function ticksToTime(ticks) {
667
735
  const mili = date.getMilliseconds().toString().padStart(3, '0');
668
736
  return `${h}:${m}:${s},${mili}`;
669
737
  }
670
- function generateHeuristicSRT(text, byteLength) {
671
- const totalDurationSec = text.length / 15;
672
- const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
673
- let currentStartTime = 0;
674
- let srt = '';
675
- let counter = 1;
676
- const msToSrt = (ms) => {
677
- const date = new Date(0, 0, 0, 0, 0, 0, ms);
678
- return `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}:${date.getSeconds().toString().padStart(2, '0')},${date.getMilliseconds().toString().padStart(3, '0')}`;
679
- };
680
- for (const sentence of sentences) {
681
- const sentenceDuration = (sentence.length / text.length) * (totalDurationSec * 1000);
682
- const endTime = currentStartTime + sentenceDuration;
683
- srt += `${counter++}\n${msToSrt(currentStartTime)} --> ${msToSrt(endTime)}\n${sentence.trim()}\n\n`;
684
- currentStartTime = endTime;
685
- }
686
- return srt;
738
+ async function downloadFile(url, dest) {
739
+ return new Promise((resolve, reject) => {
740
+ const file = fs.createWriteStream(dest);
741
+ file.on('error', (err) => {
742
+ fs.unlink(dest, () => { });
743
+ reject(new Error(`File write error: ${err.message}`));
744
+ });
745
+ const request = https.get(url, (response) => {
746
+ if (response.statusCode === 302 || response.statusCode === 301) {
747
+ file.close();
748
+ downloadFile(response.headers.location, dest).then(resolve).catch(reject);
749
+ return;
750
+ }
751
+ if (response.statusCode && response.statusCode !== 200) {
752
+ file.close();
753
+ fs.unlink(dest, () => { });
754
+ reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
755
+ return;
756
+ }
757
+ response.pipe(file);
758
+ file.on('finish', () => {
759
+ file.close((err) => {
760
+ if (err)
761
+ reject(err);
762
+ else
763
+ resolve();
764
+ });
765
+ });
766
+ });
767
+ request.on('error', (err) => {
768
+ file.close();
769
+ fs.unlink(dest, () => { });
770
+ reject(new Error(`Network error: ${err.message}`));
771
+ });
772
+ });
687
773
  }
688
774
  async function ensurePiperBinary(binDir) {
689
775
  const platform = os.platform();
@@ -735,15 +821,15 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
735
821
  else {
736
822
  const parts = modelNameOrUrl.split('-');
737
823
  if (parts.length >= 3) {
738
- const langRegion = parts[0] + '_' + parts[1];
739
- const voice = parts[2];
740
- const quality = parts[3] || 'medium';
741
- const lang = parts[0];
824
+ const langRegion = parts[0];
825
+ const voice = parts[1];
826
+ const quality = parts[2];
827
+ const lang = langRegion.split('_')[0];
742
828
  modelFilename = modelNameOrUrl + '.onnx';
743
829
  modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
744
830
  }
745
831
  else {
746
- throw new Error(`Invalid model name format: ${modelNameOrUrl}. Use format lang_REGION-voice-quality`);
832
+ throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
747
833
  }
748
834
  }
749
835
  const modelPath = path.join(binDir, modelFilename);
@@ -772,41 +858,46 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
772
858
  }
773
859
  return { modelPath, configPath };
774
860
  }
775
- async function downloadFile(url, dest) {
776
- return new Promise((resolve, reject) => {
777
- const file = fs.createWriteStream(dest);
778
- file.on('error', (err) => {
779
- fs.unlink(dest, () => { });
780
- reject(new Error(`File write error: ${err.message}`));
781
- });
782
- const request = https.get(url, (response) => {
783
- if (response.statusCode === 302 || response.statusCode === 301) {
784
- file.close();
785
- downloadFile(response.headers.location, dest).then(resolve).catch(reject);
786
- return;
787
- }
788
- if (response.statusCode && response.statusCode !== 200) {
789
- file.close();
790
- fs.unlink(dest, () => { });
791
- reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
792
- return;
861
+ function getAudioDuration(buffer, hint = null) {
862
+ if (!buffer || buffer.length === 0)
863
+ return -1;
864
+ if ((hint === 'wav') || (buffer.length > 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE')) {
865
+ try {
866
+ const byteRate = buffer.readUInt32LE(28);
867
+ if (byteRate > 0) {
868
+ const dataSize = buffer.length - 44;
869
+ return dataSize / byteRate;
793
870
  }
794
- response.pipe(file);
795
- file.on('finish', () => {
796
- file.close((err) => {
797
- if (err)
798
- reject(err);
799
- else
800
- resolve();
801
- });
802
- });
803
- });
804
- request.on('error', (err) => {
805
- file.close();
806
- fs.unlink(dest, () => { });
807
- reject(new Error(`Network error: ${err.message}`));
808
- });
809
- });
871
+ }
872
+ catch (e) { }
873
+ }
874
+ return -1;
875
+ }
876
+ function generateHeuristicSRT(text, durationSeconds) {
877
+ if (durationSeconds <= 0) {
878
+ durationSeconds = text.length / 14;
879
+ }
880
+ const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
881
+ const totalContentLen = text.length;
882
+ let currentStartTime = 0;
883
+ let srt = '';
884
+ let counter = 1;
885
+ const msToSrt = (ms) => {
886
+ const totalSec = Math.floor(ms / 1000);
887
+ const mili = Math.floor(ms % 1000);
888
+ const h = Math.floor(totalSec / 3600);
889
+ const m = Math.floor((totalSec % 3600) / 60);
890
+ const s = totalSec % 60;
891
+ return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
892
+ };
893
+ for (const sentence of sentences) {
894
+ const sentenceRatio = sentence.length / totalContentLen;
895
+ const sentenceDuration = sentenceRatio * durationSeconds;
896
+ const endTime = currentStartTime + sentenceDuration;
897
+ srt += `${counter++}\n${msToSrt(currentStartTime * 1000)} --> ${msToSrt(endTime * 1000)}\n${sentence.trim()}\n\n`;
898
+ currentStartTime = endTime;
899
+ }
900
+ return srt;
810
901
  }
811
902
  async function httpRequest(url, method = 'GET', body = null) {
812
903
  const requestModule = url.startsWith('https') ? https : http;
@@ -61,6 +61,16 @@ const PIPER_MODELS = [
61
61
 
62
62
  // Edge TTS Constants
63
63
  const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
64
+ const EDGE_HEADERS = {
65
+ 'Authority': 'speech.platform.bing.com',
66
+ 'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
67
+ 'Sec-CH-UA-Mobile': '?0',
68
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
69
+ 'Sec-CH-UA-Platform': '"Windows"',
70
+ 'Accept-Encoding': 'gzip, deflate, br',
71
+ 'Accept-Language': 'en-US,en;q=0.9',
72
+ 'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
73
+ };
64
74
  const EDGE_VOICES = [
65
75
  // Arabic
66
76
  { name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
@@ -127,6 +137,11 @@ export class TTSBigBoss implements INodeType {
127
137
  value: 'coqui',
128
138
  description: 'Connect to a running Coqui TTS/XTTS server.',
129
139
  },
140
+ {
141
+ name: 'Kokoro TTS (Local OpenAI API)',
142
+ value: 'kokoro',
143
+ description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
144
+ },
130
145
  {
131
146
  name: 'System Command (Custom)',
132
147
  value: 'system',
@@ -289,14 +304,52 @@ export class TTSBigBoss implements INodeType {
289
304
  description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
290
305
  },
291
306
  // ----------------------------------
307
+ // Kokoro Settings
308
+ // ----------------------------------
309
+ {
310
+ displayName: 'API URL',
311
+ name: 'kokoroUrl',
312
+ type: 'string',
313
+ default: 'http://localhost:8880/v1/audio/speech',
314
+ description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
315
+ displayOptions: {
316
+ show: {
317
+ engine: ['kokoro'],
318
+ },
319
+ },
320
+ },
321
+ {
322
+ displayName: 'Voice / Model',
323
+ name: 'kokoroVoice',
324
+ type: 'string',
325
+ default: 'af_bella',
326
+ description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
327
+ displayOptions: {
328
+ show: {
329
+ engine: ['kokoro'],
330
+ },
331
+ },
332
+ },
333
+ {
334
+ displayName: 'Speed',
335
+ name: 'kokoroSpeed',
336
+ type: 'number',
337
+ default: 1.0,
338
+ displayOptions: {
339
+ show: {
340
+ engine: ['kokoro'],
341
+ },
342
+ },
343
+ },
344
+ // ----------------------------------
292
345
  // Coqui Server Settings
293
346
  // ----------------------------------
294
347
  {
295
348
  displayName: 'Base Server URL',
296
349
  name: 'coquiUrl',
297
350
  type: 'string',
298
- default: 'http://host.docker.internal:5002',
299
- description: 'Base URL of Coqui server (e.g. http://172.17.0.1:5002 if in Docker). Do not include /api/tts.',
351
+ default: 'http://localhost:5002',
352
+ description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
300
353
  displayOptions: {
301
354
  show: {
302
355
  engine: ['coqui'],
@@ -461,6 +514,28 @@ export class TTSBigBoss implements INodeType {
461
514
  srtBuffer = Buffer.from(result.srt, 'utf8');
462
515
  }
463
516
 
517
+ } else if (engine === 'kokoro') {
518
+ // ----------------------------------
519
+ // KOKORO EXECUTION
520
+ // ----------------------------------
521
+ const url = this.getNodeParameter('kokoroUrl', i) as string;
522
+ const voice = this.getNodeParameter('kokoroVoice', i) as string;
523
+ const speed = this.getNodeParameter('kokoroSpeed', i) as number;
524
+
525
+ // Standard OpenAI 'createSpeech' payload
526
+ const payload = {
527
+ model: 'kokoro', // or whatever the server expects
528
+ input: text,
529
+ voice: voice,
530
+ speed: speed,
531
+ response_format: 'mp3'
532
+ };
533
+
534
+ audioBuffer = await httpRequest(url, 'POST', payload);
535
+
536
+ const duration = getAudioDuration(audioBuffer, 'mp3');
537
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
538
+
464
539
  } else if (engine === 'piper_local') {
465
540
  // ----------------------------------
466
541
  // PIPER LOCAL AUTOMATION
@@ -478,8 +553,6 @@ export class TTSBigBoss implements INodeType {
478
553
 
479
554
  // 3. Execute
480
555
  const outFile = path.join(tempDir, `piper_out_${uuidv4()}.wav`);
481
- // Piper command: echo "text" | piper --model model.onnx --output_file out.wav
482
- // We use child_process.spawn to pipe text safely
483
556
 
484
557
  await new Promise<void>((resolve, reject) => {
485
558
  const piperProc = child_process.spawn(piperBinPath, [
@@ -496,9 +569,8 @@ export class TTSBigBoss implements INodeType {
496
569
 
497
570
  piperProc.on('close', (code) => {
498
571
  if (code === 0) resolve();
499
- // Check for the specific JSON error in stderr
500
572
  if (errData.includes('json.exception.parse_error')) {
501
- reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted (HTML instead of JSON?). Try deleting the file at ${configPath} and running again.`));
573
+ reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
502
574
  } else {
503
575
  reject(new Error(`Piper failed (exit ${code}): ${errData}`));
504
576
  }
@@ -510,7 +582,8 @@ export class TTSBigBoss implements INodeType {
510
582
  if (!fs.existsSync(outFile)) throw new Error('Piper did not produce output file');
511
583
 
512
584
  audioBuffer = fs.readFileSync(outFile);
513
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
585
+ const duration = getAudioDuration(audioBuffer, 'wav');
586
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
514
587
 
515
588
  fs.unlinkSync(outFile);
516
589
 
@@ -526,7 +599,6 @@ export class TTSBigBoss implements INodeType {
526
599
  const wavPath = this.getNodeParameter('coquiWavPath', i, '') as string;
527
600
  const lang = this.getNodeParameter('coquiLang', i) as string;
528
601
 
529
- // Construct Payload
530
602
  const payload: any = {
531
603
  text: text,
532
604
  language_id: lang,
@@ -538,9 +610,9 @@ export class TTSBigBoss implements INodeType {
538
610
  payload.speaker_id = speakerSelection;
539
611
  }
540
612
 
541
- // Execute Request
542
613
  audioBuffer = await httpRequest(url, 'POST', payload);
543
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
614
+ const duration = getAudioDuration(audioBuffer, 'wav');
615
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
544
616
 
545
617
  } else {
546
618
  // ----------------------------------
@@ -556,7 +628,6 @@ export class TTSBigBoss implements INodeType {
556
628
  .replace(/"{text}"/g, `"${text.replace(/"/g, '\\"')}"`) // Basic escape
557
629
  .replace(/{text}/g, `"${text.replace(/"/g, '\\"')}"`);
558
630
 
559
- // Handle Clone Input
560
631
  if (useClone) {
561
632
  const cloneProp = this.getNodeParameter('cloneInputProperty', i) as string;
562
633
  const cloneData = await this.helpers.getBinaryDataBuffer(i, cloneProp);
@@ -567,7 +638,6 @@ export class TTSBigBoss implements INodeType {
567
638
  .replace(/{reference_audio}/g, `"${cloneFile}"`);
568
639
  }
569
640
 
570
- // Execute
571
641
  await new Promise((resolve, reject) => {
572
642
  child_process.exec(cmd, (error, stdout, stderr) => {
573
643
  if (error) {
@@ -583,9 +653,8 @@ export class TTSBigBoss implements INodeType {
583
653
  }
584
654
 
585
655
  audioBuffer = fs.readFileSync(outFile);
586
-
587
- // Generate Heuristic SRT (Estimate timestamps)
588
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
656
+ const duration = getAudioDuration(audioBuffer);
657
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
589
658
 
590
659
  // Cleanup
591
660
  if (fs.existsSync(outFile)) fs.unlinkSync(outFile);
@@ -784,41 +853,43 @@ function ticksToTime(ticks: number): string {
784
853
  return `${h}:${m}:${s},${mili}`;
785
854
  }
786
855
 
787
- // --------------------------------------------------------------------------
788
- // HEURISTIC SRT IMPLEMENTATION (For System Command)
789
- // --------------------------------------------------------------------------
790
- function generateHeuristicSRT(text: string, byteLength: number): string {
791
- // Estimate duration assuming typical MP3/WAV bitrate.
792
- // Actually, system command usually produces WAV (PCM).
793
- // Wrapper might produce MP3. Let's assume user command output.
794
- // It is safer to assume ~15 chars per second reading speed if we don't know duration.
795
- // Or assume 16000 bytes/sec for mono 16khz? Too unreliable.
796
- // Let's use text length heuristic: Avg reading speed 150 wpm ~ 2.5 words/sec ~ 15 chars/sec?
797
- // Let's try 15 chars / second.
798
-
799
- const totalDurationSec = text.length / 15;
800
- // Ideally we'd use 'ffprobe' to get exact duration, but let's stick to pure TS for now.
801
- // If we really wanted to be robust, we'd add 'ffprobe' execution here.
856
+ // ----------------------------------
857
+ // OLD HEURISTIC REMOVED
858
+ // ----------------------------------
802
859
 
803
- const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
804
- let currentStartTime = 0;
805
- let srt = '';
806
- let counter = 1;
807
-
808
- const msToSrt = (ms: number) => {
809
- const date = new Date(0, 0, 0, 0, 0, 0, ms);
810
- return `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}:${date.getSeconds().toString().padStart(2, '0')},${date.getMilliseconds().toString().padStart(3, '0')}`;
811
- };
812
-
813
- for (const sentence of sentences) {
814
- const sentenceDuration = (sentence.length / text.length) * (totalDurationSec * 1000);
815
- const endTime = currentStartTime + sentenceDuration;
816
-
817
- srt += `${counter++}\n${msToSrt(currentStartTime)} --> ${msToSrt(endTime)}\n${sentence.trim()}\n\n`;
818
- currentStartTime = endTime;
819
- }
820
-
821
- return srt;
860
+ async function downloadFile(url: string, dest: string): Promise<void> {
861
+ return new Promise((resolve, reject) => {
862
+ const file = fs.createWriteStream(dest);
863
+ file.on('error', (err) => {
864
+ fs.unlink(dest, () => { });
865
+ reject(new Error(`File write error: ${err.message}`));
866
+ });
867
+ const request = https.get(url, (response) => {
868
+ if (response.statusCode === 302 || response.statusCode === 301) {
869
+ file.close();
870
+ downloadFile(response.headers.location!, dest).then(resolve).catch(reject);
871
+ return;
872
+ }
873
+ if (response.statusCode && response.statusCode !== 200) {
874
+ file.close();
875
+ fs.unlink(dest, () => { });
876
+ reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
877
+ return;
878
+ }
879
+ response.pipe(file);
880
+ file.on('finish', () => {
881
+ file.close((err) => {
882
+ if (err) reject(err);
883
+ else resolve();
884
+ });
885
+ });
886
+ });
887
+ request.on('error', (err) => {
888
+ file.close();
889
+ fs.unlink(dest, () => { });
890
+ reject(new Error(`Network error: ${err.message}`));
891
+ });
892
+ });
822
893
  }
823
894
 
824
895
  // --------------------------------------------------------------------------
@@ -888,25 +959,23 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
888
959
  modelUrl = modelNameOrUrl;
889
960
  modelFilename = path.basename(modelNameOrUrl);
890
961
  } else {
891
- // Construct URL from name
962
+ // Correct Parsing for 'lang_REGION-voice-quality'
963
+ // e.g. en_US-lessac-medium -> [en_US, lessac, medium]
964
+ // e.g. ar_JO-kareem-medium -> [ar_JO, kareem, medium]
965
+
892
966
  const parts = modelNameOrUrl.split('-');
893
967
  if (parts.length >= 3) {
894
- const langRegion = parts[0] + '_' + parts[1]; // en_US
895
- const voice = parts[2];
896
- const quality = parts[3] || 'medium';
897
- const lang = parts[0]; // en
968
+ const langRegion = parts[0]; // 'ar_JO' or 'en_US'
969
+ const voice = parts[1]; // 'kareem'
970
+ const quality = parts[2]; // 'medium'
898
971
 
899
- // e.g. en_US-lessac-medium
900
- // lang=en, region=en_US, voice=lessac, quality=medium
901
- // url path: en/en_US/lessac/medium/en_US-lessac-medium.onnx
902
-
903
- // Handle special case: ar_JO (no lang folder? check repo)
904
- // Generally structure is: lang_short/lang_long/voice/quality/filename
972
+ // Lang code is first part of langRegion (split by _)
973
+ const lang = langRegion.split('_')[0]; // 'ar' from 'ar_JO'
905
974
 
906
975
  modelFilename = modelNameOrUrl + '.onnx';
907
- modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`; // Add download=true to force direct link
976
+ modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
908
977
  } else {
909
- throw new Error(`Invalid model name format: ${modelNameOrUrl}. Use format lang_REGION-voice-quality`);
978
+ throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
910
979
  }
911
980
  }
912
981
 
@@ -942,47 +1011,66 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
942
1011
  return { modelPath, configPath };
943
1012
  }
944
1013
 
945
- async function downloadFile(url: string, dest: string): Promise<void> {
946
- return new Promise((resolve, reject) => {
947
- const file = fs.createWriteStream(dest);
948
-
949
- // Handle file system errors (e.g. permissions)
950
- file.on('error', (err) => {
951
- fs.unlink(dest, () => { }); // Cleanup
952
- reject(new Error(`File write error: ${err.message}`));
953
- });
1014
+ // --------------------------------------------------------------------------
1015
+ // HELPER: Determine Audio Duration for SRT
1016
+ // --------------------------------------------------------------------------
1017
+ function getAudioDuration(buffer: Buffer, hint: 'mp3' | 'wav' | null = null): number {
1018
+ // 1. Empty or missing buffer: return -1 so the caller falls back to the text-length estimate
1019
+ if (!buffer || buffer.length === 0) return -1;
954
1020
 
955
- const request = https.get(url, (response) => {
956
- if (response.statusCode === 302 || response.statusCode === 301) {
957
- // Follow redirect
958
- file.close();
959
- downloadFile(response.headers.location!, dest).then(resolve).catch(reject);
960
- return;
1021
+ // 2. Try parsing WAV header
1022
+ // RIFF....WAVEfmt
1023
+ if ((hint === 'wav') || (buffer.length > 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE')) {
1024
+ try {
1025
+ // standard header is 44 bytes.
1026
+ const byteRate = buffer.readUInt32LE(28);
1027
+ if (byteRate > 0) {
1028
+ const dataSize = buffer.length - 44;
1029
+ return dataSize / byteRate;
961
1030
  }
1031
+ } catch (e) { /* ignore */ }
1032
+ }
962
1033
 
963
- if (response.statusCode && response.statusCode !== 200) {
964
- file.close();
965
- fs.unlink(dest, () => { });
966
- reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
967
- return;
968
- }
1034
+ // 3. Fallback: Char count estimation? No, we don't have text here.
1035
+ // Return -1 to signal "Use text length"
1036
+ return -1;
1037
+ }
969
1038
 
970
- response.pipe(file);
1039
+ // --------------------------------------------------------------------------
1040
+ // HEURISTIC SRT IMPLEMENTATION
1041
+ // --------------------------------------------------------------------------
1042
+ function generateHeuristicSRT(text: string, durationSeconds: number): string {
1043
+ // If duration unknown (-1), estimate from text length (14 chars/sec)
1044
+ if (durationSeconds <= 0) {
1045
+ durationSeconds = text.length / 14;
1046
+ }
971
1047
 
972
- file.on('finish', () => {
973
- file.close((err) => {
974
- if (err) reject(err);
975
- else resolve();
976
- });
977
- });
978
- });
1048
+ const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
1049
+ const totalContentLen = text.length;
979
1050
 
980
- request.on('error', (err) => {
981
- file.close();
982
- fs.unlink(dest, () => { });
983
- reject(new Error(`Network error: ${err.message}`));
984
- });
985
- });
1051
+ let currentStartTime = 0;
1052
+ let srt = '';
1053
+ let counter = 1;
1054
+
1055
+ const msToSrt = (ms: number) => {
1056
+ const totalSec = Math.floor(ms / 1000);
1057
+ const mili = Math.floor(ms % 1000);
1058
+ const h = Math.floor(totalSec / 3600);
1059
+ const m = Math.floor((totalSec % 3600) / 60);
1060
+ const s = totalSec % 60;
1061
+ return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
1062
+ };
1063
+
1064
+ for (const sentence of sentences) {
1065
+ // Proportion of time = Proportion of length
1066
+ const sentenceRatio = sentence.length / totalContentLen;
1067
+ const sentenceDuration = sentenceRatio * durationSeconds;
1068
+ const endTime = currentStartTime + sentenceDuration;
1069
+
1070
+ srt += `${counter++}\n${msToSrt(currentStartTime * 1000)} --> ${msToSrt(endTime * 1000)}\n${sentence.trim()}\n\n`;
1071
+ currentStartTime = endTime;
1072
+ }
1073
+ return srt;
986
1074
  }
987
1075
 
988
1076
  async function httpRequest(url: string, method: string = 'GET', body: any = null): Promise<Buffer> {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "n8n-nodes-tts-bigboss",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "description": "BigBoss TTS node with multi-engine support and automatic SRT generation",
5
5
  "keywords": [
6
6
  "n8n-community-node-package",