n8n-nodes-tts-bigboss 1.0.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,6 +48,134 @@ const http = __importStar(require("http"));
  const stream = __importStar(require("stream"));
  const util_1 = require("util");
  const pipeline = (0, util_1.promisify)(stream.pipeline);
+ const MAX_CHARS_PER_CHUNK = 300;
+ const SILENCE_DURATION_MS = 200;
+ function splitTextIntoChunks(text) {
+ const chunks = [];
+ const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
+ let currentChunk = '';
+ for (const sentence of sentences) {
+ if (currentChunk.length + sentence.length <= MAX_CHARS_PER_CHUNK) {
+ currentChunk += sentence;
+ }
+ else {
+ if (currentChunk.length > 0) {
+ chunks.push(currentChunk.trim());
+ }
+ currentChunk = sentence;
+ while (currentChunk.length > MAX_CHARS_PER_CHUNK) {
+ let splitPoint = currentChunk.lastIndexOf(' ', MAX_CHARS_PER_CHUNK);
+ if (splitPoint === -1) {
+ splitPoint = MAX_CHARS_PER_CHUNK;
+ }
+ chunks.push(currentChunk.substring(0, splitPoint).trim());
+ currentChunk = currentChunk.substring(splitPoint).trim();
+ }
+ }
+ }
+ if (currentChunk.length > 0) {
+ chunks.push(currentChunk.trim());
+ }
+ return chunks.filter(chunk => chunk.length > 0);
+ }
+ function createWavBuffer(audioData, sampleRate = 24000) {
+ const numChannels = 1;
+ const bitsPerSample = 16;
+ const bytesPerSample = bitsPerSample / 8;
+ let int16Data;
+ if (audioData instanceof Float32Array) {
+ int16Data = new Int16Array(audioData.length);
+ for (let i = 0; i < audioData.length; i++) {
+ const s = Math.max(-1, Math.min(1, audioData[i]));
+ int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+ }
+ }
+ else {
+ int16Data = new Int16Array(audioData.buffer, audioData.byteOffset, audioData.byteLength / 2);
+ }
+ const dataSize = int16Data.length * bytesPerSample;
+ const buffer = Buffer.alloc(44 + dataSize);
+ buffer.write('RIFF', 0);
+ buffer.writeUInt32LE(36 + dataSize, 4);
+ buffer.write('WAVE', 8);
+ buffer.write('fmt ', 12);
+ buffer.writeUInt32LE(16, 16);
+ buffer.writeUInt16LE(1, 20);
+ buffer.writeUInt16LE(numChannels, 22);
+ buffer.writeUInt32LE(sampleRate, 24);
+ buffer.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28);
+ buffer.writeUInt16LE(numChannels * bytesPerSample, 32);
+ buffer.writeUInt16LE(bitsPerSample, 34);
+ buffer.write('data', 36);
+ buffer.writeUInt32LE(dataSize, 40);
+ for (let i = 0; i < int16Data.length; i++) {
+ buffer.writeInt16LE(int16Data[i], 44 + i * 2);
+ }
+ return buffer;
+ }
+ function concatenateAudioBuffers(audioChunks, silenceDurationMs, sampleRate = 24000) {
+ if (audioChunks.length === 0) {
+ return createWavBuffer(new Float32Array(), sampleRate);
+ }
+ if (audioChunks.length === 1) {
+ return audioChunks[0].audio instanceof Buffer
+ ? audioChunks[0].audio
+ : createWavBuffer(audioChunks[0].audio, audioChunks[0].sampling_rate);
+ }
+ const silenceSamples = Math.round((silenceDurationMs / 1000) * sampleRate);
+ let totalLength = 0;
+ const float32Chunks = [];
+ for (const chunk of audioChunks) {
+ if (chunk.audio instanceof Float32Array) {
+ float32Chunks.push(chunk.audio);
+ totalLength += chunk.audio.length;
+ }
+ else {
+ const int16 = new Int16Array(chunk.audio.buffer, chunk.audio.byteOffset, chunk.audio.byteLength / 2);
+ const float32 = new Float32Array(int16.length);
+ for (let i = 0; i < int16.length; i++) {
+ float32[i] = int16[i] / (int16[i] < 0 ? 0x8000 : 0x7FFF);
+ }
+ float32Chunks.push(float32);
+ totalLength += float32.length;
+ }
+ }
+ totalLength += (audioChunks.length - 1) * silenceSamples;
+ const combinedAudio = new Float32Array(totalLength);
+ let offset = 0;
+ for (let i = 0; i < float32Chunks.length; i++) {
+ const chunk = float32Chunks[i];
+ combinedAudio.set(chunk, offset);
+ offset += chunk.length;
+ if (i < float32Chunks.length - 1) {
+ offset += silenceSamples;
+ }
+ }
+ return createWavBuffer(combinedAudio, sampleRate);
+ }
+ function generateSRTFromChunks(textChunks, audioDurations) {
+ if (textChunks.length === 0)
+ return '';
+ let srt = '';
+ let currentTime = 0;
+ let counter = 1;
+ const msToSrt = (ms) => {
+ const totalSec = Math.floor(ms / 1000);
+ const mili = Math.floor(ms % 1000);
+ const h = Math.floor(totalSec / 3600);
+ const m = Math.floor((totalSec % 3600) / 60);
+ const s = totalSec % 60;
+ return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
+ };
+ for (let i = 0; i < textChunks.length; i++) {
+ const duration = audioDurations[i] || 0;
+ const startTime = currentTime;
+ const endTime = currentTime + duration;
+ srt += `${counter++}\n${msToSrt(startTime)} --> ${msToSrt(endTime)}\n${textChunks[i].trim()}\n\n`;
+ currentTime = endTime + SILENCE_DURATION_MS;
+ }
+ return srt;
+ }
  const PIPER_MODELS = [
  { name: 'Arabic (Jordan) - Kareem (Male) - Low', value: 'ar_JO-kareem-low' },
  { name: 'Arabic (Jordan) - Kareem (Male) - Medium', value: 'ar_JO-kareem-medium' },
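Editor's note (illustrative, not part of the published diff): the new splitTextIntoChunks helper keeps whole sentences together and caps every chunk at MAX_CHARS_PER_CHUNK (300) characters, falling back to a last-space split when a single sentence exceeds the limit. A minimal usage sketch with a hypothetical input string:

    // Every returned chunk is non-empty and at most 300 characters long.
    const longText = 'First sentence. Second sentence! ' + 'word '.repeat(80) + '. Last one?';
    const chunks = splitTextIntoChunks(longText);
    console.log(chunks.length, Math.max(...chunks.map(c => c.length))); // max length <= 300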
@@ -73,6 +201,16 @@ const PIPER_MODELS = [
  { name: 'German - Thorsten (Male) - Low', value: 'de_DE-thorsten-low' },
  ];
  const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
+ const EDGE_HEADERS = {
+ 'Authority': 'speech.platform.bing.com',
+ 'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
+ 'Sec-CH-UA-Mobile': '?0',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
+ 'Sec-CH-UA-Platform': '"Windows"',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'Accept-Language': 'en-US,en;q=0.9',
+ 'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
+ };
  const EDGE_VOICES = [
  { name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
  { name: 'Arabic (Egypt) - Shakir', value: 'ar-EG-ShakirNeural' },
@@ -127,6 +265,11 @@ class TTSBigBoss {
  value: 'coqui',
  description: 'Connect to a running Coqui TTS/XTTS server.',
  },
+ {
+ name: 'Kokoro TTS (Local OpenAI API)',
+ value: 'kokoro',
+ description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
+ },
  {
  name: 'System Command (Custom)',
  value: 'system',
@@ -276,12 +419,47 @@ class TTSBigBoss {
  },
  description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
  },
+ {
+ displayName: 'API URL',
+ name: 'kokoroUrl',
+ type: 'string',
+ default: 'http://localhost:8880/v1/audio/speech',
+ description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
+ displayOptions: {
+ show: {
+ engine: ['kokoro'],
+ },
+ },
+ },
+ {
+ displayName: 'Voice / Model',
+ name: 'kokoroVoice',
+ type: 'string',
+ default: 'af_bella',
+ description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
+ displayOptions: {
+ show: {
+ engine: ['kokoro'],
+ },
+ },
+ },
+ {
+ displayName: 'Speed',
+ name: 'kokoroSpeed',
+ type: 'number',
+ default: 1.0,
+ displayOptions: {
+ show: {
+ engine: ['kokoro'],
+ },
+ },
+ },
  {
  displayName: 'Base Server URL',
  name: 'coquiUrl',
  type: 'string',
- default: 'http://host.docker.internal:5002',
- description: 'Base URL of Coqui server (e.g. http://172.17.0.1:5002 if in Docker). Do not include /api/tts.',
+ default: 'http://localhost:5002',
+ description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
  displayOptions: {
  show: {
  engine: ['coqui'],
@@ -434,6 +612,21 @@ class TTSBigBoss {
  srtBuffer = Buffer.from(result.srt, 'utf8');
  }
  }
+ else if (engine === 'kokoro') {
+ const url = this.getNodeParameter('kokoroUrl', i);
+ const voice = this.getNodeParameter('kokoroVoice', i);
+ const speed = this.getNodeParameter('kokoroSpeed', i);
+ const payload = {
+ model: 'kokoro',
+ input: text,
+ voice: voice,
+ speed: speed,
+ response_format: 'mp3'
+ };
+ audioBuffer = await httpRequest(url, 'POST', payload);
+ const duration = getAudioDuration(audioBuffer, 'mp3');
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
+ }
  else if (engine === 'piper_local') {
  let piperModel = this.getNodeParameter('piperModel', i);
  if (piperModel === 'custom') {
@@ -456,7 +649,7 @@ class TTSBigBoss {
  if (code === 0)
  resolve();
  if (errData.includes('json.exception.parse_error')) {
- reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted (HTML instead of JSON?). Try deleting the file at ${configPath} and running again.`));
+ reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
  }
  else {
  reject(new Error(`Piper failed (exit ${code}): ${errData}`));
@@ -467,7 +660,8 @@ class TTSBigBoss {
  if (!fs.existsSync(outFile))
  throw new Error('Piper did not produce output file');
  audioBuffer = fs.readFileSync(outFile);
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
+ const duration = getAudioDuration(audioBuffer, 'wav');
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
  fs.unlinkSync(outFile);
  }
  else if (engine === 'coqui') {
@@ -488,7 +682,8 @@ class TTSBigBoss {
  payload.speaker_id = speakerSelection;
  }
  audioBuffer = await httpRequest(url, 'POST', payload);
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
+ const duration = getAudioDuration(audioBuffer, 'wav');
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
  }
  else {
  const commandTpl = this.getNodeParameter('systemCommand', i);
@@ -522,7 +717,8 @@ class TTSBigBoss {
  throw new Error('System command did not produce output file at expected path');
  }
  audioBuffer = fs.readFileSync(outFile);
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
+ const duration = getAudioDuration(audioBuffer);
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
  if (fs.existsSync(outFile))
  fs.unlinkSync(outFile);
  }
@@ -667,23 +863,41 @@ function ticksToTime(ticks) {
  const mili = date.getMilliseconds().toString().padStart(3, '0');
  return `${h}:${m}:${s},${mili}`;
  }
- function generateHeuristicSRT(text, byteLength) {
- const totalDurationSec = text.length / 15;
- const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
- let currentStartTime = 0;
- let srt = '';
- let counter = 1;
- const msToSrt = (ms) => {
- const date = new Date(0, 0, 0, 0, 0, 0, ms);
- return `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}:${date.getSeconds().toString().padStart(2, '0')},${date.getMilliseconds().toString().padStart(3, '0')}`;
- };
- for (const sentence of sentences) {
- const sentenceDuration = (sentence.length / text.length) * (totalDurationSec * 1000);
- const endTime = currentStartTime + sentenceDuration;
- srt += `${counter++}\n${msToSrt(currentStartTime)} --> ${msToSrt(endTime)}\n${sentence.trim()}\n\n`;
- currentStartTime = endTime;
- }
- return srt;
+ async function downloadFile(url, dest) {
+ return new Promise((resolve, reject) => {
+ const file = fs.createWriteStream(dest);
+ file.on('error', (err) => {
+ fs.unlink(dest, () => { });
+ reject(new Error(`File write error: ${err.message}`));
+ });
+ const request = https.get(url, (response) => {
+ if (response.statusCode === 302 || response.statusCode === 301) {
+ file.close();
+ downloadFile(response.headers.location, dest).then(resolve).catch(reject);
+ return;
+ }
+ if (response.statusCode && response.statusCode !== 200) {
+ file.close();
+ fs.unlink(dest, () => { });
+ reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
+ return;
+ }
+ response.pipe(file);
+ file.on('finish', () => {
+ file.close((err) => {
+ if (err)
+ reject(err);
+ else
+ resolve();
+ });
+ });
+ });
+ request.on('error', (err) => {
+ file.close();
+ fs.unlink(dest, () => { });
+ reject(new Error(`Network error: ${err.message}`));
+ });
+ });
  }
  async function ensurePiperBinary(binDir) {
  const platform = os.platform();
@@ -735,15 +949,15 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
  else {
  const parts = modelNameOrUrl.split('-');
  if (parts.length >= 3) {
- const langRegion = parts[0] + '_' + parts[1];
- const voice = parts[2];
- const quality = parts[3] || 'medium';
- const lang = parts[0];
+ const langRegion = parts[0];
+ const voice = parts[1];
+ const quality = parts[2];
+ const lang = langRegion.split('_')[0];
  modelFilename = modelNameOrUrl + '.onnx';
  modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
  }
  else {
- throw new Error(`Invalid model name format: ${modelNameOrUrl}. Use format lang_REGION-voice-quality`);
+ throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
  }
  }
  const modelPath = path.join(binDir, modelFilename);
@@ -772,41 +986,46 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
  }
  return { modelPath, configPath };
  }
- async function downloadFile(url, dest) {
- return new Promise((resolve, reject) => {
- const file = fs.createWriteStream(dest);
- file.on('error', (err) => {
- fs.unlink(dest, () => { });
- reject(new Error(`File write error: ${err.message}`));
- });
- const request = https.get(url, (response) => {
- if (response.statusCode === 302 || response.statusCode === 301) {
- file.close();
- downloadFile(response.headers.location, dest).then(resolve).catch(reject);
- return;
- }
- if (response.statusCode && response.statusCode !== 200) {
- file.close();
- fs.unlink(dest, () => { });
- reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
- return;
+ function getAudioDuration(buffer, hint = null) {
+ if (!buffer || buffer.length === 0)
+ return -1;
+ if ((hint === 'wav') || (buffer.length > 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE')) {
+ try {
+ const byteRate = buffer.readUInt32LE(28);
+ if (byteRate > 0) {
+ const dataSize = buffer.length - 44;
+ return dataSize / byteRate;
  }
- response.pipe(file);
- file.on('finish', () => {
- file.close((err) => {
- if (err)
- reject(err);
- else
- resolve();
- });
- });
- });
- request.on('error', (err) => {
- file.close();
- fs.unlink(dest, () => { });
- reject(new Error(`Network error: ${err.message}`));
- });
- });
+ }
+ catch (e) { }
+ }
+ return -1;
+ }
+ function generateHeuristicSRT(text, durationSeconds) {
+ if (durationSeconds <= 0) {
+ durationSeconds = text.length / 14;
+ }
+ const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
+ const totalContentLen = text.length;
+ let currentStartTime = 0;
+ let srt = '';
+ let counter = 1;
+ const msToSrt = (ms) => {
+ const totalSec = Math.floor(ms / 1000);
+ const mili = Math.floor(ms % 1000);
+ const h = Math.floor(totalSec / 3600);
+ const m = Math.floor((totalSec % 3600) / 60);
+ const s = totalSec % 60;
+ return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
+ };
+ for (const sentence of sentences) {
+ const sentenceRatio = sentence.length / totalContentLen;
+ const sentenceDuration = sentenceRatio * durationSeconds;
+ const endTime = currentStartTime + sentenceDuration;
+ srt += `${counter++}\n${msToSrt(currentStartTime * 1000)} --> ${msToSrt(endTime * 1000)}\n${sentence.trim()}\n\n`;
+ currentStartTime = endTime;
+ }
+ return srt;
  }
  async function httpRequest(url, method = 'GET', body = null) {
  const requestModule = url.startsWith('https') ? https : http;
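Editor's note (illustrative, not part of the published diff): for the mono 16-bit WAV output that createWavBuffer above produces, the byte rate stored at offset 28 is sampleRate × channels × bytesPerSample, so getAudioDuration reduces to simple header arithmetic:

    // Sanity check of the WAV duration math, assuming 24 kHz mono 16-bit PCM.
    const byteRate = 24000 * 1 * 2;              // 48,000 bytes per second
    const fileBytes = 44 + 480000;               // 44-byte header + 10 s of PCM data
    const seconds = (fileBytes - 44) / byteRate; // 10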
@@ -5,6 +5,7 @@ import {
  INodeTypeDescription,
  ILoadOptionsFunctions,
  INodePropertyOptions,
+ NodeOperationError,
  } from 'n8n-workflow';
  import { v4 as uuidv4 } from 'uuid';
  import * as fs from 'fs';
@@ -20,6 +21,194 @@ import * as zlib from 'zlib'; // For extracting .tar.gz if needed, typically usa

  const pipeline = promisify(stream.pipeline);

+ // =============================================================================
+ // CORE HELPER FUNCTIONS
+ // =============================================================================
+
+ const MAX_CHARS_PER_CHUNK = 300; // Estimated safe limit for ~20-25 seconds of audio
+ const SILENCE_DURATION_MS = 200; // 200ms pause between concatenated audio chunks
+
+ interface AudioChunk {
+ audio: Float32Array | Buffer;
+ sampling_rate: number;
+ }
+
+ /**
+ * Splits text into chunks based on sentence endings and a maximum character limit.
+ * Tries to keep sentences together and avoids splitting words.
+ */
+ function splitTextIntoChunks(text: string): string[] {
+ const chunks: string[] = [];
+ const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
+ let currentChunk = '';
+
+ for (const sentence of sentences) {
+ if (currentChunk.length + sentence.length <= MAX_CHARS_PER_CHUNK) {
+ currentChunk += sentence;
+ } else {
+ if (currentChunk.length > 0) {
+ chunks.push(currentChunk.trim());
+ }
+ currentChunk = sentence;
+
+ // If a single sentence is still too long, split it further
+ while (currentChunk.length > MAX_CHARS_PER_CHUNK) {
+ let splitPoint = currentChunk.lastIndexOf(' ', MAX_CHARS_PER_CHUNK);
+ if (splitPoint === -1) {
+ splitPoint = MAX_CHARS_PER_CHUNK;
+ }
+ chunks.push(currentChunk.substring(0, splitPoint).trim());
+ currentChunk = currentChunk.substring(splitPoint).trim();
+ }
+ }
+ }
+
+ if (currentChunk.length > 0) {
+ chunks.push(currentChunk.trim());
+ }
+
+ return chunks.filter(chunk => chunk.length > 0);
+ }
+
+ /**
+ * Create a WAV file buffer from raw PCM audio data
+ */
+ function createWavBuffer(audioData: Float32Array | Buffer, sampleRate: number = 24000): Buffer {
+ const numChannels = 1; // Mono
+ const bitsPerSample = 16;
+ const bytesPerSample = bitsPerSample / 8;
+
+ // Convert Float32Array to Int16Array if needed
+ let int16Data: Int16Array;
+ if (audioData instanceof Float32Array) {
+ int16Data = new Int16Array(audioData.length);
+ for (let i = 0; i < audioData.length; i++) {
+ const s = Math.max(-1, Math.min(1, audioData[i]));
+ int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+ }
+ } else {
+ // Already a buffer, assume it's raw PCM int16
+ int16Data = new Int16Array(audioData.buffer, audioData.byteOffset, audioData.byteLength / 2);
+ }
+
+ const dataSize = int16Data.length * bytesPerSample;
+ const buffer = Buffer.alloc(44 + dataSize);
+
+ // WAV header
+ buffer.write('RIFF', 0);
+ buffer.writeUInt32LE(36 + dataSize, 4);
+ buffer.write('WAVE', 8);
+ buffer.write('fmt ', 12);
+ buffer.writeUInt32LE(16, 16); // PCM format chunk size
+ buffer.writeUInt16LE(1, 20); // PCM format
+ buffer.writeUInt16LE(numChannels, 22);
+ buffer.writeUInt32LE(sampleRate, 24);
+ buffer.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28); // Byte rate
+ buffer.writeUInt16LE(numChannels * bytesPerSample, 32); // Block align
+ buffer.writeUInt16LE(bitsPerSample, 34);
+ buffer.write('data', 36);
+ buffer.writeUInt32LE(dataSize, 40);
+
+ // Write PCM data
+ for (let i = 0; i < int16Data.length; i++) {
+ buffer.writeInt16LE(int16Data[i], 44 + i * 2);
+ }
+
+ return buffer;
+ }
+
+ /**
+ * Concatenates multiple audio buffers and adds silence between them.
+ * Handles both Buffer and Float32Array inputs.
+ */
+ function concatenateAudioBuffers(
+ audioChunks: AudioChunk[],
+ silenceDurationMs: number,
+ sampleRate: number = 24000
+ ): Buffer {
+ if (audioChunks.length === 0) {
+ return createWavBuffer(new Float32Array(), sampleRate);
+ }
+
+ if (audioChunks.length === 1) {
+ return audioChunks[0].audio instanceof Buffer
+ ? audioChunks[0].audio
+ : createWavBuffer(audioChunks[0].audio, audioChunks[0].sampling_rate);
+ }
+
+ // Calculate total length including silence
+ const silenceSamples = Math.round((silenceDurationMs / 1000) * sampleRate);
+ let totalLength = 0;
+
+ // Convert all to Float32Array for easier concatenation
+ const float32Chunks: Float32Array[] = [];
+ for (const chunk of audioChunks) {
+ if (chunk.audio instanceof Float32Array) {
+ float32Chunks.push(chunk.audio);
+ totalLength += chunk.audio.length;
+ } else {
+ // Convert Buffer to Float32Array (assuming 16-bit PCM)
+ const int16 = new Int16Array(chunk.audio.buffer, chunk.audio.byteOffset, chunk.audio.byteLength / 2);
+ const float32 = new Float32Array(int16.length);
+ for (let i = 0; i < int16.length; i++) {
+ float32[i] = int16[i] / (int16[i] < 0 ? 0x8000 : 0x7FFF);
+ }
+ float32Chunks.push(float32);
+ totalLength += float32.length;
+ }
+ }
+
+ totalLength += (audioChunks.length - 1) * silenceSamples;
+
+ const combinedAudio = new Float32Array(totalLength);
+ let offset = 0;
+
+ for (let i = 0; i < float32Chunks.length; i++) {
+ const chunk = float32Chunks[i];
+ combinedAudio.set(chunk, offset);
+ offset += chunk.length;
+
+ // Add silence if not the last chunk
+ if (i < float32Chunks.length - 1) {
+ offset += silenceSamples;
+ }
+ }
+
+ return createWavBuffer(combinedAudio, sampleRate);
+ }
+
+ /**
+ * Generate SRT subtitles for concatenated audio chunks
+ */
+ function generateSRTFromChunks(textChunks: string[], audioDurations: number[]): string {
+ if (textChunks.length === 0) return '';
+
+ let srt = '';
+ let currentTime = 0;
+ let counter = 1;
+
+ const msToSrt = (ms: number) => {
+ const totalSec = Math.floor(ms / 1000);
+ const mili = Math.floor(ms % 1000);
+ const h = Math.floor(totalSec / 3600);
+ const m = Math.floor((totalSec % 3600) / 60);
+ const s = totalSec % 60;
+ return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
+ };
+
+ for (let i = 0; i < textChunks.length; i++) {
+ const duration = audioDurations[i] || 0;
+ const startTime = currentTime;
+ const endTime = currentTime + duration;
+
+ srt += `${counter++}\n${msToSrt(startTime)} --> ${msToSrt(endTime)}\n${textChunks[i].trim()}\n\n`;
+ currentTime = endTime + SILENCE_DURATION_MS; // Add silence duration
+ }
+
+ return srt;
+ }
+
+
  // Piper Models List (Curated High Quality)
  // Note: Official Piper repo currently only has 'kareem' (Male) for Arabic.
  // For Female Arabic voices, please use the 'Edge TTS' engine (Salma, Zariyah).
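Editor's note (illustrative, not part of the published diff): with the defaults above, concatenateAudioBuffers inserts Math.round(0.2 × 24000) = 4800 silent samples between chunks, and generateSRTFromChunks advances each cue by the chunk duration plus the same 200 ms gap. A sketch of the expected output for two hypothetical chunks of 2.5 s and 3 s:

    const srt = generateSRTFromChunks(['Hello there.', 'Second chunk.'], [2500, 3000]);
    // 1
    // 00:00:00,000 --> 00:00:02,500
    // Hello there.
    //
    // 2
    // 00:00:02,700 --> 00:00:05,700   (start shifted by the 200 ms silence)
    // Second chunk.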
@@ -61,6 +250,16 @@ const PIPER_MODELS = [

  // Edge TTS Constants
  const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
+ const EDGE_HEADERS = {
+ 'Authority': 'speech.platform.bing.com',
+ 'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
+ 'Sec-CH-UA-Mobile': '?0',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
+ 'Sec-CH-UA-Platform': '"Windows"',
+ 'Accept-Encoding': 'gzip, deflate, br',
+ 'Accept-Language': 'en-US,en;q=0.9',
+ 'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
+ };
  const EDGE_VOICES = [
  // Arabic
  { name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
@@ -127,6 +326,11 @@ export class TTSBigBoss implements INodeType {
  value: 'coqui',
  description: 'Connect to a running Coqui TTS/XTTS server.',
  },
+ {
+ name: 'Kokoro TTS (Local OpenAI API)',
+ value: 'kokoro',
+ description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
+ },
  {
  name: 'System Command (Custom)',
  value: 'system',
@@ -289,14 +493,52 @@ export class TTSBigBoss implements INodeType {
  description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
  },
  // ----------------------------------
+ // Kokoro Settings
+ // ----------------------------------
+ {
+ displayName: 'API URL',
+ name: 'kokoroUrl',
+ type: 'string',
+ default: 'http://localhost:8880/v1/audio/speech',
+ description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
+ displayOptions: {
+ show: {
+ engine: ['kokoro'],
+ },
+ },
+ },
+ {
+ displayName: 'Voice / Model',
+ name: 'kokoroVoice',
+ type: 'string',
+ default: 'af_bella',
+ description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
+ displayOptions: {
+ show: {
+ engine: ['kokoro'],
+ },
+ },
+ },
+ {
+ displayName: 'Speed',
+ name: 'kokoroSpeed',
+ type: 'number',
+ default: 1.0,
+ displayOptions: {
+ show: {
+ engine: ['kokoro'],
+ },
+ },
+ },
+ // ----------------------------------
  // Coqui Server Settings
  // ----------------------------------
  {
  displayName: 'Base Server URL',
  name: 'coquiUrl',
  type: 'string',
- default: 'http://host.docker.internal:5002',
- description: 'Base URL of Coqui server (e.g. http://172.17.0.1:5002 if in Docker). Do not include /api/tts.',
+ default: 'http://localhost:5002',
+ description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
  displayOptions: {
  show: {
  engine: ['coqui'],
@@ -461,6 +703,28 @@ export class TTSBigBoss implements INodeType {
  srtBuffer = Buffer.from(result.srt, 'utf8');
  }

+ } else if (engine === 'kokoro') {
+ // ----------------------------------
+ // KOKORO EXECUTION
+ // ----------------------------------
+ const url = this.getNodeParameter('kokoroUrl', i) as string;
+ const voice = this.getNodeParameter('kokoroVoice', i) as string;
+ const speed = this.getNodeParameter('kokoroSpeed', i) as number;
+
+ // Standard OpenAI 'createSpeech' payload
+ const payload = {
+ model: 'kokoro', // or whatever the server expects
+ input: text,
+ voice: voice,
+ speed: speed,
+ response_format: 'mp3'
+ };
+
+ audioBuffer = await httpRequest(url, 'POST', payload);
+
+ const duration = getAudioDuration(audioBuffer, 'mp3');
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
+
  } else if (engine === 'piper_local') {
  // ----------------------------------
  // PIPER LOCAL AUTOMATION
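Editor's note (illustrative, not part of the published diff): the Kokoro branch above posts a standard OpenAI-style audio/speech payload, so an equivalent standalone request against a local server — assuming the node's defaults (port 8880, voice af_bella) and Node 18+ for global fetch — would look like:

    const res = await fetch('http://localhost:8880/v1/audio/speech', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ model: 'kokoro', input: 'Hello from n8n', voice: 'af_bella', speed: 1.0, response_format: 'mp3' }),
    });
    const audio = Buffer.from(await res.arrayBuffer()); // binary MP3, like audioBuffer above

Note that getAudioDuration has no MP3 parser, so with response_format 'mp3' it returns -1 and the SRT timing falls back to the 14 characters-per-second estimate in generateHeuristicSRT.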
@@ -478,8 +742,6 @@ export class TTSBigBoss implements INodeType {

  // 3. Execute
  const outFile = path.join(tempDir, `piper_out_${uuidv4()}.wav`);
- // Piper command: echo "text" | piper --model model.onnx --output_file out.wav
- // We use child_process.spawn to pipe text safely

  await new Promise<void>((resolve, reject) => {
  const piperProc = child_process.spawn(piperBinPath, [
@@ -496,9 +758,8 @@ export class TTSBigBoss implements INodeType {

  piperProc.on('close', (code) => {
  if (code === 0) resolve();
- // Check for the specific JSON error in stderr
  if (errData.includes('json.exception.parse_error')) {
- reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted (HTML instead of JSON?). Try deleting the file at ${configPath} and running again.`));
+ reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
  } else {
  reject(new Error(`Piper failed (exit ${code}): ${errData}`));
  }
@@ -510,7 +771,8 @@ export class TTSBigBoss implements INodeType {
  if (!fs.existsSync(outFile)) throw new Error('Piper did not produce output file');

  audioBuffer = fs.readFileSync(outFile);
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
+ const duration = getAudioDuration(audioBuffer, 'wav');
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');

  fs.unlinkSync(outFile);

@@ -526,7 +788,6 @@ export class TTSBigBoss implements INodeType {
  const wavPath = this.getNodeParameter('coquiWavPath', i, '') as string;
  const lang = this.getNodeParameter('coquiLang', i) as string;

- // Construct Payload
  const payload: any = {
  text: text,
  language_id: lang,
@@ -538,9 +799,9 @@ export class TTSBigBoss implements INodeType {
  payload.speaker_id = speakerSelection;
  }

- // Execute Request
  audioBuffer = await httpRequest(url, 'POST', payload);
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
+ const duration = getAudioDuration(audioBuffer, 'wav');
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');

  } else {
  // ----------------------------------
@@ -556,7 +817,6 @@ export class TTSBigBoss implements INodeType {
  .replace(/"{text}"/g, `"${text.replace(/"/g, '\\"')}"`) // Basic escape
  .replace(/{text}/g, `"${text.replace(/"/g, '\\"')}"`);

- // Handle Clone Input
  if (useClone) {
  const cloneProp = this.getNodeParameter('cloneInputProperty', i) as string;
  const cloneData = await this.helpers.getBinaryDataBuffer(i, cloneProp);
@@ -567,7 +827,6 @@ export class TTSBigBoss implements INodeType {
  .replace(/{reference_audio}/g, `"${cloneFile}"`);
  }

- // Execute
  await new Promise((resolve, reject) => {
  child_process.exec(cmd, (error, stdout, stderr) => {
  if (error) {
@@ -583,9 +842,8 @@ export class TTSBigBoss implements INodeType {
  }

  audioBuffer = fs.readFileSync(outFile);
-
- // Generate Heuristic SRT (Estimate timestamps)
- srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
+ const duration = getAudioDuration(audioBuffer);
+ srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');

  // Cleanup
  if (fs.existsSync(outFile)) fs.unlinkSync(outFile);
@@ -784,41 +1042,43 @@ function ticksToTime(ticks: number): string {
  return `${h}:${m}:${s},${mili}`;
  }

- // --------------------------------------------------------------------------
- // HEURISTIC SRT IMPLEMENTATION (For System Command)
- // --------------------------------------------------------------------------
- function generateHeuristicSRT(text: string, byteLength: number): string {
- // Estimate duration assuming typical MP3/WAV bitrate.
- // Actually, system command usually produces WAV (PCM).
- // Wrapper might produce MP3. Let's assume user command output.
- // It is safer to assume ~15 chars per second reading speed if we don't know duration.
- // Or assume 16000 bytes/sec for mono 16khz? Too unreliable.
- // Let's use text length heuristic: Avg reading speed 150 wpm ~ 2.5 words/sec ~ 15 chars/sec?
- // Let's try 15 chars / second.
-
- const totalDurationSec = text.length / 15;
- // Ideally we'd use 'ffprobe' to get exact duration, but let's stick to pure TS for now.
- // If we really wanted to be robust, we'd add 'ffprobe' execution here.
-
- const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
- let currentStartTime = 0;
- let srt = '';
- let counter = 1;
+ // ----------------------------------
+ // OLD HEURISTIC REMOVED
+ // ----------------------------------

- const msToSrt = (ms: number) => {
- const date = new Date(0, 0, 0, 0, 0, 0, ms);
- return `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}:${date.getSeconds().toString().padStart(2, '0')},${date.getMilliseconds().toString().padStart(3, '0')}`;
- };
-
- for (const sentence of sentences) {
- const sentenceDuration = (sentence.length / text.length) * (totalDurationSec * 1000);
- const endTime = currentStartTime + sentenceDuration;
-
- srt += `${counter++}\n${msToSrt(currentStartTime)} --> ${msToSrt(endTime)}\n${sentence.trim()}\n\n`;
- currentStartTime = endTime;
- }
-
- return srt;
+ async function downloadFile(url: string, dest: string): Promise<void> {
+ return new Promise((resolve, reject) => {
+ const file = fs.createWriteStream(dest);
+ file.on('error', (err) => {
+ fs.unlink(dest, () => { });
+ reject(new Error(`File write error: ${err.message}`));
+ });
+ const request = https.get(url, (response) => {
+ if (response.statusCode === 302 || response.statusCode === 301) {
+ file.close();
+ downloadFile(response.headers.location!, dest).then(resolve).catch(reject);
+ return;
+ }
+ if (response.statusCode && response.statusCode !== 200) {
+ file.close();
+ fs.unlink(dest, () => { });
+ reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
+ return;
+ }
+ response.pipe(file);
+ file.on('finish', () => {
+ file.close((err) => {
+ if (err) reject(err);
+ else resolve();
+ });
+ });
+ });
+ request.on('error', (err) => {
+ file.close();
+ fs.unlink(dest, () => { });
+ reject(new Error(`Network error: ${err.message}`));
+ });
+ });
  }

  // --------------------------------------------------------------------------
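Editor's note (illustrative, not part of the published diff): downloadFile follows 301/302 redirects by calling itself recursively, which matters because the Hugging Face resolve/...?download=true URLs built in ensurePiperModel below typically answer with a redirect to a CDN. For example, the model name ar_JO-kareem-medium resolves under the corrected parsing to:

    // langRegion = 'ar_JO', voice = 'kareem', quality = 'medium', lang = 'ar'
    // https://huggingface.co/rhasspy/piper-voices/resolve/main/ar/ar_JO/kareem/medium/ar_JO-kareem-medium.onnx?download=true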
@@ -888,25 +1148,23 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
  modelUrl = modelNameOrUrl;
  modelFilename = path.basename(modelNameOrUrl);
  } else {
- // Construct URL from name
+ // Correct Parsing for 'lang_REGION-voice-quality'
+ // e.g. en_US-lessac-medium -> [en_US, lessac, medium]
+ // e.g. ar_JO-kareem-medium -> [ar_JO, kareem, medium]
+
  const parts = modelNameOrUrl.split('-');
  if (parts.length >= 3) {
- const langRegion = parts[0] + '_' + parts[1]; // en_US
- const voice = parts[2];
- const quality = parts[3] || 'medium';
- const lang = parts[0]; // en
+ const langRegion = parts[0]; // 'ar_JO' or 'en_US'
+ const voice = parts[1]; // 'kareem'
+ const quality = parts[2]; // 'medium'

- // e.g. en_US-lessac-medium
- // lang=en, region=en_US, voice=lessac, quality=medium
- // url path: en/en_US/lessac/medium/en_US-lessac-medium.onnx
-
- // Handle special case: ar_JO (no lang folder? check repo)
- // Generally structure is: lang_short/lang_long/voice/quality/filename
+ // Lang code is first part of langRegion (split by _)
+ const lang = langRegion.split('_')[0]; // 'ar' form 'ar_JO'

  modelFilename = modelNameOrUrl + '.onnx';
- modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`; // Add download=true to force direct link
+ modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
  } else {
- throw new Error(`Invalid model name format: ${modelNameOrUrl}. Use format lang_REGION-voice-quality`);
+ throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
  }
  }

@@ -942,47 +1200,66 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
  return { modelPath, configPath };
  }

- async function downloadFile(url: string, dest: string): Promise<void> {
- return new Promise((resolve, reject) => {
- const file = fs.createWriteStream(dest);
-
- // Handle file system errors (e.g. permissions)
- file.on('error', (err) => {
- fs.unlink(dest, () => { }); // Cleanup
- reject(new Error(`File write error: ${err.message}`));
- });
+ // --------------------------------------------------------------------------
+ // HELPER: Determine Audio Duration for SRT
+ // --------------------------------------------------------------------------
+ function getAudioDuration(buffer: Buffer, hint: 'mp3' | 'wav' | null = null): number {
+ // 1. Try generic text length if buffer empty (fallback)
+ if (!buffer || buffer.length === 0) return -1;

- const request = https.get(url, (response) => {
- if (response.statusCode === 302 || response.statusCode === 301) {
- // Follow redirect
- file.close();
- downloadFile(response.headers.location!, dest).then(resolve).catch(reject);
- return;
+ // 2. Try parsing WAV header
+ // RIFF....WAVEfmt
+ if ((hint === 'wav') || (buffer.length > 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE')) {
+ try {
+ // standard header is 44 bytes.
+ const byteRate = buffer.readUInt32LE(28);
+ if (byteRate > 0) {
+ const dataSize = buffer.length - 44;
+ return dataSize / byteRate;
  }
+ } catch (e) { /* ignore */ }
+ }

- if (response.statusCode && response.statusCode !== 200) {
- file.close();
- fs.unlink(dest, () => { });
- reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
- return;
- }
+ // 3. Fallback: Char count estimation? No, we don't have text here.
+ // Return -1 to signal "Use text length"
+ return -1;
+ }

- response.pipe(file);
+ // --------------------------------------------------------------------------
+ // HEURISTIC SRT IMPLEMENTATION
+ // --------------------------------------------------------------------------
+ function generateHeuristicSRT(text: string, durationSeconds: number): string {
+ // If duration unknown (-1), estimate from text length (14 chars/sec)
+ if (durationSeconds <= 0) {
+ durationSeconds = text.length / 14;
+ }

- file.on('finish', () => {
- file.close((err) => {
- if (err) reject(err);
- else resolve();
- });
- });
- });
+ const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
+ const totalContentLen = text.length;

- request.on('error', (err) => {
- file.close();
- fs.unlink(dest, () => { });
- reject(new Error(`Network error: ${err.message}`));
- });
- });
+ let currentStartTime = 0;
+ let srt = '';
+ let counter = 1;
+
+ const msToSrt = (ms: number) => {
+ const totalSec = Math.floor(ms / 1000);
+ const mili = Math.floor(ms % 1000);
+ const h = Math.floor(totalSec / 3600);
+ const m = Math.floor((totalSec % 3600) / 60);
+ const s = totalSec % 60;
+ return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
+ };
+
+ for (const sentence of sentences) {
+ // Proportion of time = Proportion of length
+ const sentenceRatio = sentence.length / totalContentLen;
+ const sentenceDuration = sentenceRatio * durationSeconds;
+ const endTime = currentStartTime + sentenceDuration;
+
+ srt += `${counter++}\n${msToSrt(currentStartTime * 1000)} --> ${msToSrt(endTime * 1000)}\n${sentence.trim()}\n\n`;
+ currentStartTime = endTime;
+ }
+ return srt;
  }

  async function httpRequest(url: string, method: string = 'GET', body: any = null): Promise<Buffer> {
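Editor's note (illustrative, not part of the published diff): generateHeuristicSRT spreads the known (or estimated) duration across sentences in proportion to their character length. A sketch of the expected output for a hypothetical measured duration of 6 s:

    const srt = generateHeuristicSRT('Hello world. How are you today?', 6);
    // 1
    // 00:00:00,000 --> 00:00:02,322   (12 of 31 characters, about 2.32 s)
    // Hello world.
    //
    // 2
    // 00:00:02,322 --> 00:00:06,000
    // How are you today?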
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "n8n-nodes-tts-bigboss",
- "version": "1.0.7",
- "description": "BigBoss TTS node with multi-engine support and automatic SRT generation",
+ "version": "2.0.0",
+ "description": "Professional TTS node with multi-engine support, text chunking, and accurate SRT generation",
  "keywords": [
  "n8n-community-node-package",
  "n8n",
@@ -10,7 +10,8 @@
  "srt",
  "arabic",
  "edge-tts",
- "piper"
+ "piper",
+ "kokoro"
  ],
  "license": "MIT",
  "author": "isemo007",
@@ -35,7 +36,8 @@
  "n8n-core": "^1.75.0",
  "n8n-workflow": "^1.70.0",
  "uuid": "^9.0.0",
- "ws": "^8.13.0"
+ "ws": "^8.13.0",
+ "kokoro-js": "^1.2.1"
  },
  "devDependencies": {
  "@types/lodash": "^4.14.195",
@@ -50,5 +52,8 @@
  "typescript": "^5.0.0",
  "webpack": "^5.88.0",
  "webpack-cli": "^5.1.4"
+ },
+ "engines": {
+ "node": ">=18.0.0"
  }
  }