n8n-nodes-tts-bigboss 1.0.8 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,6 +48,134 @@ const http = __importStar(require("http"));
  const stream = __importStar(require("stream"));
  const util_1 = require("util");
  const pipeline = (0, util_1.promisify)(stream.pipeline);
+ const MAX_CHARS_PER_CHUNK = 300;
+ const SILENCE_DURATION_MS = 200;
+ function splitTextIntoChunks(text) {
+     const chunks = [];
+     const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
+     let currentChunk = '';
+     for (const sentence of sentences) {
+         if (currentChunk.length + sentence.length <= MAX_CHARS_PER_CHUNK) {
+             currentChunk += sentence;
+         }
+         else {
+             if (currentChunk.length > 0) {
+                 chunks.push(currentChunk.trim());
+             }
+             currentChunk = sentence;
+             while (currentChunk.length > MAX_CHARS_PER_CHUNK) {
+                 let splitPoint = currentChunk.lastIndexOf(' ', MAX_CHARS_PER_CHUNK);
+                 if (splitPoint === -1) {
+                     splitPoint = MAX_CHARS_PER_CHUNK;
+                 }
+                 chunks.push(currentChunk.substring(0, splitPoint).trim());
+                 currentChunk = currentChunk.substring(splitPoint).trim();
+             }
+         }
+     }
+     if (currentChunk.length > 0) {
+         chunks.push(currentChunk.trim());
+     }
+     return chunks.filter(chunk => chunk.length > 0);
+ }
+ function createWavBuffer(audioData, sampleRate = 24000) {
+     const numChannels = 1;
+     const bitsPerSample = 16;
+     const bytesPerSample = bitsPerSample / 8;
+     let int16Data;
+     if (audioData instanceof Float32Array) {
+         int16Data = new Int16Array(audioData.length);
+         for (let i = 0; i < audioData.length; i++) {
+             const s = Math.max(-1, Math.min(1, audioData[i]));
+             int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+         }
+     }
+     else {
+         int16Data = new Int16Array(audioData.buffer, audioData.byteOffset, audioData.byteLength / 2);
+     }
+     const dataSize = int16Data.length * bytesPerSample;
+     const buffer = Buffer.alloc(44 + dataSize);
+     buffer.write('RIFF', 0);
+     buffer.writeUInt32LE(36 + dataSize, 4);
+     buffer.write('WAVE', 8);
+     buffer.write('fmt ', 12);
+     buffer.writeUInt32LE(16, 16);
+     buffer.writeUInt16LE(1, 20);
+     buffer.writeUInt16LE(numChannels, 22);
+     buffer.writeUInt32LE(sampleRate, 24);
+     buffer.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28);
+     buffer.writeUInt16LE(numChannels * bytesPerSample, 32);
+     buffer.writeUInt16LE(bitsPerSample, 34);
+     buffer.write('data', 36);
+     buffer.writeUInt32LE(dataSize, 40);
+     for (let i = 0; i < int16Data.length; i++) {
+         buffer.writeInt16LE(int16Data[i], 44 + i * 2);
+     }
+     return buffer;
+ }
+ function concatenateAudioBuffers(audioChunks, silenceDurationMs, sampleRate = 24000) {
+     if (audioChunks.length === 0) {
+         return createWavBuffer(new Float32Array(), sampleRate);
+     }
+     if (audioChunks.length === 1) {
+         return audioChunks[0].audio instanceof Buffer
+             ? audioChunks[0].audio
+             : createWavBuffer(audioChunks[0].audio, audioChunks[0].sampling_rate);
+     }
+     const silenceSamples = Math.round((silenceDurationMs / 1000) * sampleRate);
+     let totalLength = 0;
+     const float32Chunks = [];
+     for (const chunk of audioChunks) {
+         if (chunk.audio instanceof Float32Array) {
+             float32Chunks.push(chunk.audio);
+             totalLength += chunk.audio.length;
+         }
+         else {
+             const int16 = new Int16Array(chunk.audio.buffer, chunk.audio.byteOffset, chunk.audio.byteLength / 2);
+             const float32 = new Float32Array(int16.length);
+             for (let i = 0; i < int16.length; i++) {
+                 float32[i] = int16[i] / (int16[i] < 0 ? 0x8000 : 0x7FFF);
+             }
+             float32Chunks.push(float32);
+             totalLength += float32.length;
+         }
+     }
+     totalLength += (audioChunks.length - 1) * silenceSamples;
+     const combinedAudio = new Float32Array(totalLength);
+     let offset = 0;
+     for (let i = 0; i < float32Chunks.length; i++) {
+         const chunk = float32Chunks[i];
+         combinedAudio.set(chunk, offset);
+         offset += chunk.length;
+         if (i < float32Chunks.length - 1) {
+             offset += silenceSamples;
+         }
+     }
+     return createWavBuffer(combinedAudio, sampleRate);
+ }
+ function generateSRTFromChunks(textChunks, audioDurations) {
+     if (textChunks.length === 0)
+         return '';
+     let srt = '';
+     let currentTime = 0;
+     let counter = 1;
+     const msToSrt = (ms) => {
+         const totalSec = Math.floor(ms / 1000);
+         const mili = Math.floor(ms % 1000);
+         const h = Math.floor(totalSec / 3600);
+         const m = Math.floor((totalSec % 3600) / 60);
+         const s = totalSec % 60;
+         return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
+     };
+     for (let i = 0; i < textChunks.length; i++) {
+         const duration = audioDurations[i] || 0;
+         const startTime = currentTime;
+         const endTime = currentTime + duration;
+         srt += `${counter++}\n${msToSrt(startTime)} --> ${msToSrt(endTime)}\n${textChunks[i].trim()}\n\n`;
+         currentTime = endTime + SILENCE_DURATION_MS;
+     }
+     return srt;
+ }
  const PIPER_MODELS = [
  { name: 'Arabic (Jordan) - Kareem (Male) - Low', value: 'ar_JO-kareem-low' },
  { name: 'Arabic (Jordan) - Kareem (Male) - Medium', value: 'ar_JO-kareem-medium' },
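
Taken together, the new helpers implement a chunk-then-stitch pipeline: the input text is split into pieces of at most MAX_CHARS_PER_CHUNK characters, each piece is synthesized separately, the per-chunk audio is joined with SILENCE_DURATION_MS of silence, and an SRT track is timed against those same gaps. The sketch below is illustrative only and not code from the package: it assumes the helpers above are in scope, and synthesizeChunk is a hypothetical stand-in for whichever TTS engine the node actually calls.

// Illustrative sketch, not package code. Assumes splitTextIntoChunks,
// concatenateAudioBuffers, generateSRTFromChunks and SILENCE_DURATION_MS
// from the diff above are in scope; synthesizeChunk is a hypothetical
// placeholder for the real engine call.
async function synthesizeChunk(text: string): Promise<{ audio: Float32Array; sampling_rate: number }> {
    // Placeholder output: one second of silence at 24 kHz per chunk.
    return { audio: new Float32Array(24000), sampling_rate: 24000 };
}

async function renderLongText(text: string): Promise<{ wav: Buffer; srt: string }> {
    const chunks = splitTextIntoChunks(text);            // each piece is <= 300 characters
    const audio: { audio: Float32Array; sampling_rate: number }[] = [];
    const durationsMs: number[] = [];
    for (const chunk of chunks) {
        const result = await synthesizeChunk(chunk);
        audio.push(result);
        // Duration in ms = samples / sample rate * 1000; this feeds the SRT timings.
        durationsMs.push((result.audio.length / result.sampling_rate) * 1000);
    }
    const wav = concatenateAudioBuffers(audio, SILENCE_DURATION_MS); // 200 ms gap between chunks
    const srt = generateSRTFromChunks(chunks, durationsMs);          // cue times include those gaps
    return { wav, srt };
}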
@@ -5,6 +5,7 @@ import {
  INodeTypeDescription,
  ILoadOptionsFunctions,
  INodePropertyOptions,
+ NodeOperationError,
  } from 'n8n-workflow';
  import { v4 as uuidv4 } from 'uuid';
  import * as fs from 'fs';
@@ -20,6 +21,194 @@ import * as zlib from 'zlib'; // For extracting .tar.gz if needed, typically usa
 
  const pipeline = promisify(stream.pipeline);
 
+ // =============================================================================
+ // CORE HELPER FUNCTIONS
+ // =============================================================================
+
+ const MAX_CHARS_PER_CHUNK = 300; // Estimated safe limit for ~20-25 seconds of audio
+ const SILENCE_DURATION_MS = 200; // 200ms pause between concatenated audio chunks
+
+ interface AudioChunk {
+     audio: Float32Array | Buffer;
+     sampling_rate: number;
+ }
+
+ /**
+  * Splits text into chunks based on sentence endings and a maximum character limit.
+  * Tries to keep sentences together and avoids splitting words.
+  */
+ function splitTextIntoChunks(text: string): string[] {
+     const chunks: string[] = [];
+     const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
+     let currentChunk = '';
+
+     for (const sentence of sentences) {
+         if (currentChunk.length + sentence.length <= MAX_CHARS_PER_CHUNK) {
+             currentChunk += sentence;
+         } else {
+             if (currentChunk.length > 0) {
+                 chunks.push(currentChunk.trim());
+             }
+             currentChunk = sentence;
+
+             // If a single sentence is still too long, split it further
+             while (currentChunk.length > MAX_CHARS_PER_CHUNK) {
+                 let splitPoint = currentChunk.lastIndexOf(' ', MAX_CHARS_PER_CHUNK);
+                 if (splitPoint === -1) {
+                     splitPoint = MAX_CHARS_PER_CHUNK;
+                 }
+                 chunks.push(currentChunk.substring(0, splitPoint).trim());
+                 currentChunk = currentChunk.substring(splitPoint).trim();
+             }
+         }
+     }
+
+     if (currentChunk.length > 0) {
+         chunks.push(currentChunk.trim());
+     }
+
+     return chunks.filter(chunk => chunk.length > 0);
+ }
+
+ /**
+  * Create a WAV file buffer from raw PCM audio data
+  */
+ function createWavBuffer(audioData: Float32Array | Buffer, sampleRate: number = 24000): Buffer {
+     const numChannels = 1; // Mono
+     const bitsPerSample = 16;
+     const bytesPerSample = bitsPerSample / 8;
+
+     // Convert Float32Array to Int16Array if needed
+     let int16Data: Int16Array;
+     if (audioData instanceof Float32Array) {
+         int16Data = new Int16Array(audioData.length);
+         for (let i = 0; i < audioData.length; i++) {
+             const s = Math.max(-1, Math.min(1, audioData[i]));
+             int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+         }
+     } else {
+         // Already a buffer, assume it's raw PCM int16
+         int16Data = new Int16Array(audioData.buffer, audioData.byteOffset, audioData.byteLength / 2);
+     }
+
+     const dataSize = int16Data.length * bytesPerSample;
+     const buffer = Buffer.alloc(44 + dataSize);
+
+     // WAV header
+     buffer.write('RIFF', 0);
+     buffer.writeUInt32LE(36 + dataSize, 4);
+     buffer.write('WAVE', 8);
+     buffer.write('fmt ', 12);
+     buffer.writeUInt32LE(16, 16); // PCM format chunk size
+     buffer.writeUInt16LE(1, 20); // PCM format
+     buffer.writeUInt16LE(numChannels, 22);
+     buffer.writeUInt32LE(sampleRate, 24);
+     buffer.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28); // Byte rate
+     buffer.writeUInt16LE(numChannels * bytesPerSample, 32); // Block align
+     buffer.writeUInt16LE(bitsPerSample, 34);
+     buffer.write('data', 36);
+     buffer.writeUInt32LE(dataSize, 40);
+
+     // Write PCM data
+     for (let i = 0; i < int16Data.length; i++) {
+         buffer.writeInt16LE(int16Data[i], 44 + i * 2);
+     }
+
+     return buffer;
+ }
+
+ /**
+  * Concatenates multiple audio buffers and adds silence between them.
+  * Handles both Buffer and Float32Array inputs.
+  */
+ function concatenateAudioBuffers(
+     audioChunks: AudioChunk[],
+     silenceDurationMs: number,
+     sampleRate: number = 24000
+ ): Buffer {
+     if (audioChunks.length === 0) {
+         return createWavBuffer(new Float32Array(), sampleRate);
+     }
+
+     if (audioChunks.length === 1) {
+         return audioChunks[0].audio instanceof Buffer
+             ? audioChunks[0].audio
+             : createWavBuffer(audioChunks[0].audio, audioChunks[0].sampling_rate);
+     }
+
+     // Calculate total length including silence
+     const silenceSamples = Math.round((silenceDurationMs / 1000) * sampleRate);
+     let totalLength = 0;
+
+     // Convert all to Float32Array for easier concatenation
+     const float32Chunks: Float32Array[] = [];
+     for (const chunk of audioChunks) {
+         if (chunk.audio instanceof Float32Array) {
+             float32Chunks.push(chunk.audio);
+             totalLength += chunk.audio.length;
+         } else {
+             // Convert Buffer to Float32Array (assuming 16-bit PCM)
+             const int16 = new Int16Array(chunk.audio.buffer, chunk.audio.byteOffset, chunk.audio.byteLength / 2);
+             const float32 = new Float32Array(int16.length);
+             for (let i = 0; i < int16.length; i++) {
+                 float32[i] = int16[i] / (int16[i] < 0 ? 0x8000 : 0x7FFF);
+             }
+             float32Chunks.push(float32);
+             totalLength += float32.length;
+         }
+     }
+
+     totalLength += (audioChunks.length - 1) * silenceSamples;
+
+     const combinedAudio = new Float32Array(totalLength);
+     let offset = 0;
+
+     for (let i = 0; i < float32Chunks.length; i++) {
+         const chunk = float32Chunks[i];
+         combinedAudio.set(chunk, offset);
+         offset += chunk.length;
+
+         // Add silence if not the last chunk
+         if (i < float32Chunks.length - 1) {
+             offset += silenceSamples;
+         }
+     }
+
+     return createWavBuffer(combinedAudio, sampleRate);
+ }
+
+ /**
+  * Generate SRT subtitles for concatenated audio chunks
+  */
+ function generateSRTFromChunks(textChunks: string[], audioDurations: number[]): string {
+     if (textChunks.length === 0) return '';
+
+     let srt = '';
+     let currentTime = 0;
+     let counter = 1;
+
+     const msToSrt = (ms: number) => {
+         const totalSec = Math.floor(ms / 1000);
+         const mili = Math.floor(ms % 1000);
+         const h = Math.floor(totalSec / 3600);
+         const m = Math.floor((totalSec % 3600) / 60);
+         const s = totalSec % 60;
+         return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
+     };
+
+     for (let i = 0; i < textChunks.length; i++) {
+         const duration = audioDurations[i] || 0;
+         const startTime = currentTime;
+         const endTime = currentTime + duration;
+
+         srt += `${counter++}\n${msToSrt(startTime)} --> ${msToSrt(endTime)}\n${textChunks[i].trim()}\n\n`;
+         currentTime = endTime + SILENCE_DURATION_MS; // Add silence duration
+     }
+
+     return srt;
+ }
+
+
  // Piper Models List (Curated High Quality)
  // Note: Official Piper repo currently only has 'kareem' (Male) for Arabic.
  // For Female Arabic voices, please use the 'Edge TTS' engine (Salma, Zariyah).
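
Because generateSRTFromChunks advances currentTime by each chunk's audio duration plus SILENCE_DURATION_MS, the subtitle cues stay aligned with the 200 ms silence gaps that concatenateAudioBuffers inserts (4,800 zero samples at 24 kHz). A worked example with invented durations, not taken from the package or its tests:

// Worked example with made-up durations: two chunks of 2500 ms and 1800 ms of audio.
const srt = generateSRTFromChunks(
    ['First chunk of text.', 'Second chunk of text.'],
    [2500, 1800],
);
// Expected output:
// 1
// 00:00:00,000 --> 00:00:02,500
// First chunk of text.
//
// 2
// 00:00:02,700 --> 00:00:04,500
// Second chunk of text.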
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "n8n-nodes-tts-bigboss",
- "version": "1.0.8",
- "description": "BigBoss TTS node with multi-engine support and automatic SRT generation",
+ "version": "2.0.0",
+ "description": "Professional TTS node with multi-engine support, text chunking, and accurate SRT generation",
  "keywords": [
  "n8n-community-node-package",
  "n8n",
@@ -10,7 +10,8 @@
  "srt",
  "arabic",
  "edge-tts",
- "piper"
+ "piper",
+ "kokoro"
  ],
  "license": "MIT",
  "author": "isemo007",
@@ -35,7 +36,8 @@
  "n8n-core": "^1.75.0",
  "n8n-workflow": "^1.70.0",
  "uuid": "^9.0.0",
- "ws": "^8.13.0"
+ "ws": "^8.13.0",
+ "kokoro-js": "^1.2.1"
  },
  "devDependencies": {
  "@types/lodash": "^4.14.195",
@@ -50,5 +52,8 @@
  "typescript": "^5.0.0",
  "webpack": "^5.88.0",
  "webpack-cli": "^5.1.4"
+ },
+ "engines": {
+ "node": ">=18.0.0"
  }
  }
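
On the manifest side, the new kokoro-js dependency backs the added "kokoro" keyword (a Kokoro TTS engine option), and the engines field declares Node 18 or newer as the supported runtime. The snippet below is a rough sketch of how kokoro-js is typically driven according to its public README; it is not taken from this package's source, and the model id, voice name and call shape are assumptions that may not match what the node actually does.

// Hedged sketch of a typical kokoro-js call (based on the library's README,
// not this package's source). Model id and voice are placeholder choices.
import { KokoroTTS } from 'kokoro-js';

async function demoKokoro(): Promise<void> {
    const tts = await KokoroTTS.from_pretrained('onnx-community/Kokoro-82M-v1.0-ONNX');
    const result = await tts.generate('Hello from the Kokoro engine.', { voice: 'af_heart' });
    // The returned audio exposes a Float32Array plus a sampling rate, which is
    // presumably why AudioChunk above is { audio: Float32Array | Buffer; sampling_rate: number }.
    console.log(result.audio.length, result.sampling_rate);
}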