n8n-nodes-tts-bigboss 1.0.8 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TTSBigBoss.node.js +128 -0
- package/nodes/TTSBigBoss/TTSBigBoss.node.ts +189 -0
- package/package.json +9 -4
package/dist/TTSBigBoss.node.js
CHANGED
|
@@ -48,6 +48,134 @@ const http = __importStar(require("http"));
|
|
|
48
48
|
const stream = __importStar(require("stream"));
|
|
49
49
|
const util_1 = require("util");
|
|
50
50
|
const pipeline = (0, util_1.promisify)(stream.pipeline);
|
|
51
|
+
const MAX_CHARS_PER_CHUNK = 300;
const SILENCE_DURATION_MS = 200;
/**
 * Splits text into speakable chunks no longer than MAX_CHARS_PER_CHUNK.
 * Whole sentences are kept together when possible; a single over-long
 * sentence is hard-wrapped at the last word boundary before the limit.
 *
 * @param {string} text - Input text to partition.
 * @returns {string[]} Trimmed, non-empty chunks in original order.
 */
function splitTextIntoChunks(text) {
    // Greedy sentence match: runs of non-terminators plus trailing .!? marks.
    const sentences = text.match(/[^.!?]+[.!?]*/g) ?? [text];
    const chunks = [];
    let pending = '';
    const flush = () => {
        if (pending.length > 0) {
            chunks.push(pending.trim());
        }
    };
    for (const sentence of sentences) {
        if (pending.length + sentence.length <= MAX_CHARS_PER_CHUNK) {
            pending += sentence;
            continue;
        }
        flush();
        pending = sentence;
        // Hard-wrap a single sentence that exceeds the limit on its own.
        while (pending.length > MAX_CHARS_PER_CHUNK) {
            const lastSpace = pending.lastIndexOf(' ', MAX_CHARS_PER_CHUNK);
            // No space found: cut mid-word at the limit.
            const cut = lastSpace === -1 ? MAX_CHARS_PER_CHUNK : lastSpace;
            chunks.push(pending.substring(0, cut).trim());
            pending = pending.substring(cut).trim();
        }
    }
    flush();
    return chunks.filter((chunk) => chunk.length > 0);
}
|
|
81
|
+
/**
 * Create a mono 16-bit PCM WAV file buffer from raw audio data.
 *
 * @param {Float32Array|Buffer} audioData - Float samples in [-1, 1], or a
 *   Buffer of raw little-endian signed 16-bit PCM bytes.
 * @param {number} [sampleRate=24000] - Sample rate written into the header.
 * @returns {Buffer} Complete WAV file (44-byte RIFF header + PCM payload).
 */
function createWavBuffer(audioData, sampleRate = 24000) {
    const numChannels = 1; // mono
    const bitsPerSample = 16;
    const bytesPerSample = bitsPerSample / 8;
    let int16Data;
    if (audioData instanceof Float32Array) {
        // Clamp to [-1, 1] and scale to the signed 16-bit range.
        int16Data = new Int16Array(audioData.length);
        for (let i = 0; i < audioData.length; i++) {
            const s = Math.max(-1, Math.min(1, audioData[i]));
            int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
        }
    }
    else {
        // Raw PCM s16le bytes. Do NOT view the Buffer as an Int16Array directly:
        // pooled Node Buffers are frequently not 2-byte aligned, and byteLength may
        // be odd — either makes `new Int16Array(buffer, byteOffset, length)` throw
        // a RangeError. Copy sample-by-sample instead; a trailing odd byte (which
        // cannot form a sample) is dropped.
        const sampleCount = Math.floor(audioData.byteLength / 2);
        int16Data = new Int16Array(sampleCount);
        for (let i = 0; i < sampleCount; i++) {
            int16Data[i] = audioData.readInt16LE(i * 2);
        }
    }
    const dataSize = int16Data.length * bytesPerSample;
    const buffer = Buffer.alloc(44 + dataSize);
    // 44-byte RIFF/WAVE header for uncompressed PCM.
    buffer.write('RIFF', 0);
    buffer.writeUInt32LE(36 + dataSize, 4); // file size minus 8-byte preamble
    buffer.write('WAVE', 8);
    buffer.write('fmt ', 12);
    buffer.writeUInt32LE(16, 16); // fmt chunk size
    buffer.writeUInt16LE(1, 20); // audio format: PCM
    buffer.writeUInt16LE(numChannels, 22);
    buffer.writeUInt32LE(sampleRate, 24);
    buffer.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28); // byte rate
    buffer.writeUInt16LE(numChannels * bytesPerSample, 32); // block align
    buffer.writeUInt16LE(bitsPerSample, 34);
    buffer.write('data', 36);
    buffer.writeUInt32LE(dataSize, 40);
    // PCM payload.
    for (let i = 0; i < int16Data.length; i++) {
        buffer.writeInt16LE(int16Data[i], 44 + i * 2);
    }
    return buffer;
}
|
|
116
|
+
/**
 * Concatenates multiple audio chunks into one WAV buffer, inserting
 * `silenceDurationMs` of silence between consecutive chunks.
 *
 * @param {Array<{audio: Float32Array|Buffer, sampling_rate: number}>} audioChunks
 * @param {number} silenceDurationMs - Gap inserted between chunks, in ms.
 * @param {number} [sampleRate=24000] - Output sample rate for the combined WAV.
 * @returns {Buffer} WAV file buffer (or the single chunk's own Buffer — see note).
 */
function concatenateAudioBuffers(audioChunks, silenceDurationMs, sampleRate = 24000) {
    if (audioChunks.length === 0) {
        return createWavBuffer(new Float32Array(), sampleRate);
    }
    if (audioChunks.length === 1) {
        // NOTE(review): a lone Buffer chunk is returned verbatim (presumably already
        // a complete encoded file), yet the multi-chunk path below treats Buffers as
        // raw PCM — confirm upstream engines agree on which format Buffers carry.
        return audioChunks[0].audio instanceof Buffer
            ? audioChunks[0].audio
            : createWavBuffer(audioChunks[0].audio, audioChunks[0].sampling_rate);
    }
    const silenceSamples = Math.round((silenceDurationMs / 1000) * sampleRate);
    let totalLength = 0;
    // Normalize every chunk to Float32Array for uniform concatenation.
    const float32Chunks = [];
    for (const chunk of audioChunks) {
        if (chunk.audio instanceof Float32Array) {
            float32Chunks.push(chunk.audio);
            totalLength += chunk.audio.length;
        }
        else {
            // Raw PCM s16le bytes. Decode with readInt16LE rather than an Int16Array
            // view: pooled Node Buffers are often not 2-byte aligned (and byteLength
            // may be odd), which would make the view constructor throw a RangeError.
            const sampleCount = Math.floor(chunk.audio.byteLength / 2);
            const float32 = new Float32Array(sampleCount);
            for (let i = 0; i < sampleCount; i++) {
                const v = chunk.audio.readInt16LE(i * 2);
                // Asymmetric scale so -0x8000 and 0x7FFF both map to exactly ±1.
                float32[i] = v / (v < 0 ? 0x8000 : 0x7FFF);
            }
            float32Chunks.push(float32);
            totalLength += float32.length;
        }
    }
    // Account for the inter-chunk gaps (Float32Array is zero-filled => silence).
    totalLength += (audioChunks.length - 1) * silenceSamples;
    const combinedAudio = new Float32Array(totalLength);
    let offset = 0;
    for (let i = 0; i < float32Chunks.length; i++) {
        const chunk = float32Chunks[i];
        combinedAudio.set(chunk, offset);
        offset += chunk.length;
        if (i < float32Chunks.length - 1) {
            offset += silenceSamples; // skip over the silent gap
        }
    }
    return createWavBuffer(combinedAudio, sampleRate);
}
|
|
156
|
+
/**
 * Builds an SRT subtitle document for text chunks laid end-to-end, assuming
 * SILENCE_DURATION_MS of silence between consecutive audio chunks.
 *
 * @param {string[]} textChunks - Subtitle text, one entry per audio chunk.
 * @param {number[]} audioDurations - Duration of each chunk in ms (missing -> 0).
 * @returns {string} SRT-formatted text ('' when there are no chunks).
 */
function generateSRTFromChunks(textChunks, audioDurations) {
    if (textChunks.length === 0)
        return '';
    const pad = (value, width) => value.toString().padStart(width, '0');
    // Format a millisecond offset as an SRT timestamp: HH:MM:SS,mmm
    const msToSrt = (ms) => {
        const totalSec = Math.floor(ms / 1000);
        const millis = Math.floor(ms % 1000);
        const hours = Math.floor(totalSec / 3600);
        const minutes = Math.floor((totalSec % 3600) / 60);
        const seconds = totalSec % 60;
        return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(millis, 3)}`;
    };
    const entries = [];
    let cursor = 0;
    textChunks.forEach((chunk, index) => {
        const duration = audioDurations[index] || 0;
        const start = cursor;
        const end = cursor + duration;
        entries.push(`${index + 1}\n${msToSrt(start)} --> ${msToSrt(end)}\n${chunk.trim()}`);
        // Next cue starts after this chunk plus the inter-chunk silence.
        cursor = end + SILENCE_DURATION_MS;
    });
    return entries.join('\n\n') + '\n\n';
}
|
|
51
179
|
const PIPER_MODELS = [
|
|
52
180
|
{ name: 'Arabic (Jordan) - Kareem (Male) - Low', value: 'ar_JO-kareem-low' },
|
|
53
181
|
{ name: 'Arabic (Jordan) - Kareem (Male) - Medium', value: 'ar_JO-kareem-medium' },
|
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
INodeTypeDescription,
|
|
6
6
|
ILoadOptionsFunctions,
|
|
7
7
|
INodePropertyOptions,
|
|
8
|
+
NodeOperationError,
|
|
8
9
|
} from 'n8n-workflow';
|
|
9
10
|
import { v4 as uuidv4 } from 'uuid';
|
|
10
11
|
import * as fs from 'fs';
|
|
@@ -20,6 +21,194 @@ import * as zlib from 'zlib'; // For extracting .tar.gz if needed, typically usa
|
|
|
20
21
|
|
|
21
22
|
const pipeline = promisify(stream.pipeline);
|
|
22
23
|
|
|
24
|
+
// =============================================================================
// CORE HELPER FUNCTIONS
// =============================================================================

const MAX_CHARS_PER_CHUNK = 300; // Estimated safe limit for ~20-25 seconds of audio
const SILENCE_DURATION_MS = 200; // 200ms pause between concatenated audio chunks

// One synthesized audio segment: either float samples or raw PCM bytes, plus
// the rate at which the engine produced them.
interface AudioChunk {
	audio: Float32Array | Buffer;
	sampling_rate: number;
}

/**
 * Splits text into chunks based on sentence endings and a maximum character limit.
 * Tries to keep sentences together and avoids splitting words.
 *
 * @param text Input text to partition.
 * @returns Trimmed, non-empty chunks in original order.
 */
function splitTextIntoChunks(text: string): string[] {
	const chunks: string[] = [];
	// Greedy sentence match: runs of non-terminators followed by optional .!? marks.
	const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
	let currentChunk = '';

	for (const sentence of sentences) {
		if (currentChunk.length + sentence.length <= MAX_CHARS_PER_CHUNK) {
			currentChunk += sentence;
		} else {
			if (currentChunk.length > 0) {
				chunks.push(currentChunk.trim());
			}
			currentChunk = sentence;

			// If a single sentence is still too long, split it further
			while (currentChunk.length > MAX_CHARS_PER_CHUNK) {
				let splitPoint = currentChunk.lastIndexOf(' ', MAX_CHARS_PER_CHUNK);
				if (splitPoint === -1) {
					splitPoint = MAX_CHARS_PER_CHUNK; // no space found: hard-cut mid-word
				}
				chunks.push(currentChunk.substring(0, splitPoint).trim());
				currentChunk = currentChunk.substring(splitPoint).trim();
			}
		}
	}

	if (currentChunk.length > 0) {
		chunks.push(currentChunk.trim());
	}

	return chunks.filter(chunk => chunk.length > 0);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Create a WAV file buffer from raw PCM audio data
 *
 * @param audioData Float samples in [-1, 1], or a Buffer of raw s16le PCM bytes.
 * @param sampleRate Sample rate written into the 44-byte RIFF header.
 * @returns Complete mono 16-bit WAV file buffer (header + PCM payload).
 */
function createWavBuffer(audioData: Float32Array | Buffer, sampleRate: number = 24000): Buffer {
	const numChannels = 1; // Mono
	const bitsPerSample = 16;
	const bytesPerSample = bitsPerSample / 8;

	// Convert Float32Array to Int16Array if needed
	let int16Data: Int16Array;
	if (audioData instanceof Float32Array) {
		int16Data = new Int16Array(audioData.length);
		for (let i = 0; i < audioData.length; i++) {
			// Clamp to [-1, 1], then scale asymmetrically so -1 -> -0x8000, 1 -> 0x7FFF.
			const s = Math.max(-1, Math.min(1, audioData[i]));
			int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
		}
	} else {
		// Already a buffer, assume it's raw PCM int16
		// NOTE(review): this view throws a RangeError when the Buffer's byteOffset is
		// not 2-byte aligned or its byteLength is odd (pooled Node Buffers are often
		// unaligned) — consider copying via readInt16LE instead. TODO confirm inputs.
		int16Data = new Int16Array(audioData.buffer, audioData.byteOffset, audioData.byteLength / 2);
	}

	const dataSize = int16Data.length * bytesPerSample;
	const buffer = Buffer.alloc(44 + dataSize);

	// WAV header
	buffer.write('RIFF', 0);
	buffer.writeUInt32LE(36 + dataSize, 4); // file size minus the 8-byte RIFF preamble
	buffer.write('WAVE', 8);
	buffer.write('fmt ', 12);
	buffer.writeUInt32LE(16, 16); // PCM format chunk size
	buffer.writeUInt16LE(1, 20); // PCM format
	buffer.writeUInt16LE(numChannels, 22);
	buffer.writeUInt32LE(sampleRate, 24);
	buffer.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28); // Byte rate
	buffer.writeUInt16LE(numChannels * bytesPerSample, 32); // Block align
	buffer.writeUInt16LE(bitsPerSample, 34);
	buffer.write('data', 36);
	buffer.writeUInt32LE(dataSize, 40);

	// Write PCM data
	for (let i = 0; i < int16Data.length; i++) {
		buffer.writeInt16LE(int16Data[i], 44 + i * 2);
	}

	return buffer;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Concatenates multiple audio buffers and adds silence between them.
 * Handles both Buffer and Float32Array inputs.
 *
 * @param audioChunks Segments to join, each carrying its own sampling rate.
 * @param silenceDurationMs Silence inserted between consecutive chunks, in ms.
 * @param sampleRate Output sample rate used for the combined WAV.
 * @returns WAV file buffer (or the single chunk's own Buffer — see note below).
 */
function concatenateAudioBuffers(
	audioChunks: AudioChunk[],
	silenceDurationMs: number,
	sampleRate: number = 24000
): Buffer {
	if (audioChunks.length === 0) {
		return createWavBuffer(new Float32Array(), sampleRate);
	}

	if (audioChunks.length === 1) {
		// NOTE(review): a lone Buffer chunk is returned verbatim, while the
		// multi-chunk path below treats Buffers as raw PCM — verify both code
		// paths expect the same Buffer format from the upstream engines.
		return audioChunks[0].audio instanceof Buffer
			? audioChunks[0].audio
			: createWavBuffer(audioChunks[0].audio, audioChunks[0].sampling_rate);
	}

	// Calculate total length including silence
	const silenceSamples = Math.round((silenceDurationMs / 1000) * sampleRate);
	let totalLength = 0;

	// Convert all to Float32Array for easier concatenation
	const float32Chunks: Float32Array[] = [];
	for (const chunk of audioChunks) {
		if (chunk.audio instanceof Float32Array) {
			float32Chunks.push(chunk.audio);
			totalLength += chunk.audio.length;
		} else {
			// Convert Buffer to Float32Array (assuming 16-bit PCM)
			// NOTE(review): this Int16Array view throws on unaligned or odd-length
			// Buffers — consider decoding with readInt16LE instead. TODO confirm.
			const int16 = new Int16Array(chunk.audio.buffer, chunk.audio.byteOffset, chunk.audio.byteLength / 2);
			const float32 = new Float32Array(int16.length);
			for (let i = 0; i < int16.length; i++) {
				// Asymmetric scale so -0x8000 and 0x7FFF both map to exactly ±1.
				float32[i] = int16[i] / (int16[i] < 0 ? 0x8000 : 0x7FFF);
			}
			float32Chunks.push(float32);
			totalLength += float32.length;
		}
	}

	totalLength += (audioChunks.length - 1) * silenceSamples;

	const combinedAudio = new Float32Array(totalLength);
	let offset = 0;

	for (let i = 0; i < float32Chunks.length; i++) {
		const chunk = float32Chunks[i];
		combinedAudio.set(chunk, offset);
		offset += chunk.length;

		// Add silence if not the last chunk
		if (i < float32Chunks.length - 1) {
			offset += silenceSamples; // gap stays zero-filled (silence)
		}
	}

	return createWavBuffer(combinedAudio, sampleRate);
}
|
|
179
|
+
|
|
180
|
+
/**
 * Generate SRT subtitles for concatenated audio chunks
 *
 * @param textChunks Subtitle text, one entry per audio chunk.
 * @param audioDurations Duration of each chunk in milliseconds (missing -> 0).
 * @returns SRT-formatted subtitle document ('' when there are no chunks).
 */
function generateSRTFromChunks(textChunks: string[], audioDurations: number[]): string {
	if (textChunks.length === 0) return '';

	let srt = '';
	let currentTime = 0;
	let counter = 1;

	// Format a millisecond offset as an SRT timestamp: HH:MM:SS,mmm
	const msToSrt = (ms: number) => {
		const totalSec = Math.floor(ms / 1000);
		const mili = Math.floor(ms % 1000);
		const h = Math.floor(totalSec / 3600);
		const m = Math.floor((totalSec % 3600) / 60);
		const s = totalSec % 60;
		return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
	};

	for (let i = 0; i < textChunks.length; i++) {
		const duration = audioDurations[i] || 0;
		const startTime = currentTime;
		const endTime = currentTime + duration;

		srt += `${counter++}\n${msToSrt(startTime)} --> ${msToSrt(endTime)}\n${textChunks[i].trim()}\n\n`;
		currentTime = endTime + SILENCE_DURATION_MS; // Add silence duration
	}

	return srt;
}
|
|
210
|
+
|
|
211
|
+
|
|
23
212
|
// Piper Models List (Curated High Quality)
|
|
24
213
|
// Note: Official Piper repo currently only has 'kareem' (Male) for Arabic.
|
|
25
214
|
// For Female Arabic voices, please use the 'Edge TTS' engine (Salma, Zariyah).
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "n8n-nodes-tts-bigboss",
|
|
3
|
-
"version": "
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "2.0.0",
|
|
4
|
+
"description": "Professional TTS node with multi-engine support, text chunking, and accurate SRT generation",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"n8n-community-node-package",
|
|
7
7
|
"n8n",
|
|
@@ -10,7 +10,8 @@
|
|
|
10
10
|
"srt",
|
|
11
11
|
"arabic",
|
|
12
12
|
"edge-tts",
|
|
13
|
-
"piper"
|
|
13
|
+
"piper",
|
|
14
|
+
"kokoro"
|
|
14
15
|
],
|
|
15
16
|
"license": "MIT",
|
|
16
17
|
"author": "isemo007",
|
|
@@ -35,7 +36,8 @@
|
|
|
35
36
|
"n8n-core": "^1.75.0",
|
|
36
37
|
"n8n-workflow": "^1.70.0",
|
|
37
38
|
"uuid": "^9.0.0",
|
|
38
|
-
"ws": "^8.13.0"
|
|
39
|
+
"ws": "^8.13.0",
|
|
40
|
+
"kokoro-js": "^1.2.1"
|
|
39
41
|
},
|
|
40
42
|
"devDependencies": {
|
|
41
43
|
"@types/lodash": "^4.14.195",
|
|
@@ -50,5 +52,8 @@
|
|
|
50
52
|
"typescript": "^5.0.0",
|
|
51
53
|
"webpack": "^5.88.0",
|
|
52
54
|
"webpack-cli": "^5.1.4"
|
|
55
|
+
},
|
|
56
|
+
"engines": {
|
|
57
|
+
"node": ">=18.0.0"
|
|
53
58
|
}
|
|
54
59
|
}
|