n8n-nodes-tts-bigboss 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TTSBigBoss.node.js +423 -0
- package/gulpfile.js +9 -0
- package/nodes/TTSBigBoss/TTSBigBoss.node.ts +505 -0
- package/package.json +54 -0
- package/tsconfig.json +39 -0
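
For orientation (a summary drawn from the sources below): the package ships a single n8n node, TTS BigBoss, with two engines. The 'edge' engine opens a WebSocket to Microsoft's Edge read-aloud endpoint, streams back 24 kHz mono MP3 audio plus word-boundary metadata, and converts those boundaries into phrase-level SRT subtitles. The 'system' engine shells out to a locally installed tool (e.g. Piper or an XTTS/Coqui wrapper) through a command template with {text}, {output_file}, and optional {reference_audio} placeholders, and estimates SRT timing heuristically from the text length.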
package/dist/TTSBigBoss.node.js
ADDED

@@ -0,0 +1,423 @@
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  var desc = Object.getOwnPropertyDescriptor(m, k);
  if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
    desc = { enumerable: true, get: function() { return m[k]; } };
  }
  Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
  Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
  o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
  var ownKeys = function(o) {
    ownKeys = Object.getOwnPropertyNames || function (o) {
      var ar = [];
      for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
      return ar;
    };
    return ownKeys(o);
  };
  return function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
    __setModuleDefault(result, mod);
    return result;
  };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.TTSBigBoss = void 0;
const uuid_1 = require("uuid");
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const os = __importStar(require("os"));
const child_process = __importStar(require("child_process"));
const ws_1 = __importDefault(require("ws"));
const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
const EDGE_VOICES = [
  { name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
  { name: 'Arabic (Egypt) - Shakir', value: 'ar-EG-ShakirNeural' },
  { name: 'Arabic (Saudi) - Hamed', value: 'ar-SA-HamedNeural' },
  { name: 'Arabic (Saudi) - Zariyah', value: 'ar-SA-ZariyahNeural' },
  { name: 'English (US) - Aria', value: 'en-US-AriaNeural' },
  { name: 'English (US) - Guy', value: 'en-US-GuyNeural' },
  { name: 'English (US) - Jenny', value: 'en-US-JennyNeural' },
  { name: 'English (US) - Multilingual', value: 'en-US-AndrewMultilingualNeural' },
  { name: 'English (UK) - Sonia', value: 'en-GB-SoniaNeural' },
  { name: 'English (UK) - Ryan', value: 'en-GB-RyanNeural' },
  { name: 'French (France) - Denise', value: 'fr-FR-DeniseNeural' },
  { name: 'French (France) - Henri', value: 'fr-FR-HenriNeural' },
  { name: 'Spanish (Spain) - Elvira', value: 'es-ES-ElviraNeural' },
  { name: 'Spanish (Mexico) - Dalia', value: 'es-MX-DaliaNeural' },
  { name: 'German (Germany) - Katja', value: 'de-DE-KatjaNeural' },
  { name: 'German (Germany) - Conrad', value: 'de-DE-ConradNeural' },
  { name: 'Chinese (Mandarin) - Xiaoxiao', value: 'zh-CN-XiaoxiaoNeural' },
];
class TTSBigBoss {
  constructor() {
    this.description = {
      displayName: 'TTS BigBoss',
      name: 'tTSBigBoss',
      icon: 'fa:comment-dots',
      group: ['transform'],
      version: 1,
      description: 'Advanced Text-to-Speech (Edge-TTS & System Clone)',
      defaults: {
        name: 'TTS BigBoss',
      },
      inputs: ['main'],
      outputs: ['main'],
      properties: [
        {
          displayName: 'Engine',
          name: 'engine',
          type: 'options',
          options: [
            {
              name: 'Edge TTS (Free Online)',
              value: 'edge',
              description: 'High quality, multilingual, supports Arabic & perfect subtitles. Requires internet.',
            },
            {
              name: 'System Command (Local/Clone)',
              value: 'system',
              description: 'Use locally installed tools (Piper, XTTS, Coqui) via command line.',
            },
          ],
          default: 'edge',
        },
        {
          displayName: 'Text',
          name: 'text',
          type: 'string',
          default: '',
          required: true,
          description: 'The text to convert to speech',
        },
        {
          displayName: 'Output Property (Audio)',
          name: 'audioProperty',
          type: 'string',
          default: 'audio',
          description: 'Name of the binary property to output the audio to',
        },
        {
          displayName: 'Output Property (SRT)',
          name: 'srtProperty',
          type: 'string',
          default: 'subtitle',
          description: 'Name of the binary property to output the SRT subtitles to',
        },
        {
          displayName: 'Voice',
          name: 'edgeVoice',
          type: 'options',
          options: [
            ...EDGE_VOICES,
            { name: 'Custom (Enter ID)', value: 'custom' },
          ],
          default: 'en-US-AriaNeural',
          displayOptions: {
            show: {
              engine: ['edge'],
            },
          },
        },
        {
          displayName: 'Custom Voice ID',
          name: 'edgeVoiceCustom',
          type: 'string',
          default: '',
          placeholder: 'e.g. it-IT-ElsaNeural',
          displayOptions: {
            show: {
              engine: ['edge'],
              edgeVoice: ['custom'],
            },
          },
        },
        {
          displayName: 'Rate',
          name: 'edgeRate',
          type: 'string',
          default: '+0%',
          description: 'Speed of speech (e.g. +10%, -20%)',
          displayOptions: {
            show: {
              engine: ['edge'],
            },
          },
        },
        {
          displayName: 'Pitch',
          name: 'edgePitch',
          type: 'string',
          default: '+0Hz',
          description: 'Pitch adjustment (e.g. +5Hz, -5Hz)',
          displayOptions: {
            show: {
              engine: ['edge'],
            },
          },
        },
        {
          displayName: 'Command',
          name: 'systemCommand',
          type: 'string',
          default: 'piper --model en_US-lessac-medium --output_file "{output_file}"',
          description: 'Command to execute. Use placeholders: "{text}" for input text, "{output_file}" for the temporary audio path.',
          displayOptions: {
            show: {
              engine: ['system'],
            },
          },
        },
        {
          displayName: 'Clone Audio Input',
          name: 'cloneInput',
          type: 'boolean',
          default: false,
          description: 'If true, allows using an input binary file as a voice cloning reference',
          displayOptions: {
            show: {
              engine: ['system'],
            },
          },
        },
        {
          displayName: 'Clone Input Property',
          name: 'cloneInputProperty',
          type: 'string',
          default: 'reference_audio',
          displayOptions: {
            show: {
              engine: ['system'],
              cloneInput: [true],
            },
          },
          description: 'Binary property name containing the reference audio for cloning. Use placeholder "{reference_audio}" in command.',
        },
      ],
    };
  }
  async execute() {
    const items = this.getInputData();
    const returnData = [];
    const tempDir = os.tmpdir();
    for (let i = 0; i < items.length; i++) {
      try {
        const engine = this.getNodeParameter('engine', i);
        const text = this.getNodeParameter('text', i);
        const audioProp = this.getNodeParameter('audioProperty', i);
        const srtProp = this.getNodeParameter('srtProperty', i);
        let audioBuffer;
        let srtBuffer;
        if (engine === 'edge') {
          let voice = this.getNodeParameter('edgeVoice', i);
          if (voice === 'custom') {
            voice = this.getNodeParameter('edgeVoiceCustom', i);
          }
          const rate = this.getNodeParameter('edgeRate', i);
          const pitch = this.getNodeParameter('edgePitch', i);
          const result = await runEdgeTTS(text, voice, rate, pitch);
          audioBuffer = result.audio;
          srtBuffer = Buffer.from(result.srt, 'utf8');
        }
        else {
          const commandTpl = this.getNodeParameter('systemCommand', i);
          const useClone = this.getNodeParameter('cloneInput', i, false);
          const outFile = path.join(tempDir, `tts_out_${(0, uuid_1.v4)()}.wav`);
          let cmd = commandTpl
            .replace(/"{output_file}"/g, `"${outFile}"`)
            .replace(/{output_file}/g, `"${outFile}"`)
            .replace(/"{text}"/g, `"${text.replace(/"/g, '\\"')}"`)
            .replace(/{text}/g, `"${text.replace(/"/g, '\\"')}"`);
          if (useClone) {
            const cloneProp = this.getNodeParameter('cloneInputProperty', i);
            const cloneData = await this.helpers.getBinaryDataBuffer(i, cloneProp);
            const cloneFile = path.join(tempDir, `tts_ref_${(0, uuid_1.v4)()}.wav`);
            fs.writeFileSync(cloneFile, cloneData);
            cmd = cmd
              .replace(/"{reference_audio}"/g, `"${cloneFile}"`)
              .replace(/{reference_audio}/g, `"${cloneFile}"`);
          }
          await new Promise((resolve, reject) => {
            child_process.exec(cmd, (error, stdout, stderr) => {
              if (error) {
                reject(new Error(`System command failed: ${stderr || error.message}`));
              }
              else {
                resolve(stdout);
              }
            });
          });
          if (!fs.existsSync(outFile)) {
            throw new Error('System command did not produce output file at expected path');
          }
          audioBuffer = fs.readFileSync(outFile);
          srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
          if (fs.existsSync(outFile))
            fs.unlinkSync(outFile);
        }
        const newItem = {
          json: items[i].json,
          binary: {},
        };
        if (items[i].binary) {
          newItem.binary = { ...items[i].binary };
        }
        newItem.binary[audioProp] = await this.helpers.prepareBinaryData(audioBuffer, 'speech.mp3', 'audio/mpeg');
        newItem.binary[srtProp] = await this.helpers.prepareBinaryData(srtBuffer, 'subtitles.srt', 'application/x-subrip');
        returnData.push(newItem);
      }
      catch (error) {
        if (this.continueOnFail()) {
          returnData.push({ json: { error: error.message }, binary: {} });
          continue;
        }
        throw error;
      }
    }
    return [returnData];
  }
}
exports.TTSBigBoss = TTSBigBoss;
async function runEdgeTTS(text, voice, rate, pitch) {
  return new Promise((resolve, reject) => {
    const ws = new ws_1.default(EDGE_URL);
    const requestId = (0, uuid_1.v4)().replace(/-/g, '');
    const audioChunks = [];
    const wordBoundaries = [];
    ws.on('open', () => {
      const configMsg = `X-Timestamp:${new Date().toISOString()}\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n` +
        JSON.stringify({
          context: {
            synthesis: {
              audio: {
                metadataOptions: {
                  sentenceBoundaryEnabled: 'false',
                  wordBoundaryEnabled: 'true',
                },
                outputFormat: 'audio-24khz-48kbitrate-mono-mp3',
              },
            },
          },
        });
      ws.send(configMsg);
      const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='${voice}'><prosody pitch='${pitch}' rate='${rate}'>${text}</prosody></voice></speak>`;
      const ssmlMsg = `X-RequestId:${requestId}\r\nX-Timestamp:${new Date().toISOString()}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n` + ssml;
      ws.send(ssmlMsg);
    });
    ws.on('message', (data, isBinary) => {
      const textData = data.toString();
      if (textData.includes('Path:turn.start')) {
      }
      else if (textData.includes('Path:turn.end')) {
        ws.close();
        const fullAudio = Buffer.concat(audioChunks);
        const srt = buildSRT(wordBoundaries);
        resolve({ audio: fullAudio, srt });
      }
      else if (textData.includes('Path:audio.metadata')) {
        try {
          const parts = textData.split('\r\n\r\n');
          if (parts.length > 1) {
            const json = JSON.parse(parts[1]);
            if (json.Metadata && Array.isArray(json.Metadata)) {
              for (const meta of json.Metadata) {
                if (meta.Type === 'Word') {
                  wordBoundaries.push({
                    offset: meta.Offset,
                    duration: meta.Duration,
                    text: meta.Text,
                  });
                }
              }
            }
          }
        }
        catch (e) {
        }
      }
      else if (isBinary || (data.length > 2 && data[0] === 0x00 && data[1] === 0x67)) {
        const headerLen = data.readUInt16BE(0);
        if (data.length > headerLen + 2) {
          const audioData = data.slice(headerLen + 2);
          audioChunks.push(audioData);
        }
      }
    });
    ws.on('error', (err) => {
      reject(err);
    });
  });
}
function buildSRT(words) {
  let srt = '';
  let counter = 1;
  let currentPhrase = [];
  let currentLength = 0;
  const flushPhrase = () => {
    if (currentPhrase.length === 0)
      return;
    const startTick = currentPhrase[0].offset;
    const endTick = currentPhrase[currentPhrase.length - 1].offset + currentPhrase[currentPhrase.length - 1].duration;
    const startTime = ticksToTime(startTick);
    const endTime = ticksToTime(endTick);
    const text = currentPhrase.map(w => w.text).join(' ');
    srt += `${counter++}\n${startTime} --> ${endTime}\n${text}\n\n`;
    currentPhrase = [];
    currentLength = 0;
  };
  for (let i = 0; i < words.length; i++) {
    const w = words[i];
    const prevW = i > 0 ? words[i - 1] : null;
    if (prevW && (w.offset - (prevW.offset + prevW.duration) > 5000000)) {
      flushPhrase();
    }
    currentPhrase.push(w);
    currentLength += w.text.length;
    if (currentLength > 40 || ['.', '?', '!'].includes(w.text.slice(-1))) {
      flushPhrase();
    }
  }
  flushPhrase();
  return srt;
}
function ticksToTime(ticks) {
  const ms = ticks / 10000;
  const date = new Date(0, 0, 0, 0, 0, 0, ms);
  const h = date.getHours().toString().padStart(2, '0');
  const m = date.getMinutes().toString().padStart(2, '0');
  const s = date.getSeconds().toString().padStart(2, '0');
  const mili = date.getMilliseconds().toString().padStart(3, '0');
  return `${h}:${m}:${s},${mili}`;
}
function generateHeuristicSRT(text, byteLength) {
  const totalDurationSec = text.length / 15;
  const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
  let currentStartTime = 0;
  let srt = '';
  let counter = 1;
  const msToSrt = (ms) => {
    const date = new Date(0, 0, 0, 0, 0, 0, ms);
    return `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}:${date.getSeconds().toString().padStart(2, '0')},${date.getMilliseconds().toString().padStart(3, '0')}`;
  };
  for (const sentence of sentences) {
    const sentenceDuration = (sentence.length / text.length) * (totalDurationSec * 1000);
    const endTime = currentStartTime + sentenceDuration;
    srt += `${counter++}\n${msToSrt(currentStartTime)} --> ${msToSrt(endTime)}\n${sentence.trim()}\n\n`;
    currentStartTime = endTime;
  }
  return srt;
}
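
To make the subtitle format concrete: buildSRT writes standard SRT cues, and ticksToTime converts the 100-ns tick offsets reported by Edge TTS into timestamps (1 ms = 10,000 ticks, so an Offset of 23,500,000 ticks is 2,350 ms, i.e. 00:00:02,350). An invented fragment of the generated file (text and timings are illustrative only) would look like this:

    1
    00:00:00,050 --> 00:00:01,800
    Welcome to TTS BigBoss.

    2
    00:00:02,350 --> 00:00:04,100
    A pause longer than 500 ms starts a new cue.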
package/nodes/TTSBigBoss/TTSBigBoss.node.ts
ADDED

@@ -0,0 +1,505 @@
import {
  IExecuteFunctions,
  INodeExecutionData,
  INodeType,
  INodeTypeDescription,
} from 'n8n-workflow';
import { v4 as uuidv4 } from 'uuid';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import * as child_process from 'child_process';
import WebSocket from 'ws';

// Edge TTS Constants
const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
const EDGE_VOICES = [
  // Arabic
  { name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
  { name: 'Arabic (Egypt) - Shakir', value: 'ar-EG-ShakirNeural' },
  { name: 'Arabic (Saudi) - Hamed', value: 'ar-SA-HamedNeural' },
  { name: 'Arabic (Saudi) - Zariyah', value: 'ar-SA-ZariyahNeural' },
  // English
  { name: 'English (US) - Aria', value: 'en-US-AriaNeural' },
  { name: 'English (US) - Guy', value: 'en-US-GuyNeural' },
  { name: 'English (US) - Jenny', value: 'en-US-JennyNeural' },
  { name: 'English (US) - Multilingual', value: 'en-US-AndrewMultilingualNeural' },
  { name: 'English (UK) - Sonia', value: 'en-GB-SoniaNeural' },
  { name: 'English (UK) - Ryan', value: 'en-GB-RyanNeural' },
  // French
  { name: 'French (France) - Denise', value: 'fr-FR-DeniseNeural' },
  { name: 'French (France) - Henri', value: 'fr-FR-HenriNeural' },
  // Spanish
  { name: 'Spanish (Spain) - Elvira', value: 'es-ES-ElviraNeural' },
  { name: 'Spanish (Mexico) - Dalia', value: 'es-MX-DaliaNeural' },
  // German
  { name: 'German (Germany) - Katja', value: 'de-DE-KatjaNeural' },
  { name: 'German (Germany) - Conrad', value: 'de-DE-ConradNeural' },
  // Chinese
  { name: 'Chinese (Mandarin) - Xiaoxiao', value: 'zh-CN-XiaoxiaoNeural' },
];

interface WordBoundary {
  offset: number;
  duration: number;
  text: string;
}

export class TTSBigBoss implements INodeType {
  description: INodeTypeDescription = {
    displayName: 'TTS BigBoss',
    name: 'tTSBigBoss',
    icon: 'fa:comment-dots',
    group: ['transform'],
    version: 1,
    description: 'Advanced Text-to-Speech (Edge-TTS & System Clone)',
    defaults: {
      name: 'TTS BigBoss',
    },
    inputs: ['main'],
    outputs: ['main'],
    properties: [
      {
        displayName: 'Engine',
        name: 'engine',
        type: 'options',
        options: [
          {
            name: 'Edge TTS (Free Online)',
            value: 'edge',
            description: 'High quality, multilingual, supports Arabic & perfect subtitles. Requires internet.',
          },
          {
            name: 'System Command (Local/Clone)',
            value: 'system',
            description: 'Use locally installed tools (Piper, XTTS, Coqui) via command line.',
          },
        ],
        default: 'edge',
      },
      // ----------------------------------
      // Common Settings
      // ----------------------------------
      {
        displayName: 'Text',
        name: 'text',
        type: 'string',
        default: '',
        required: true,
        description: 'The text to convert to speech',
      },
      {
        displayName: 'Output Property (Audio)',
        name: 'audioProperty',
        type: 'string',
        default: 'audio',
        description: 'Name of the binary property to output the audio to',
      },
      {
        displayName: 'Output Property (SRT)',
        name: 'srtProperty',
        type: 'string',
        default: 'subtitle',
        description: 'Name of the binary property to output the SRT subtitles to',
      },
      // ----------------------------------
      // Edge TTS Settings
      // ----------------------------------
      {
        displayName: 'Voice',
        name: 'edgeVoice',
        type: 'options',
        options: [
          ...EDGE_VOICES,
          { name: 'Custom (Enter ID)', value: 'custom' },
        ],
        default: 'en-US-AriaNeural',
        displayOptions: {
          show: {
            engine: ['edge'],
          },
        },
      },
      {
        displayName: 'Custom Voice ID',
        name: 'edgeVoiceCustom',
        type: 'string',
        default: '',
        placeholder: 'e.g. it-IT-ElsaNeural',
        displayOptions: {
          show: {
            engine: ['edge'],
            edgeVoice: ['custom'],
          },
        },
      },
      {
        displayName: 'Rate',
        name: 'edgeRate',
        type: 'string',
        default: '+0%',
        description: 'Speed of speech (e.g. +10%, -20%)',
        displayOptions: {
          show: {
            engine: ['edge'],
          },
        },
      },
      {
        displayName: 'Pitch',
        name: 'edgePitch',
        type: 'string',
        default: '+0Hz',
        description: 'Pitch adjustment (e.g. +5Hz, -5Hz)',
        displayOptions: {
          show: {
            engine: ['edge'],
          },
        },
      },
      // ----------------------------------
      // System Command Settings
      // ----------------------------------
      {
        displayName: 'Command',
        name: 'systemCommand',
        type: 'string',
        default: 'piper --model en_US-lessac-medium --output_file "{output_file}"',
        description: 'Command to execute. Use placeholders: "{text}" for input text, "{output_file}" for the temporary audio path.',
        displayOptions: {
          show: {
            engine: ['system'],
          },
        },
      },
      {
        displayName: 'Clone Audio Input',
        name: 'cloneInput',
        type: 'boolean',
        default: false,
        description: 'If true, allows using an input binary file as a voice cloning reference',
        displayOptions: {
          show: {
            engine: ['system'],
          },
        },
      },
      {
        displayName: 'Clone Input Property',
        name: 'cloneInputProperty',
        type: 'string',
        default: 'reference_audio',
        displayOptions: {
          show: {
            engine: ['system'],
            cloneInput: [true],
          },
        },
        description: 'Binary property name containing the reference audio for cloning. Use placeholder "{reference_audio}" in command.',
      },
    ],
  };

  async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
    const items = this.getInputData();
    const returnData: INodeExecutionData[] = [];
    const tempDir = os.tmpdir();

    for (let i = 0; i < items.length; i++) {
      try {
        const engine = this.getNodeParameter('engine', i) as string;
        const text = this.getNodeParameter('text', i) as string;
        const audioProp = this.getNodeParameter('audioProperty', i) as string;
        const srtProp = this.getNodeParameter('srtProperty', i) as string;

        let audioBuffer: Buffer;
        let srtBuffer: Buffer;

        if (engine === 'edge') {
          // ----------------------------------
          // EDGE TTS EXECUTION
          // ----------------------------------
          let voice = this.getNodeParameter('edgeVoice', i) as string;
          if (voice === 'custom') {
            voice = this.getNodeParameter('edgeVoiceCustom', i) as string;
          }
          const rate = this.getNodeParameter('edgeRate', i) as string;
          const pitch = this.getNodeParameter('edgePitch', i) as string;

          const result = await runEdgeTTS(text, voice, rate, pitch);
          audioBuffer = result.audio;
          srtBuffer = Buffer.from(result.srt, 'utf8');

        } else {
          // ----------------------------------
          // SYSTEM COMMAND EXECUTION
          // ----------------------------------
          const commandTpl = this.getNodeParameter('systemCommand', i) as string;
          const useClone = this.getNodeParameter('cloneInput', i, false) as boolean;

          const outFile = path.join(tempDir, `tts_out_${uuidv4()}.wav`);
          let cmd = commandTpl
            .replace(/"{output_file}"/g, `"${outFile}"`)
            .replace(/{output_file}/g, `"${outFile}"`)
            .replace(/"{text}"/g, `"${text.replace(/"/g, '\\"')}"`) // Basic escape
            .replace(/{text}/g, `"${text.replace(/"/g, '\\"')}"`);

          // Handle Clone Input
          if (useClone) {
            const cloneProp = this.getNodeParameter('cloneInputProperty', i) as string;
            const cloneData = await this.helpers.getBinaryDataBuffer(i, cloneProp);
            const cloneFile = path.join(tempDir, `tts_ref_${uuidv4()}.wav`);
            fs.writeFileSync(cloneFile, cloneData);
            cmd = cmd
              .replace(/"{reference_audio}"/g, `"${cloneFile}"`)
              .replace(/{reference_audio}/g, `"${cloneFile}"`);
          }

          // Execute
          await new Promise((resolve, reject) => {
            child_process.exec(cmd, (error, stdout, stderr) => {
              if (error) {
                reject(new Error(`System command failed: ${stderr || error.message}`));
              } else {
                resolve(stdout);
              }
            });
          });

          if (!fs.existsSync(outFile)) {
            throw new Error('System command did not produce output file at expected path');
          }

          audioBuffer = fs.readFileSync(outFile);

          // Generate Heuristic SRT (estimate timestamps)
          srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');

          // Cleanup
          if (fs.existsSync(outFile)) fs.unlinkSync(outFile);
        }

        // ----------------------------------
        // OUTPUT CONSTRUCTION
        // ----------------------------------
        const newItem: INodeExecutionData = {
          json: items[i].json,
          binary: {},
        };

        if (items[i].binary) {
          newItem.binary = { ...items[i].binary };
        }

        newItem.binary![audioProp] = await this.helpers.prepareBinaryData(
          audioBuffer,
          'speech.mp3', // Edge sends MP3; Piper usually emits WAV, but the generic name is kept
          'audio/mpeg'
        );

        newItem.binary![srtProp] = await this.helpers.prepareBinaryData(
          srtBuffer,
          'subtitles.srt',
          'application/x-subrip'
        );

        returnData.push(newItem);

      } catch (error) {
        if (this.continueOnFail()) {
          returnData.push({ json: { error: error.message }, binary: {} });
          continue;
        }
        throw error;
      }
    }

    return [returnData];
  }
}

// --------------------------------------------------------------------------
// EDGE TTS IMPLEMENTATION
// --------------------------------------------------------------------------
async function runEdgeTTS(text: string, voice: string, rate: string, pitch: string): Promise<{ audio: Buffer; srt: string }> {
  return new Promise((resolve, reject) => {
    const ws = new WebSocket(EDGE_URL);
    const requestId = uuidv4().replace(/-/g, '');
    const audioChunks: Buffer[] = [];
    const wordBoundaries: WordBoundary[] = [];

    ws.on('open', () => {
      // 1. Send Config
      const configMsg = `X-Timestamp:${new Date().toISOString()}\r\nContent-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n` +
        JSON.stringify({
          context: {
            synthesis: {
              audio: {
                metadataOptions: {
                  sentenceBoundaryEnabled: 'false',
                  wordBoundaryEnabled: 'true',
                },
                outputFormat: 'audio-24khz-48kbitrate-mono-mp3',
              },
            },
          },
        });
      ws.send(configMsg);

      // 2. Send SSML
      const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='${voice}'><prosody pitch='${pitch}' rate='${rate}'>${text}</prosody></voice></speak>`;
      const ssmlMsg = `X-RequestId:${requestId}\r\nX-Timestamp:${new Date().toISOString()}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n` + ssml;
      ws.send(ssmlMsg);
    });

    ws.on('message', (data: Buffer, isBinary: boolean) => {
      const textData = data.toString();

      if (textData.includes('Path:turn.start')) {
        // Start of turn
      } else if (textData.includes('Path:turn.end')) {
        // End of turn - finish
        ws.close();
        const fullAudio = Buffer.concat(audioChunks);
        const srt = buildSRT(wordBoundaries);
        resolve({ audio: fullAudio, srt });
      } else if (textData.includes('Path:audio.metadata')) {
        // Parse metadata (word boundaries)
        try {
          const parts = textData.split('\r\n\r\n');
          if (parts.length > 1) {
            const json = JSON.parse(parts[1]);
            if (json.Metadata && Array.isArray(json.Metadata)) {
              for (const meta of json.Metadata) {
                if (meta.Type === 'Word') {
                  wordBoundaries.push({
                    offset: meta.Offset, // 100ns units
                    duration: meta.Duration,
                    text: meta.Text,
                  });
                }
              }
            }
          }
        } catch (e) {
          // Ignore parse errors
        }
      } else if (isBinary || (data.length > 2 && data[0] === 0x00 && data[1] === 0x67)) {
        // Binary audio frame. Layout: 2-byte big-endian header length, then a text header
        // ("X-RequestId:...\r\nPath:audio\r\n..."), then the raw MP3 payload.
        // Strip the header so only clean audio bytes are kept.
        const headerLen = data.readUInt16BE(0);
        if (data.length > headerLen + 2) {
          const audioData = data.slice(headerLen + 2);
          audioChunks.push(audioData);
        }
      }
    });

    ws.on('error', (err) => {
      reject(err);
    });
  });
}

function buildSRT(words: WordBoundary[]): string {
  let srt = '';
  let counter = 1;
  // Edge TTS reports word-level boundaries, but subtitles read better at phrase level.
  // Heuristic: group words until ~40 characters, a sentence-ending punctuation mark,
  // or a pause longer than 500 ms.

  let currentPhrase: WordBoundary[] = [];
  let currentLength = 0;

  const flushPhrase = () => {
    if (currentPhrase.length === 0) return;
    const startTick = currentPhrase[0].offset;
    const endTick = currentPhrase[currentPhrase.length - 1].offset + currentPhrase[currentPhrase.length - 1].duration;

    const startTime = ticksToTime(startTick);
    const endTime = ticksToTime(endTick);
    const text = currentPhrase.map(w => w.text).join(' ');

    srt += `${counter++}\n${startTime} --> ${endTime}\n${text}\n\n`;
    currentPhrase = [];
    currentLength = 0;
  };

  for (let i = 0; i < words.length; i++) {
    const w = words[i];
    const prevW = i > 0 ? words[i - 1] : null;

    // Flush on a pause: offsets are in 100ns ticks (1 ms = 10,000 ticks),
    // so a gap above 5,000,000 ticks (500 ms) starts a new subtitle.
    if (prevW && (w.offset - (prevW.offset + prevW.duration) > 5000000)) {
      flushPhrase();
    }

    currentPhrase.push(w);
    currentLength += w.text.length;

    if (currentLength > 40 || ['.', '?', '!'].includes(w.text.slice(-1))) {
      flushPhrase();
    }
  }
  flushPhrase();

  return srt;
}

function ticksToTime(ticks: number): string {
  // 1 tick = 100ns = 0.0001ms
  const ms = ticks / 10000;
  const date = new Date(0, 0, 0, 0, 0, 0, ms);
  const h = date.getHours().toString().padStart(2, '0');
  const m = date.getMinutes().toString().padStart(2, '0');
  const s = date.getSeconds().toString().padStart(2, '0');
  const mili = date.getMilliseconds().toString().padStart(3, '0');
  return `${h}:${m}:${s},${mili}`;
}

// --------------------------------------------------------------------------
// HEURISTIC SRT IMPLEMENTATION (For System Command)
// --------------------------------------------------------------------------
function generateHeuristicSRT(text: string, byteLength: number): string {
  // The exact audio duration is unknown here (the system command may emit WAV or MP3 at any
  // bitrate), so it is estimated from the text instead: an average reading speed of ~150 wpm
  // works out to roughly 15 characters per second.
  // Note: byteLength is currently unused; probing the file (e.g. with ffprobe) would give exact timing.

  const totalDurationSec = text.length / 15;

  const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
  let currentStartTime = 0;
  let srt = '';
  let counter = 1;

  const msToSrt = (ms: number) => {
    const date = new Date(0, 0, 0, 0, 0, 0, ms);
    return `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}:${date.getSeconds().toString().padStart(2, '0')},${date.getMilliseconds().toString().padStart(3, '0')}`;
  };

  for (const sentence of sentences) {
    const sentenceDuration = (sentence.length / text.length) * (totalDurationSec * 1000);
    const endTime = currentStartTime + sentenceDuration;

    srt += `${counter++}\n${msToSrt(currentStartTime)} --> ${msToSrt(endTime)}\n${sentence.trim()}\n\n`;
    currentStartTime = endTime;
  }

  return srt;
}
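
As a usage sketch of the Edge path above: runEdgeTTS takes the text, voice ID, rate, and pitch, and resolves with the MP3 buffer and the finished SRT string. The snippet below is a minimal illustration only; it assumes runEdgeTTS were exported from the module (in the published package it is file-private), and the file names are arbitrary.

    import { writeFileSync } from 'fs';
    // Hypothetical import: runEdgeTTS is not actually exported by the published module.
    import { runEdgeTTS } from './TTSBigBoss.node';

    async function demo(): Promise<void> {
      const { audio, srt } = await runEdgeTTS(
        'Hello from TTS BigBoss.', // text
        'en-US-AriaNeural',        // voice (any EDGE_VOICES value or a custom ID)
        '+0%',                     // rate
        '+0Hz',                    // pitch
      );
      writeFileSync('speech.mp3', audio);  // 24 kHz / 48 kbit/s mono MP3, per the config message
      writeFileSync('subtitles.srt', srt); // phrase-grouped cues built from the word boundaries
    }

    demo().catch(console.error);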
package/package.json
ADDED

@@ -0,0 +1,54 @@
{
  "name": "n8n-nodes-tts-bigboss",
  "version": "1.0.0",
  "description": "BigBoss TTS node with multi-engine support and automatic SRT generation",
  "keywords": [
    "n8n-community-node-package",
    "n8n",
    "tts",
    "audio",
    "srt",
    "arabic",
    "edge-tts",
    "piper"
  ],
  "license": "MIT",
  "author": "isemo007",
  "main": "index.js",
  "scripts": {
    "build": "tsc && gulp build:icons",
    "dev": "tsc --watch",
    "lint": "tslint -p tsconfig.json -c tslint.json",
    "format": "prettier nodes --write"
  },
  "n8n": {
    "n8nNodesApiVersion": 1,
    "nodes": [
      "dist/nodes/TTSBigBoss/TTSBigBoss.node.js"
    ]
  },
  "peerDependencies": {
    "n8n-workflow": "*"
  },
  "dependencies": {
    "lodash": "^4.17.21",
    "n8n-core": "^1.75.0",
    "n8n-workflow": "^1.70.0",
    "uuid": "^9.0.0",
    "ws": "^8.13.0"
  },
  "devDependencies": {
    "@types/lodash": "^4.14.195",
    "@types/node": "^22.13.0",
    "@types/uuid": "^9.0.0",
    "@types/ws": "^8.5.5",
    "copy-webpack-plugin": "^11.0.0",
    "gulp": "^4.0.2",
    "npm-run-all": "^4.1.5",
    "prettier": "^2.8.8",
    "tslint": "^6.1.3",
    "typescript": "^5.0.0",
    "webpack": "^5.88.0",
    "webpack-cli": "^5.1.4"
  }
}
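
Usage note: the n8n-community-node-package keyword and the n8n.nodes entry pointing at dist/nodes/TTSBigBoss/TTSBigBoss.node.js mark this as a standard community node, so it can presumably be installed from the n8n UI (Settings > Community Nodes) or with npm install n8n-nodes-tts-bigboss on a self-hosted instance.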
package/tsconfig.json
ADDED

@@ -0,0 +1,39 @@
{
  "compilerOptions": {
    "rootDirs": [
      "nodes",
      "credentials"
    ],
    "outDir": "dist",
    "lib": [
      "es2019",
      "dom"
    ],
    "target": "es2019",
    "module": "commonjs",
    "moduleResolution": "node",
    "baseUrl": "./",
    "paths": {
      "n8n-workflow": [
        "node_modules/n8n-workflow"
      ],
      "n8n-core": [
        "node_modules/n8n-core"
      ]
    },
    "esModuleInterop": true,
    "skipLibCheck": true,
    "experimentalDecorators": true,
    "emitDecoratorMetadata": true,
    "removeComments": true,
    "resolveJsonModule": true
  },
  "include": [
    "nodes/**/*.ts",
    "credentials/**/*.ts"
  ],
  "exclude": [
    "node_modules",
    "dist"
  ]
}