n8n-nodes-tts-bigboss 1.0.7 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TTSBigBoss.node.js +153 -62
- package/nodes/TTSBigBoss/TTSBigBoss.node.ts +185 -97
- package/package.json +1 -1
package/dist/TTSBigBoss.node.js
CHANGED
|
@@ -73,6 +73,16 @@ const PIPER_MODELS = [
|
|
|
73
73
|
{ name: 'German - Thorsten (Male) - Low', value: 'de_DE-thorsten-low' },
|
|
74
74
|
];
|
|
75
75
|
const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
|
|
76
|
+
// Browser-like request headers: a Microsoft Edge 120 / Windows user agent,
// matching Sec-CH-UA client hints, and a chrome-extension Origin.
// NOTE(review): presumably sent with the WebSocket handshake to EDGE_URL so the
// request resembles Edge's built-in read-aloud client; the call site is not
// visible in this hunk, so confirm against the code that opens the socket.
const EDGE_HEADERS = {
    'Authority': 'speech.platform.bing.com',
    'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
    'Sec-CH-UA-Mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
    'Sec-CH-UA-Platform': '"Windows"',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
};
|
|
76
86
|
const EDGE_VOICES = [
|
|
77
87
|
{ name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
|
|
78
88
|
{ name: 'Arabic (Egypt) - Shakir', value: 'ar-EG-ShakirNeural' },
|
|
@@ -127,6 +137,11 @@ class TTSBigBoss {
|
|
|
127
137
|
value: 'coqui',
|
|
128
138
|
description: 'Connect to a running Coqui TTS/XTTS server.',
|
|
129
139
|
},
|
|
140
|
+
{
|
|
141
|
+
name: 'Kokoro TTS (Local OpenAI API)',
|
|
142
|
+
value: 'kokoro',
|
|
143
|
+
description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
|
|
144
|
+
},
|
|
130
145
|
{
|
|
131
146
|
name: 'System Command (Custom)',
|
|
132
147
|
value: 'system',
|
|
@@ -276,12 +291,47 @@ class TTSBigBoss {
|
|
|
276
291
|
},
|
|
277
292
|
description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
|
|
278
293
|
},
|
|
294
|
+
{
|
|
295
|
+
displayName: 'API URL',
|
|
296
|
+
name: 'kokoroUrl',
|
|
297
|
+
type: 'string',
|
|
298
|
+
default: 'http://localhost:8880/v1/audio/speech',
|
|
299
|
+
description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
|
|
300
|
+
displayOptions: {
|
|
301
|
+
show: {
|
|
302
|
+
engine: ['kokoro'],
|
|
303
|
+
},
|
|
304
|
+
},
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
displayName: 'Voice / Model',
|
|
308
|
+
name: 'kokoroVoice',
|
|
309
|
+
type: 'string',
|
|
310
|
+
default: 'af_bella',
|
|
311
|
+
description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
|
|
312
|
+
displayOptions: {
|
|
313
|
+
show: {
|
|
314
|
+
engine: ['kokoro'],
|
|
315
|
+
},
|
|
316
|
+
},
|
|
317
|
+
},
|
|
318
|
+
{
|
|
319
|
+
displayName: 'Speed',
|
|
320
|
+
name: 'kokoroSpeed',
|
|
321
|
+
type: 'number',
|
|
322
|
+
default: 1.0,
|
|
323
|
+
displayOptions: {
|
|
324
|
+
show: {
|
|
325
|
+
engine: ['kokoro'],
|
|
326
|
+
},
|
|
327
|
+
},
|
|
328
|
+
},
|
|
279
329
|
{
|
|
280
330
|
displayName: 'Base Server URL',
|
|
281
331
|
name: 'coquiUrl',
|
|
282
332
|
type: 'string',
|
|
283
|
-
default: 'http://
|
|
284
|
-
description: 'Base URL of Coqui server (e.g. http://
|
|
333
|
+
default: 'http://localhost:5002',
|
|
334
|
+
description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
|
|
285
335
|
displayOptions: {
|
|
286
336
|
show: {
|
|
287
337
|
engine: ['coqui'],
|
|
@@ -434,6 +484,21 @@ class TTSBigBoss {
|
|
|
434
484
|
srtBuffer = Buffer.from(result.srt, 'utf8');
|
|
435
485
|
}
|
|
436
486
|
}
|
|
487
|
+
else if (engine === 'kokoro') {
|
|
488
|
+
const url = this.getNodeParameter('kokoroUrl', i);
|
|
489
|
+
const voice = this.getNodeParameter('kokoroVoice', i);
|
|
490
|
+
const speed = this.getNodeParameter('kokoroSpeed', i);
|
|
491
|
+
const payload = {
|
|
492
|
+
model: 'kokoro',
|
|
493
|
+
input: text,
|
|
494
|
+
voice: voice,
|
|
495
|
+
speed: speed,
|
|
496
|
+
response_format: 'mp3'
|
|
497
|
+
};
|
|
498
|
+
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
499
|
+
const duration = getAudioDuration(audioBuffer, 'mp3');
|
|
500
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
501
|
+
}
|
|
437
502
|
else if (engine === 'piper_local') {
|
|
438
503
|
let piperModel = this.getNodeParameter('piperModel', i);
|
|
439
504
|
if (piperModel === 'custom') {
|
|
@@ -456,7 +521,7 @@ class TTSBigBoss {
|
|
|
456
521
|
if (code === 0)
|
|
457
522
|
resolve();
|
|
458
523
|
if (errData.includes('json.exception.parse_error')) {
|
|
459
|
-
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted
|
|
524
|
+
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
|
|
460
525
|
}
|
|
461
526
|
else {
|
|
462
527
|
reject(new Error(`Piper failed (exit ${code}): ${errData}`));
|
|
@@ -467,7 +532,8 @@ class TTSBigBoss {
|
|
|
467
532
|
if (!fs.existsSync(outFile))
|
|
468
533
|
throw new Error('Piper did not produce output file');
|
|
469
534
|
audioBuffer = fs.readFileSync(outFile);
|
|
470
|
-
|
|
535
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
536
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
471
537
|
fs.unlinkSync(outFile);
|
|
472
538
|
}
|
|
473
539
|
else if (engine === 'coqui') {
|
|
@@ -488,7 +554,8 @@ class TTSBigBoss {
|
|
|
488
554
|
payload.speaker_id = speakerSelection;
|
|
489
555
|
}
|
|
490
556
|
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
491
|
-
|
|
557
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
558
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
492
559
|
}
|
|
493
560
|
else {
|
|
494
561
|
const commandTpl = this.getNodeParameter('systemCommand', i);
|
|
@@ -522,7 +589,8 @@ class TTSBigBoss {
|
|
|
522
589
|
throw new Error('System command did not produce output file at expected path');
|
|
523
590
|
}
|
|
524
591
|
audioBuffer = fs.readFileSync(outFile);
|
|
525
|
-
|
|
592
|
+
const duration = getAudioDuration(audioBuffer);
|
|
593
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
526
594
|
if (fs.existsSync(outFile))
|
|
527
595
|
fs.unlinkSync(outFile);
|
|
528
596
|
}
|
|
@@ -667,23 +735,41 @@ function ticksToTime(ticks) {
|
|
|
667
735
|
const mili = date.getMilliseconds().toString().padStart(3, '0');
|
|
668
736
|
return `${h}:${m}:${s},${mili}`;
|
|
669
737
|
}
|
|
670
|
-
function
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
const
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
738
|
+
/**
 * Download `url` over HTTPS and write the response body to `dest`.
 *
 * Redirects (301, 302, 303, 307, 308) are followed, with relative `Location`
 * values resolved against the current URL. The destination file is only opened
 * once a 200 response has arrived, so failed or redirected requests never
 * truncate or leave behind a partial file.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} dest - Path of the file to (over)write.
 * @param {number} [redirectsLeft=5] - Remaining redirect hops before giving up.
 * @returns {Promise<void>} Resolves once the file has been flushed and closed.
 * @throws {Error} (as a rejection) on network errors, write errors, non-200
 *   responses, redirects without a Location header, or too many redirects.
 */
async function downloadFile(url, dest, redirectsLeft = 5) {
    return new Promise((resolve, reject) => {
        let file;
        // Single failure path: tear down the partially written file (if any) and reject.
        const fail = (error) => {
            if (file) {
                file.destroy();
                fs.unlink(dest, () => { });
            }
            reject(error);
        };
        const request = https.get(url, (response) => {
            const status = response.statusCode;
            if (status === 301 || status === 302 || status === 303 || status === 307 || status === 308) {
                // Drain the redirect body so the socket is released.
                response.resume();
                const location = response.headers.location;
                if (!location) {
                    fail(new Error(`Redirect (${status}) without Location header for URL: ${url}`));
                    return;
                }
                if (redirectsLeft <= 0) {
                    fail(new Error(`Too many redirects for URL: ${url}`));
                    return;
                }
                let nextUrl;
                try {
                    // Location may be relative; resolve it against the current URL.
                    nextUrl = new URL(location, url).toString();
                }
                catch (e) {
                    fail(new Error(`Invalid redirect location '${location}' for URL: ${url}`));
                    return;
                }
                downloadFile(nextUrl, dest, redirectsLeft - 1).then(resolve, reject);
                return;
            }
            if (status && status !== 200) {
                response.resume();
                fail(new Error(`Download failed with status code: ${status} for URL: ${url}`));
                return;
            }
            // Only now is it safe to create/truncate the destination.
            file = fs.createWriteStream(dest);
            file.on('error', (err) => fail(new Error(`File write error: ${err.message}`)));
            response.on('error', (err) => fail(new Error(`Network error: ${err.message}`)));
            file.on('finish', () => {
                file.close((err) => {
                    if (err)
                        reject(err);
                    else
                        resolve();
                });
            });
            response.pipe(file);
        });
        request.on('error', (err) => fail(new Error(`Network error: ${err.message}`)));
    });
}
|
|
688
774
|
async function ensurePiperBinary(binDir) {
|
|
689
775
|
const platform = os.platform();
|
|
@@ -735,15 +821,15 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
|
|
|
735
821
|
else {
|
|
736
822
|
const parts = modelNameOrUrl.split('-');
|
|
737
823
|
if (parts.length >= 3) {
|
|
738
|
-
const langRegion = parts[0]
|
|
739
|
-
const voice = parts[
|
|
740
|
-
const quality = parts[
|
|
741
|
-
const lang =
|
|
824
|
+
const langRegion = parts[0];
|
|
825
|
+
const voice = parts[1];
|
|
826
|
+
const quality = parts[2];
|
|
827
|
+
const lang = langRegion.split('_')[0];
|
|
742
828
|
modelFilename = modelNameOrUrl + '.onnx';
|
|
743
829
|
modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
|
|
744
830
|
}
|
|
745
831
|
else {
|
|
746
|
-
throw new Error(`Invalid model name format: ${modelNameOrUrl}
|
|
832
|
+
throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
|
|
747
833
|
}
|
|
748
834
|
}
|
|
749
835
|
const modelPath = path.join(binDir, modelFilename);
|
|
@@ -772,41 +858,46 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
|
|
|
772
858
|
}
|
|
773
859
|
return { modelPath, configPath };
|
|
774
860
|
}
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
file.close();
|
|
785
|
-
downloadFile(response.headers.location, dest).then(resolve).catch(reject);
|
|
786
|
-
return;
|
|
787
|
-
}
|
|
788
|
-
if (response.statusCode && response.statusCode !== 200) {
|
|
789
|
-
file.close();
|
|
790
|
-
fs.unlink(dest, () => { });
|
|
791
|
-
reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
|
|
792
|
-
return;
|
|
861
|
+
/**
 * Estimate the playback duration (in seconds) of an audio buffer.
 *
 * Only RIFF/WAVE containers are understood. The chunk list is walked to find
 * the `fmt ` chunk (for the byte rate) and the `data` chunk (for the payload
 * size), so headers carrying extra chunks such as `LIST` are measured correctly.
 * When the declared data size is zero or larger than what is actually present
 * (as with streamed WAVs), the bytes remaining in the buffer are used instead.
 *
 * @param {Buffer} buffer - Raw audio bytes.
 * @param {'mp3'|'wav'|null} [hint=null] - Caller's expectation of the format.
 *   Kept for call-site compatibility; the container is always verified from
 *   the magic bytes, so a wrong hint can no longer produce a bogus duration.
 * @returns {number} Duration in seconds, or -1 when it cannot be determined
 *   (callers treat values <= 0 as "fall back to a text-length estimate").
 */
function getAudioDuration(buffer, hint = null) {
    if (!buffer || buffer.length < 12)
        return -1;
    const isWave = buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE';
    if (!isWave)
        return -1;
    let byteRate = 0;
    let offset = 12; // first chunk follows the 12-byte RIFF/WAVE preamble
    while (offset + 8 <= buffer.length) {
        const chunkId = buffer.toString('ascii', offset, offset + 4);
        const chunkSize = buffer.readUInt32LE(offset + 4);
        const bodyStart = offset + 8;
        if (chunkId === 'fmt ') {
            // Byte rate sits 8 bytes into the fmt body (after format, channels, sample rate).
            if (bodyStart + 12 <= buffer.length)
                byteRate = buffer.readUInt32LE(bodyStart + 8);
        }
        else if (chunkId === 'data') {
            if (byteRate <= 0)
                return -1;
            const available = buffer.length - bodyStart;
            const dataSize = (chunkSize > 0 && chunkSize <= available) ? chunkSize : available;
            return dataSize / byteRate;
        }
        // Chunks are word-aligned: odd-sized bodies are followed by one pad byte.
        offset = bodyStart + chunkSize + (chunkSize % 2);
    }
    return -1;
}
|
|
876
|
+
/**
 * Build an approximate SRT subtitle track for `text` without real word timings.
 *
 * The text is split into sentences on `.`, `!` and `?`; each sentence receives a
 * share of the total duration proportional to its character count. Cue
 * boundaries are derived from the cumulative character position so rounding
 * never accumulates and the last cue ends exactly at the total duration.
 *
 * @param {string} text - The spoken text.
 * @param {number} durationSeconds - Audio length in seconds. Any value that is
 *   not a positive number (e.g. -1 or NaN) means "unknown" and is replaced by
 *   an estimate of 14 characters per second.
 * @returns {string} SRT document, or an empty string when there is no text.
 */
function generateHeuristicSRT(text, durationSeconds) {
    // Nothing to caption: avoid dividing by a zero length and emitting NaN stamps.
    if (!text || text.trim() === '') {
        return '';
    }
    const totalSeconds = durationSeconds > 0 ? durationSeconds : text.length / 14;
    const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
    // Leading punctuation is not part of any match, so measure what was matched.
    let totalContentLen = 0;
    for (const sentence of sentences) {
        totalContentLen += sentence.length;
    }
    const pad = (value, width) => value.toString().padStart(width, '0');
    const msToSrt = (ms) => {
        // Round first so float noise (2999.9999...) does not floor to the wrong millisecond.
        const wholeMs = Math.round(ms);
        const totalSec = Math.floor(wholeMs / 1000);
        const h = Math.floor(totalSec / 3600);
        const m = Math.floor((totalSec % 3600) / 60);
        const s = totalSec % 60;
        return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(wholeMs % 1000, 3)}`;
    };
    const timeAtMs = (chars) => (chars / totalContentLen) * totalSeconds * 1000;
    let srt = '';
    let counter = 1;
    let consumedChars = 0;
    for (const sentence of sentences) {
        const startMs = timeAtMs(consumedChars);
        consumedChars += sentence.length;
        // A blank line terminates an SRT cue, so collapse blank lines inside the text.
        const cueText = sentence.trim().replace(/\n\s*\n/g, '\n');
        if (cueText === '') {
            // Whitespace-only segment: keep its time share but emit no empty cue.
            continue;
        }
        srt += `${counter++}\n${msToSrt(startMs)} --> ${msToSrt(timeAtMs(consumedChars))}\n${cueText}\n\n`;
    }
    return srt;
}
|
|
811
902
|
async function httpRequest(url, method = 'GET', body = null) {
|
|
812
903
|
const requestModule = url.startsWith('https') ? https : http;
|
|
@@ -61,6 +61,16 @@ const PIPER_MODELS = [
|
|
|
61
61
|
|
|
62
62
|
// Edge TTS Constants
|
|
63
63
|
const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
|
|
64
|
+
// Browser-like request headers: a Microsoft Edge 120 / Windows user agent,
// matching Sec-CH-UA client hints, and a chrome-extension Origin.
// NOTE(review): presumably sent with the WebSocket handshake to EDGE_URL so the
// request resembles Edge's built-in read-aloud client; the call site is not
// visible in this hunk, so confirm against the code that opens the socket.
const EDGE_HEADERS = {
    'Authority': 'speech.platform.bing.com',
    'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
    'Sec-CH-UA-Mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
    'Sec-CH-UA-Platform': '"Windows"',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
};
|
|
64
74
|
const EDGE_VOICES = [
|
|
65
75
|
// Arabic
|
|
66
76
|
{ name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
|
|
@@ -127,6 +137,11 @@ export class TTSBigBoss implements INodeType {
|
|
|
127
137
|
value: 'coqui',
|
|
128
138
|
description: 'Connect to a running Coqui TTS/XTTS server.',
|
|
129
139
|
},
|
|
140
|
+
{
|
|
141
|
+
name: 'Kokoro TTS (Local OpenAI API)',
|
|
142
|
+
value: 'kokoro',
|
|
143
|
+
description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
|
|
144
|
+
},
|
|
130
145
|
{
|
|
131
146
|
name: 'System Command (Custom)',
|
|
132
147
|
value: 'system',
|
|
@@ -289,14 +304,52 @@ export class TTSBigBoss implements INodeType {
|
|
|
289
304
|
description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
|
|
290
305
|
},
|
|
291
306
|
// ----------------------------------
|
|
307
|
+
// Kokoro Settings
|
|
308
|
+
// ----------------------------------
|
|
309
|
+
{
|
|
310
|
+
displayName: 'API URL',
|
|
311
|
+
name: 'kokoroUrl',
|
|
312
|
+
type: 'string',
|
|
313
|
+
default: 'http://localhost:8880/v1/audio/speech',
|
|
314
|
+
description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
|
|
315
|
+
displayOptions: {
|
|
316
|
+
show: {
|
|
317
|
+
engine: ['kokoro'],
|
|
318
|
+
},
|
|
319
|
+
},
|
|
320
|
+
},
|
|
321
|
+
{
|
|
322
|
+
displayName: 'Voice / Model',
|
|
323
|
+
name: 'kokoroVoice',
|
|
324
|
+
type: 'string',
|
|
325
|
+
default: 'af_bella',
|
|
326
|
+
description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
|
|
327
|
+
displayOptions: {
|
|
328
|
+
show: {
|
|
329
|
+
engine: ['kokoro'],
|
|
330
|
+
},
|
|
331
|
+
},
|
|
332
|
+
},
|
|
333
|
+
{
|
|
334
|
+
displayName: 'Speed',
|
|
335
|
+
name: 'kokoroSpeed',
|
|
336
|
+
type: 'number',
|
|
337
|
+
default: 1.0,
|
|
338
|
+
displayOptions: {
|
|
339
|
+
show: {
|
|
340
|
+
engine: ['kokoro'],
|
|
341
|
+
},
|
|
342
|
+
},
|
|
343
|
+
},
|
|
344
|
+
// ----------------------------------
|
|
292
345
|
// Coqui Server Settings
|
|
293
346
|
// ----------------------------------
|
|
294
347
|
{
|
|
295
348
|
displayName: 'Base Server URL',
|
|
296
349
|
name: 'coquiUrl',
|
|
297
350
|
type: 'string',
|
|
298
|
-
default: 'http://
|
|
299
|
-
description: 'Base URL of Coqui server (e.g. http://
|
|
351
|
+
default: 'http://localhost:5002',
|
|
352
|
+
description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
|
|
300
353
|
displayOptions: {
|
|
301
354
|
show: {
|
|
302
355
|
engine: ['coqui'],
|
|
@@ -461,6 +514,28 @@ export class TTSBigBoss implements INodeType {
|
|
|
461
514
|
srtBuffer = Buffer.from(result.srt, 'utf8');
|
|
462
515
|
}
|
|
463
516
|
|
|
517
|
+
} else if (engine === 'kokoro') {
|
|
518
|
+
// ----------------------------------
|
|
519
|
+
// KOKORO EXECUTION
|
|
520
|
+
// ----------------------------------
|
|
521
|
+
const url = this.getNodeParameter('kokoroUrl', i) as string;
|
|
522
|
+
const voice = this.getNodeParameter('kokoroVoice', i) as string;
|
|
523
|
+
const speed = this.getNodeParameter('kokoroSpeed', i) as number;
|
|
524
|
+
|
|
525
|
+
// Standard OpenAI 'createSpeech' payload
|
|
526
|
+
const payload = {
|
|
527
|
+
model: 'kokoro', // or whatever the server expects
|
|
528
|
+
input: text,
|
|
529
|
+
voice: voice,
|
|
530
|
+
speed: speed,
|
|
531
|
+
response_format: 'mp3'
|
|
532
|
+
};
|
|
533
|
+
|
|
534
|
+
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
535
|
+
|
|
536
|
+
const duration = getAudioDuration(audioBuffer, 'mp3');
|
|
537
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
538
|
+
|
|
464
539
|
} else if (engine === 'piper_local') {
|
|
465
540
|
// ----------------------------------
|
|
466
541
|
// PIPER LOCAL AUTOMATION
|
|
@@ -478,8 +553,6 @@ export class TTSBigBoss implements INodeType {
|
|
|
478
553
|
|
|
479
554
|
// 3. Execute
|
|
480
555
|
const outFile = path.join(tempDir, `piper_out_${uuidv4()}.wav`);
|
|
481
|
-
// Piper command: echo "text" | piper --model model.onnx --output_file out.wav
|
|
482
|
-
// We use child_process.spawn to pipe text safely
|
|
483
556
|
|
|
484
557
|
await new Promise<void>((resolve, reject) => {
|
|
485
558
|
const piperProc = child_process.spawn(piperBinPath, [
|
|
@@ -496,9 +569,8 @@ export class TTSBigBoss implements INodeType {
|
|
|
496
569
|
|
|
497
570
|
piperProc.on('close', (code) => {
|
|
498
571
|
if (code === 0) resolve();
|
|
499
|
-
// Check for the specific JSON error in stderr
|
|
500
572
|
if (errData.includes('json.exception.parse_error')) {
|
|
501
|
-
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted
|
|
573
|
+
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
|
|
502
574
|
} else {
|
|
503
575
|
reject(new Error(`Piper failed (exit ${code}): ${errData}`));
|
|
504
576
|
}
|
|
@@ -510,7 +582,8 @@ export class TTSBigBoss implements INodeType {
|
|
|
510
582
|
if (!fs.existsSync(outFile)) throw new Error('Piper did not produce output file');
|
|
511
583
|
|
|
512
584
|
audioBuffer = fs.readFileSync(outFile);
|
|
513
|
-
|
|
585
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
586
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
514
587
|
|
|
515
588
|
fs.unlinkSync(outFile);
|
|
516
589
|
|
|
@@ -526,7 +599,6 @@ export class TTSBigBoss implements INodeType {
|
|
|
526
599
|
const wavPath = this.getNodeParameter('coquiWavPath', i, '') as string;
|
|
527
600
|
const lang = this.getNodeParameter('coquiLang', i) as string;
|
|
528
601
|
|
|
529
|
-
// Construct Payload
|
|
530
602
|
const payload: any = {
|
|
531
603
|
text: text,
|
|
532
604
|
language_id: lang,
|
|
@@ -538,9 +610,9 @@ export class TTSBigBoss implements INodeType {
|
|
|
538
610
|
payload.speaker_id = speakerSelection;
|
|
539
611
|
}
|
|
540
612
|
|
|
541
|
-
// Execute Request
|
|
542
613
|
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
543
|
-
|
|
614
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
615
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
544
616
|
|
|
545
617
|
} else {
|
|
546
618
|
// ----------------------------------
|
|
@@ -556,7 +628,6 @@ export class TTSBigBoss implements INodeType {
|
|
|
556
628
|
.replace(/"{text}"/g, `"${text.replace(/"/g, '\\"')}"`) // Basic escape
|
|
557
629
|
.replace(/{text}/g, `"${text.replace(/"/g, '\\"')}"`);
|
|
558
630
|
|
|
559
|
-
// Handle Clone Input
|
|
560
631
|
if (useClone) {
|
|
561
632
|
const cloneProp = this.getNodeParameter('cloneInputProperty', i) as string;
|
|
562
633
|
const cloneData = await this.helpers.getBinaryDataBuffer(i, cloneProp);
|
|
@@ -567,7 +638,6 @@ export class TTSBigBoss implements INodeType {
|
|
|
567
638
|
.replace(/{reference_audio}/g, `"${cloneFile}"`);
|
|
568
639
|
}
|
|
569
640
|
|
|
570
|
-
// Execute
|
|
571
641
|
await new Promise((resolve, reject) => {
|
|
572
642
|
child_process.exec(cmd, (error, stdout, stderr) => {
|
|
573
643
|
if (error) {
|
|
@@ -583,9 +653,8 @@ export class TTSBigBoss implements INodeType {
|
|
|
583
653
|
}
|
|
584
654
|
|
|
585
655
|
audioBuffer = fs.readFileSync(outFile);
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
|
|
656
|
+
const duration = getAudioDuration(audioBuffer);
|
|
657
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
589
658
|
|
|
590
659
|
// Cleanup
|
|
591
660
|
if (fs.existsSync(outFile)) fs.unlinkSync(outFile);
|
|
@@ -784,41 +853,43 @@ function ticksToTime(ticks: number): string {
|
|
|
784
853
|
return `${h}:${m}:${s},${mili}`;
|
|
785
854
|
}
|
|
786
855
|
|
|
787
|
-
//
|
|
788
|
-
// HEURISTIC
|
|
789
|
-
//
|
|
790
|
-
function generateHeuristicSRT(text: string, byteLength: number): string {
|
|
791
|
-
// Estimate duration assuming typical MP3/WAV bitrate.
|
|
792
|
-
// Actually, system command usually produces WAV (PCM).
|
|
793
|
-
// Wrapper might produce MP3. Let's assume user command output.
|
|
794
|
-
// It is safer to assume ~15 chars per second reading speed if we don't know duration.
|
|
795
|
-
// Or assume 16000 bytes/sec for mono 16khz? Too unreliable.
|
|
796
|
-
// Let's use text length heuristic: Avg reading speed 150 wpm ~ 2.5 words/sec ~ 15 chars/sec?
|
|
797
|
-
// Let's try 15 chars / second.
|
|
798
|
-
|
|
799
|
-
const totalDurationSec = text.length / 15;
|
|
800
|
-
// Ideally we'd use 'ffprobe' to get exact duration, but let's stick to pure TS for now.
|
|
801
|
-
// If we really wanted to be robust, we'd add 'ffprobe' execution here.
|
|
856
|
+
// ----------------------------------
|
|
857
|
+
// OLD HEURISTIC REMOVED
|
|
858
|
+
// ----------------------------------
|
|
802
859
|
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
860
|
+
/**
 * Download `url` over HTTPS and write the response body to `dest`.
 *
 * Redirects (301, 302, 303, 307, 308) are followed, with relative `Location`
 * values resolved against the current URL. The destination file is only opened
 * once a 200 response has arrived, so failed or redirected requests never
 * truncate or leave behind a partial file.
 *
 * @param url - HTTPS URL to fetch.
 * @param dest - Path of the file to (over)write.
 * @param redirectsLeft - Remaining redirect hops before giving up (default 5).
 * @returns A promise that resolves once the file has been flushed and closed.
 * @throws Rejects with an `Error` on network errors, write errors, non-200
 *   responses, redirects without a Location header, or too many redirects.
 */
async function downloadFile(url: string, dest: string, redirectsLeft: number = 5): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        let file: fs.WriteStream | undefined;

        // Single failure path: tear down the partially written file (if any) and reject.
        const fail = (error: Error): void => {
            if (file) {
                file.destroy();
                fs.unlink(dest, () => { });
            }
            reject(error);
        };

        const request = https.get(url, (response) => {
            const status = response.statusCode;

            if (status === 301 || status === 302 || status === 303 || status === 307 || status === 308) {
                // Drain the redirect body so the socket is released.
                response.resume();
                const location = response.headers.location;
                if (!location) {
                    fail(new Error(`Redirect (${status}) without Location header for URL: ${url}`));
                    return;
                }
                if (redirectsLeft <= 0) {
                    fail(new Error(`Too many redirects for URL: ${url}`));
                    return;
                }
                let nextUrl: string;
                try {
                    // Location may be relative; resolve it against the current URL.
                    nextUrl = new URL(location, url).toString();
                } catch (e) {
                    fail(new Error(`Invalid redirect location '${location}' for URL: ${url}`));
                    return;
                }
                downloadFile(nextUrl, dest, redirectsLeft - 1).then(resolve, reject);
                return;
            }

            if (status && status !== 200) {
                response.resume();
                fail(new Error(`Download failed with status code: ${status} for URL: ${url}`));
                return;
            }

            // Only now is it safe to create/truncate the destination.
            const out = fs.createWriteStream(dest);
            file = out;
            out.on('error', (err) => fail(new Error(`File write error: ${err.message}`)));
            response.on('error', (err) => fail(new Error(`Network error: ${err.message}`)));
            out.on('finish', () => {
                out.close((err) => {
                    if (err) reject(err);
                    else resolve();
                });
            });
            response.pipe(out);
        });

        request.on('error', (err) => fail(new Error(`Network error: ${err.message}`)));
    });
}
|
|
823
894
|
|
|
824
895
|
// --------------------------------------------------------------------------
|
|
@@ -888,25 +959,23 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
|
|
|
888
959
|
modelUrl = modelNameOrUrl;
|
|
889
960
|
modelFilename = path.basename(modelNameOrUrl);
|
|
890
961
|
} else {
|
|
891
|
-
//
|
|
962
|
+
// Correct Parsing for 'lang_REGION-voice-quality'
|
|
963
|
+
// e.g. en_US-lessac-medium -> [en_US, lessac, medium]
|
|
964
|
+
// e.g. ar_JO-kareem-medium -> [ar_JO, kareem, medium]
|
|
965
|
+
|
|
892
966
|
const parts = modelNameOrUrl.split('-');
|
|
893
967
|
if (parts.length >= 3) {
|
|
894
|
-
const langRegion = parts[0]
|
|
895
|
-
const voice = parts[
|
|
896
|
-
const quality = parts[
|
|
897
|
-
const lang = parts[0]; // en
|
|
968
|
+
const langRegion = parts[0]; // 'ar_JO' or 'en_US'
|
|
969
|
+
const voice = parts[1]; // 'kareem'
|
|
970
|
+
const quality = parts[2]; // 'medium'
|
|
898
971
|
|
|
899
|
-
//
|
|
900
|
-
|
|
901
|
-
// url path: en/en_US/lessac/medium/en_US-lessac-medium.onnx
|
|
902
|
-
|
|
903
|
-
// Handle special case: ar_JO (no lang folder? check repo)
|
|
904
|
-
// Generally structure is: lang_short/lang_long/voice/quality/filename
|
|
972
|
+
// Lang code is first part of langRegion (split by _)
|
|
973
|
+
const lang = langRegion.split('_')[0]; // 'ar' form 'ar_JO'
|
|
905
974
|
|
|
906
975
|
modelFilename = modelNameOrUrl + '.onnx';
|
|
907
|
-
modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
|
|
976
|
+
modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
|
|
908
977
|
} else {
|
|
909
|
-
throw new Error(`Invalid model name format: ${modelNameOrUrl}
|
|
978
|
+
throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
|
|
910
979
|
}
|
|
911
980
|
}
|
|
912
981
|
|
|
@@ -942,47 +1011,66 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
|
|
|
942
1011
|
return { modelPath, configPath };
|
|
943
1012
|
}
|
|
944
1013
|
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
fs.unlink(dest, () => { }); // Cleanup
|
|
952
|
-
reject(new Error(`File write error: ${err.message}`));
|
|
953
|
-
});
|
|
1014
|
+
// --------------------------------------------------------------------------
|
|
1015
|
+
// HELPER: Determine Audio Duration for SRT
|
|
1016
|
+
// --------------------------------------------------------------------------
|
|
1017
|
+
/**
 * Estimate the playback duration (in seconds) of an audio buffer.
 *
 * Only RIFF/WAVE containers are understood. The chunk list is walked to find
 * the `fmt ` chunk (for the byte rate) and the `data` chunk (for the payload
 * size), so headers carrying extra chunks such as `LIST` are measured correctly.
 * When the declared data size is zero or larger than what is actually present
 * (as with streamed WAVs), the bytes remaining in the buffer are used instead.
 *
 * @param buffer - Raw audio bytes.
 * @param hint - Caller's expectation of the format. Kept for call-site
 *   compatibility; the container is always verified from the magic bytes, so a
 *   wrong hint can no longer produce a bogus duration.
 * @returns Duration in seconds, or -1 when it cannot be determined (callers
 *   treat values <= 0 as "fall back to a text-length estimate").
 */
function getAudioDuration(buffer: Buffer, hint: 'mp3' | 'wav' | null = null): number {
    if (!buffer || buffer.length < 12) return -1;

    const isWave =
        buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE';
    if (!isWave) return -1;

    let byteRate = 0;
    let offset = 12; // first chunk follows the 12-byte RIFF/WAVE preamble

    while (offset + 8 <= buffer.length) {
        const chunkId = buffer.toString('ascii', offset, offset + 4);
        const chunkSize = buffer.readUInt32LE(offset + 4);
        const bodyStart = offset + 8;

        if (chunkId === 'fmt ') {
            // Byte rate sits 8 bytes into the fmt body (after format, channels, sample rate).
            if (bodyStart + 12 <= buffer.length) {
                byteRate = buffer.readUInt32LE(bodyStart + 8);
            }
        } else if (chunkId === 'data') {
            if (byteRate <= 0) return -1;
            const available = buffer.length - bodyStart;
            const dataSize = chunkSize > 0 && chunkSize <= available ? chunkSize : available;
            return dataSize / byteRate;
        }

        // Chunks are word-aligned: odd-sized bodies are followed by one pad byte.
        offset = bodyStart + chunkSize + (chunkSize % 2);
    }

    // No usable fmt/data pair: signal "unknown" so the caller estimates from text length.
    return -1;
}
|
|
969
1038
|
|
|
970
|
-
|
|
1039
|
+
// --------------------------------------------------------------------------
|
|
1040
|
+
// HEURISTIC SRT IMPLEMENTATION
|
|
1041
|
+
// --------------------------------------------------------------------------
|
|
1042
|
+
/**
 * Build an approximate SRT subtitle track for `text` without real word timings.
 *
 * The text is split into sentences on `.`, `!` and `?`; each sentence receives a
 * share of the total duration proportional to its character count. Cue
 * boundaries are derived from the cumulative character position so rounding
 * never accumulates and the last cue ends exactly at the total duration.
 *
 * @param text - The spoken text.
 * @param durationSeconds - Audio length in seconds. Any value that is not a
 *   positive number (e.g. -1 or NaN) means "unknown" and is replaced by an
 *   estimate of 14 characters per second.
 * @returns The SRT document, or an empty string when there is no text.
 */
function generateHeuristicSRT(text: string, durationSeconds: number): string {
    // Nothing to caption: avoid dividing by a zero length and emitting NaN stamps.
    if (!text || text.trim() === '') {
        return '';
    }

    const totalSeconds = durationSeconds > 0 ? durationSeconds : text.length / 14;
    const sentences: string[] = text.match(/[^.!?]+[.!?]*/g) || [text];

    // Leading punctuation is not part of any match, so measure what was matched.
    let totalContentLen = 0;
    for (const sentence of sentences) {
        totalContentLen += sentence.length;
    }

    const pad = (value: number, width: number): string => value.toString().padStart(width, '0');

    const msToSrt = (ms: number): string => {
        // Round first so float noise (2999.9999...) does not floor to the wrong millisecond.
        const wholeMs = Math.round(ms);
        const totalSec = Math.floor(wholeMs / 1000);
        const h = Math.floor(totalSec / 3600);
        const m = Math.floor((totalSec % 3600) / 60);
        const s = totalSec % 60;
        return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(wholeMs % 1000, 3)}`;
    };

    const timeAtMs = (chars: number): number => (chars / totalContentLen) * totalSeconds * 1000;

    let srt = '';
    let counter = 1;
    let consumedChars = 0;

    for (const sentence of sentences) {
        const startMs = timeAtMs(consumedChars);
        consumedChars += sentence.length;

        // A blank line terminates an SRT cue, so collapse blank lines inside the text.
        const cueText = sentence.trim().replace(/\n\s*\n/g, '\n');
        if (cueText === '') {
            // Whitespace-only segment: keep its time share but emit no empty cue.
            continue;
        }

        srt += `${counter++}\n${msToSrt(startMs)} --> ${msToSrt(timeAtMs(consumedChars))}\n${cueText}\n\n`;
    }
    return srt;
}
|
|
987
1075
|
|
|
988
1076
|
async function httpRequest(url: string, method: string = 'GET', body: any = null): Promise<Buffer> {
|