n8n-nodes-tts-bigboss 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TTSBigBoss.node.js +281 -92
- package/nodes/TTSBigBoss/TTSBigBoss.node.ts +321 -131
- package/package.json +1 -1
package/dist/TTSBigBoss.node.js
CHANGED
|
@@ -73,6 +73,16 @@ const PIPER_MODELS = [
|
|
|
73
73
|
{ name: 'German - Thorsten (Male) - Low', value: 'de_DE-thorsten-low' },
|
|
74
74
|
];
|
|
75
75
|
const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
|
|
76
|
+
const EDGE_HEADERS = {
|
|
77
|
+
'Authority': 'speech.platform.bing.com',
|
|
78
|
+
'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
|
|
79
|
+
'Sec-CH-UA-Mobile': '?0',
|
|
80
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
|
|
81
|
+
'Sec-CH-UA-Platform': '"Windows"',
|
|
82
|
+
'Accept-Encoding': 'gzip, deflate, br',
|
|
83
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
84
|
+
'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
|
|
85
|
+
};
|
|
76
86
|
const EDGE_VOICES = [
|
|
77
87
|
{ name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
|
|
78
88
|
{ name: 'Arabic (Egypt) - Shakir', value: 'ar-EG-ShakirNeural' },
|
|
@@ -125,7 +135,12 @@ class TTSBigBoss {
|
|
|
125
135
|
{
|
|
126
136
|
name: 'Coqui TTS (Local Server)',
|
|
127
137
|
value: 'coqui',
|
|
128
|
-
description: 'Connect to a running Coqui TTS/XTTS server
|
|
138
|
+
description: 'Connect to a running Coqui TTS/XTTS server.',
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
name: 'Kokoro TTS (Local OpenAI API)',
|
|
142
|
+
value: 'kokoro',
|
|
143
|
+
description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
|
|
129
144
|
},
|
|
130
145
|
{
|
|
131
146
|
name: 'System Command (Custom)',
|
|
@@ -277,11 +292,46 @@ class TTSBigBoss {
|
|
|
277
292
|
description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
|
|
278
293
|
},
|
|
279
294
|
{
|
|
280
|
-
displayName: '
|
|
295
|
+
displayName: 'API URL',
|
|
296
|
+
name: 'kokoroUrl',
|
|
297
|
+
type: 'string',
|
|
298
|
+
default: 'http://localhost:8880/v1/audio/speech',
|
|
299
|
+
description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
|
|
300
|
+
displayOptions: {
|
|
301
|
+
show: {
|
|
302
|
+
engine: ['kokoro'],
|
|
303
|
+
},
|
|
304
|
+
},
|
|
305
|
+
},
|
|
306
|
+
{
|
|
307
|
+
displayName: 'Voice / Model',
|
|
308
|
+
name: 'kokoroVoice',
|
|
309
|
+
type: 'string',
|
|
310
|
+
default: 'af_bella',
|
|
311
|
+
description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
|
|
312
|
+
displayOptions: {
|
|
313
|
+
show: {
|
|
314
|
+
engine: ['kokoro'],
|
|
315
|
+
},
|
|
316
|
+
},
|
|
317
|
+
},
|
|
318
|
+
{
|
|
319
|
+
displayName: 'Speed',
|
|
320
|
+
name: 'kokoroSpeed',
|
|
321
|
+
type: 'number',
|
|
322
|
+
default: 1.0,
|
|
323
|
+
displayOptions: {
|
|
324
|
+
show: {
|
|
325
|
+
engine: ['kokoro'],
|
|
326
|
+
},
|
|
327
|
+
},
|
|
328
|
+
},
|
|
329
|
+
{
|
|
330
|
+
displayName: 'Base Server URL',
|
|
281
331
|
name: 'coquiUrl',
|
|
282
332
|
type: 'string',
|
|
283
|
-
default: 'http://localhost:5002
|
|
284
|
-
description: 'URL of
|
|
333
|
+
default: 'http://localhost:5002',
|
|
334
|
+
description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
|
|
285
335
|
displayOptions: {
|
|
286
336
|
show: {
|
|
287
337
|
engine: ['coqui'],
|
|
@@ -289,23 +339,56 @@ class TTSBigBoss {
|
|
|
289
339
|
},
|
|
290
340
|
},
|
|
291
341
|
{
|
|
292
|
-
displayName: 'Speaker
|
|
342
|
+
displayName: 'Speaker',
|
|
293
343
|
name: 'coquiSpeaker',
|
|
344
|
+
type: 'options',
|
|
345
|
+
typeOptions: {
|
|
346
|
+
loadOptionsMethod: 'getCoquiSpeakers',
|
|
347
|
+
loadOptionsDependsOn: ['coquiUrl'],
|
|
348
|
+
},
|
|
349
|
+
default: '',
|
|
350
|
+
description: 'Select a speaker ID loaded from the server.',
|
|
351
|
+
displayOptions: {
|
|
352
|
+
show: {
|
|
353
|
+
engine: ['coqui'],
|
|
354
|
+
},
|
|
355
|
+
},
|
|
356
|
+
},
|
|
357
|
+
{
|
|
358
|
+
displayName: 'Use Custom WAV Path',
|
|
359
|
+
name: 'coquiUseWav',
|
|
360
|
+
type: 'boolean',
|
|
361
|
+
default: false,
|
|
362
|
+
description: 'Check to use a local WAV file path instead of a Speaker ID (for cloning).',
|
|
363
|
+
displayOptions: {
|
|
364
|
+
show: {
|
|
365
|
+
engine: ['coqui'],
|
|
366
|
+
},
|
|
367
|
+
},
|
|
368
|
+
},
|
|
369
|
+
{
|
|
370
|
+
displayName: 'WAV Path',
|
|
371
|
+
name: 'coquiWavPath',
|
|
294
372
|
type: 'string',
|
|
295
373
|
default: '',
|
|
296
|
-
description: '
|
|
374
|
+
description: 'Absolute path to the reference WAV file on the server.',
|
|
297
375
|
displayOptions: {
|
|
298
376
|
show: {
|
|
299
377
|
engine: ['coqui'],
|
|
378
|
+
coquiUseWav: [true],
|
|
300
379
|
},
|
|
301
380
|
},
|
|
302
381
|
},
|
|
303
382
|
{
|
|
304
383
|
displayName: 'Language',
|
|
305
384
|
name: 'coquiLang',
|
|
306
|
-
type: '
|
|
385
|
+
type: 'options',
|
|
386
|
+
typeOptions: {
|
|
387
|
+
loadOptionsMethod: 'getCoquiLanguages',
|
|
388
|
+
loadOptionsDependsOn: ['coquiUrl'],
|
|
389
|
+
},
|
|
307
390
|
default: 'en',
|
|
308
|
-
description: '
|
|
391
|
+
description: 'Select language.',
|
|
309
392
|
displayOptions: {
|
|
310
393
|
show: {
|
|
311
394
|
engine: ['coqui'],
|
|
@@ -314,6 +397,60 @@ class TTSBigBoss {
|
|
|
314
397
|
},
|
|
315
398
|
],
|
|
316
399
|
};
|
|
400
|
+
this.methods = {
|
|
401
|
+
loadOptions: {
|
|
402
|
+
async getCoquiSpeakers() {
|
|
403
|
+
const baseUrl = this.getNodeParameter('coquiUrl');
|
|
404
|
+
const cleanUrl = baseUrl.replace(/\/$/, '');
|
|
405
|
+
const targetUrl = `${cleanUrl}/api/speakers`;
|
|
406
|
+
try {
|
|
407
|
+
const data = await httpRequest(targetUrl);
|
|
408
|
+
const json = JSON.parse(data.toString());
|
|
409
|
+
let speakers = [];
|
|
410
|
+
if (Array.isArray(json))
|
|
411
|
+
speakers = json;
|
|
412
|
+
else if (json.speakers)
|
|
413
|
+
speakers = json.speakers;
|
|
414
|
+
else if (typeof json === 'object')
|
|
415
|
+
speakers = Object.keys(json);
|
|
416
|
+
return speakers.map((s) => {
|
|
417
|
+
const name = typeof s === 'string' ? s : (s.name || s.id);
|
|
418
|
+
const value = typeof s === 'string' ? s : (s.id || s.name);
|
|
419
|
+
return { name, value };
|
|
420
|
+
});
|
|
421
|
+
}
|
|
422
|
+
catch (e) {
|
|
423
|
+
return [{ name: `Error loading: ${e.message}. Check URL & Connection.`, value: '' }];
|
|
424
|
+
}
|
|
425
|
+
},
|
|
426
|
+
async getCoquiLanguages() {
|
|
427
|
+
const baseUrl = this.getNodeParameter('coquiUrl');
|
|
428
|
+
const cleanUrl = baseUrl.replace(/\/$/, '');
|
|
429
|
+
const targetUrl = `${cleanUrl}/api/languages`;
|
|
430
|
+
try {
|
|
431
|
+
const data = await httpRequest(targetUrl);
|
|
432
|
+
const json = JSON.parse(data.toString());
|
|
433
|
+
let langs = [];
|
|
434
|
+
if (Array.isArray(json))
|
|
435
|
+
langs = json;
|
|
436
|
+
else if (json.languages)
|
|
437
|
+
langs = json.languages;
|
|
438
|
+
return langs.map((l) => {
|
|
439
|
+
const name = typeof l === 'string' ? l : (l.name || l.code);
|
|
440
|
+
const value = typeof l === 'string' ? l : (l.code || l.name);
|
|
441
|
+
return { name, value };
|
|
442
|
+
});
|
|
443
|
+
}
|
|
444
|
+
catch (e) {
|
|
445
|
+
return [
|
|
446
|
+
{ name: 'English (en)', value: 'en' },
|
|
447
|
+
{ name: 'Arabic (ar)', value: 'ar' },
|
|
448
|
+
{ name: 'Examples (Fix URL to load)', value: 'en' }
|
|
449
|
+
];
|
|
450
|
+
}
|
|
451
|
+
},
|
|
452
|
+
},
|
|
453
|
+
};
|
|
317
454
|
}
|
|
318
455
|
async execute() {
|
|
319
456
|
const items = this.getInputData();
|
|
@@ -347,6 +484,21 @@ class TTSBigBoss {
|
|
|
347
484
|
srtBuffer = Buffer.from(result.srt, 'utf8');
|
|
348
485
|
}
|
|
349
486
|
}
|
|
487
|
+
else if (engine === 'kokoro') {
|
|
488
|
+
const url = this.getNodeParameter('kokoroUrl', i);
|
|
489
|
+
const voice = this.getNodeParameter('kokoroVoice', i);
|
|
490
|
+
const speed = this.getNodeParameter('kokoroSpeed', i);
|
|
491
|
+
const payload = {
|
|
492
|
+
model: 'kokoro',
|
|
493
|
+
input: text,
|
|
494
|
+
voice: voice,
|
|
495
|
+
speed: speed,
|
|
496
|
+
response_format: 'mp3'
|
|
497
|
+
};
|
|
498
|
+
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
499
|
+
const duration = getAudioDuration(audioBuffer, 'mp3');
|
|
500
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
501
|
+
}
|
|
350
502
|
else if (engine === 'piper_local') {
|
|
351
503
|
let piperModel = this.getNodeParameter('piperModel', i);
|
|
352
504
|
if (piperModel === 'custom') {
|
|
@@ -369,7 +521,7 @@ class TTSBigBoss {
|
|
|
369
521
|
if (code === 0)
|
|
370
522
|
resolve();
|
|
371
523
|
if (errData.includes('json.exception.parse_error')) {
|
|
372
|
-
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted
|
|
524
|
+
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
|
|
373
525
|
}
|
|
374
526
|
else {
|
|
375
527
|
reject(new Error(`Piper failed (exit ${code}): ${errData}`));
|
|
@@ -380,43 +532,30 @@ class TTSBigBoss {
|
|
|
380
532
|
if (!fs.existsSync(outFile))
|
|
381
533
|
throw new Error('Piper did not produce output file');
|
|
382
534
|
audioBuffer = fs.readFileSync(outFile);
|
|
383
|
-
|
|
535
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
536
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
384
537
|
fs.unlinkSync(outFile);
|
|
385
538
|
}
|
|
386
539
|
else if (engine === 'coqui') {
|
|
387
|
-
|
|
388
|
-
|
|
540
|
+
let url = this.getNodeParameter('coquiUrl', i);
|
|
541
|
+
url = url.replace(/\/$/, '') + '/api/tts';
|
|
542
|
+
const speakerSelection = this.getNodeParameter('coquiSpeaker', i);
|
|
543
|
+
const useWav = this.getNodeParameter('coquiUseWav', i, false);
|
|
544
|
+
const wavPath = this.getNodeParameter('coquiWavPath', i, '');
|
|
389
545
|
const lang = this.getNodeParameter('coquiLang', i);
|
|
390
546
|
const payload = {
|
|
391
547
|
text: text,
|
|
392
548
|
language_id: lang,
|
|
393
549
|
};
|
|
394
|
-
if (
|
|
395
|
-
payload.
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
}, (res) => {
|
|
404
|
-
const chunks = [];
|
|
405
|
-
res.on('data', (d) => chunks.push(d));
|
|
406
|
-
res.on('end', () => {
|
|
407
|
-
if (res.statusCode >= 200 && res.statusCode < 300) {
|
|
408
|
-
resolve(Buffer.concat(chunks));
|
|
409
|
-
}
|
|
410
|
-
else {
|
|
411
|
-
reject(new Error(`Coqui Server Error ${res.statusCode}: ${Buffer.concat(chunks).toString()}`));
|
|
412
|
-
}
|
|
413
|
-
});
|
|
414
|
-
});
|
|
415
|
-
req.on('error', reject);
|
|
416
|
-
req.write(JSON.stringify(payload));
|
|
417
|
-
req.end();
|
|
418
|
-
});
|
|
419
|
-
srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
|
|
550
|
+
if (useWav && wavPath) {
|
|
551
|
+
payload.speaker_wav = wavPath;
|
|
552
|
+
}
|
|
553
|
+
else if (speakerSelection) {
|
|
554
|
+
payload.speaker_id = speakerSelection;
|
|
555
|
+
}
|
|
556
|
+
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
557
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
558
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
420
559
|
}
|
|
421
560
|
else {
|
|
422
561
|
const commandTpl = this.getNodeParameter('systemCommand', i);
|
|
@@ -450,7 +589,8 @@ class TTSBigBoss {
|
|
|
450
589
|
throw new Error('System command did not produce output file at expected path');
|
|
451
590
|
}
|
|
452
591
|
audioBuffer = fs.readFileSync(outFile);
|
|
453
|
-
|
|
592
|
+
const duration = getAudioDuration(audioBuffer);
|
|
593
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
454
594
|
if (fs.existsSync(outFile))
|
|
455
595
|
fs.unlinkSync(outFile);
|
|
456
596
|
}
|
|
@@ -481,7 +621,7 @@ async function runEdgeTTS(text, voice, rate, pitch) {
|
|
|
481
621
|
return new Promise((resolve, reject) => {
|
|
482
622
|
const ws = new ws_1.default(EDGE_URL, {
|
|
483
623
|
headers: {
|
|
484
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
624
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0',
|
|
485
625
|
'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
|
|
486
626
|
'TrustedClientToken': '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
|
|
487
627
|
}
|
|
@@ -595,23 +735,41 @@ function ticksToTime(ticks) {
|
|
|
595
735
|
const mili = date.getMilliseconds().toString().padStart(3, '0');
|
|
596
736
|
return `${h}:${m}:${s},${mili}`;
|
|
597
737
|
}
|
|
598
|
-
function
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
const
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
738
|
+
async function downloadFile(url, dest) {
|
|
739
|
+
return new Promise((resolve, reject) => {
|
|
740
|
+
const file = fs.createWriteStream(dest);
|
|
741
|
+
file.on('error', (err) => {
|
|
742
|
+
fs.unlink(dest, () => { });
|
|
743
|
+
reject(new Error(`File write error: ${err.message}`));
|
|
744
|
+
});
|
|
745
|
+
const request = https.get(url, (response) => {
|
|
746
|
+
if (response.statusCode === 302 || response.statusCode === 301) {
|
|
747
|
+
file.close();
|
|
748
|
+
downloadFile(response.headers.location, dest).then(resolve).catch(reject);
|
|
749
|
+
return;
|
|
750
|
+
}
|
|
751
|
+
if (response.statusCode && response.statusCode !== 200) {
|
|
752
|
+
file.close();
|
|
753
|
+
fs.unlink(dest, () => { });
|
|
754
|
+
reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
|
|
755
|
+
return;
|
|
756
|
+
}
|
|
757
|
+
response.pipe(file);
|
|
758
|
+
file.on('finish', () => {
|
|
759
|
+
file.close((err) => {
|
|
760
|
+
if (err)
|
|
761
|
+
reject(err);
|
|
762
|
+
else
|
|
763
|
+
resolve();
|
|
764
|
+
});
|
|
765
|
+
});
|
|
766
|
+
});
|
|
767
|
+
request.on('error', (err) => {
|
|
768
|
+
file.close();
|
|
769
|
+
fs.unlink(dest, () => { });
|
|
770
|
+
reject(new Error(`Network error: ${err.message}`));
|
|
771
|
+
});
|
|
772
|
+
});
|
|
615
773
|
}
|
|
616
774
|
async function ensurePiperBinary(binDir) {
|
|
617
775
|
const platform = os.platform();
|
|
@@ -663,15 +821,15 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
|
|
|
663
821
|
else {
|
|
664
822
|
const parts = modelNameOrUrl.split('-');
|
|
665
823
|
if (parts.length >= 3) {
|
|
666
|
-
const langRegion = parts[0]
|
|
667
|
-
const voice = parts[
|
|
668
|
-
const quality = parts[
|
|
669
|
-
const lang =
|
|
824
|
+
const langRegion = parts[0];
|
|
825
|
+
const voice = parts[1];
|
|
826
|
+
const quality = parts[2];
|
|
827
|
+
const lang = langRegion.split('_')[0];
|
|
670
828
|
modelFilename = modelNameOrUrl + '.onnx';
|
|
671
829
|
modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
|
|
672
830
|
}
|
|
673
831
|
else {
|
|
674
|
-
throw new Error(`Invalid model name format: ${modelNameOrUrl}
|
|
832
|
+
throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
|
|
675
833
|
}
|
|
676
834
|
}
|
|
677
835
|
const modelPath = path.join(binDir, modelFilename);
|
|
@@ -700,39 +858,70 @@ async function ensurePiperModel(binDir, modelNameOrUrl) {
|
|
|
700
858
|
}
|
|
701
859
|
return { modelPath, configPath };
|
|
702
860
|
}
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
file.close();
|
|
713
|
-
downloadFile(response.headers.location, dest).then(resolve).catch(reject);
|
|
714
|
-
return;
|
|
861
|
+
function getAudioDuration(buffer, hint = null) {
|
|
862
|
+
if (!buffer || buffer.length === 0)
|
|
863
|
+
return -1;
|
|
864
|
+
if ((hint === 'wav') || (buffer.length > 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE')) {
|
|
865
|
+
try {
|
|
866
|
+
const byteRate = buffer.readUInt32LE(28);
|
|
867
|
+
if (byteRate > 0) {
|
|
868
|
+
const dataSize = buffer.length - 44;
|
|
869
|
+
return dataSize / byteRate;
|
|
715
870
|
}
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
871
|
+
}
|
|
872
|
+
catch (e) { }
|
|
873
|
+
}
|
|
874
|
+
return -1;
|
|
875
|
+
}
|
|
876
|
+
function generateHeuristicSRT(text, durationSeconds) {
|
|
877
|
+
if (durationSeconds <= 0) {
|
|
878
|
+
durationSeconds = text.length / 14;
|
|
879
|
+
}
|
|
880
|
+
const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
|
|
881
|
+
const totalContentLen = text.length;
|
|
882
|
+
let currentStartTime = 0;
|
|
883
|
+
let srt = '';
|
|
884
|
+
let counter = 1;
|
|
885
|
+
const msToSrt = (ms) => {
|
|
886
|
+
const totalSec = Math.floor(ms / 1000);
|
|
887
|
+
const mili = Math.floor(ms % 1000);
|
|
888
|
+
const h = Math.floor(totalSec / 3600);
|
|
889
|
+
const m = Math.floor((totalSec % 3600) / 60);
|
|
890
|
+
const s = totalSec % 60;
|
|
891
|
+
return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
|
|
892
|
+
};
|
|
893
|
+
for (const sentence of sentences) {
|
|
894
|
+
const sentenceRatio = sentence.length / totalContentLen;
|
|
895
|
+
const sentenceDuration = sentenceRatio * durationSeconds;
|
|
896
|
+
const endTime = currentStartTime + sentenceDuration;
|
|
897
|
+
srt += `${counter++}\n${msToSrt(currentStartTime * 1000)} --> ${msToSrt(endTime * 1000)}\n${sentence.trim()}\n\n`;
|
|
898
|
+
currentStartTime = endTime;
|
|
899
|
+
}
|
|
900
|
+
return srt;
|
|
901
|
+
}
|
|
902
|
+
async function httpRequest(url, method = 'GET', body = null) {
|
|
903
|
+
const requestModule = url.startsWith('https') ? https : http;
|
|
904
|
+
return new Promise((resolve, reject) => {
|
|
905
|
+
const req = requestModule.request(url, {
|
|
906
|
+
method: method,
|
|
907
|
+
headers: {
|
|
908
|
+
'Content-Type': 'application/json',
|
|
721
909
|
}
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
910
|
+
}, (res) => {
|
|
911
|
+
const chunks = [];
|
|
912
|
+
res.on('data', (d) => chunks.push(d));
|
|
913
|
+
res.on('end', () => {
|
|
914
|
+
if (res.statusCode >= 200 && res.statusCode < 300) {
|
|
915
|
+
resolve(Buffer.concat(chunks));
|
|
916
|
+
}
|
|
917
|
+
else {
|
|
918
|
+
reject(new Error(`Server Request Failed ${res.statusCode}: ${Buffer.concat(chunks).toString()}`));
|
|
919
|
+
}
|
|
730
920
|
});
|
|
731
921
|
});
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
});
|
|
922
|
+
req.on('error', reject);
|
|
923
|
+
if (body)
|
|
924
|
+
req.write(JSON.stringify(body));
|
|
925
|
+
req.end();
|
|
737
926
|
});
|
|
738
927
|
}
|
|
@@ -3,6 +3,8 @@ import {
|
|
|
3
3
|
INodeExecutionData,
|
|
4
4
|
INodeType,
|
|
5
5
|
INodeTypeDescription,
|
|
6
|
+
ILoadOptionsFunctions,
|
|
7
|
+
INodePropertyOptions,
|
|
6
8
|
} from 'n8n-workflow';
|
|
7
9
|
import { v4 as uuidv4 } from 'uuid';
|
|
8
10
|
import * as fs from 'fs';
|
|
@@ -59,6 +61,16 @@ const PIPER_MODELS = [
|
|
|
59
61
|
|
|
60
62
|
// Edge TTS Constants
|
|
61
63
|
const EDGE_URL = 'wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4';
|
|
64
|
+
const EDGE_HEADERS = {
|
|
65
|
+
'Authority': 'speech.platform.bing.com',
|
|
66
|
+
'Sec-CH-UA': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
|
|
67
|
+
'Sec-CH-UA-Mobile': '?0',
|
|
68
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
|
|
69
|
+
'Sec-CH-UA-Platform': '"Windows"',
|
|
70
|
+
'Accept-Encoding': 'gzip, deflate, br',
|
|
71
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
72
|
+
'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold'
|
|
73
|
+
};
|
|
62
74
|
const EDGE_VOICES = [
|
|
63
75
|
// Arabic
|
|
64
76
|
{ name: 'Arabic (Egypt) - Salma', value: 'ar-EG-SalmaNeural' },
|
|
@@ -123,7 +135,12 @@ export class TTSBigBoss implements INodeType {
|
|
|
123
135
|
{
|
|
124
136
|
name: 'Coqui TTS (Local Server)',
|
|
125
137
|
value: 'coqui',
|
|
126
|
-
description: 'Connect to a running Coqui TTS/XTTS server
|
|
138
|
+
description: 'Connect to a running Coqui TTS/XTTS server.',
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
name: 'Kokoro TTS (Local OpenAI API)',
|
|
142
|
+
value: 'kokoro',
|
|
143
|
+
description: 'Connect to a local Kokoro server compatible with OpenAI API (e.g. /v1/audio/speech).',
|
|
127
144
|
},
|
|
128
145
|
{
|
|
129
146
|
name: 'System Command (Custom)',
|
|
@@ -287,14 +304,52 @@ export class TTSBigBoss implements INodeType {
|
|
|
287
304
|
description: 'Name from Hugging Face (e.g. en_US-bryce-medium) or full URL to .onnx file.',
|
|
288
305
|
},
|
|
289
306
|
// ----------------------------------
|
|
307
|
+
// Kokoro Settings
|
|
308
|
+
// ----------------------------------
|
|
309
|
+
{
|
|
310
|
+
displayName: 'API URL',
|
|
311
|
+
name: 'kokoroUrl',
|
|
312
|
+
type: 'string',
|
|
313
|
+
default: 'http://localhost:8880/v1/audio/speech',
|
|
314
|
+
description: 'Endpoint URL for Kokoro generation (OpenAI compatible).',
|
|
315
|
+
displayOptions: {
|
|
316
|
+
show: {
|
|
317
|
+
engine: ['kokoro'],
|
|
318
|
+
},
|
|
319
|
+
},
|
|
320
|
+
},
|
|
321
|
+
{
|
|
322
|
+
displayName: 'Voice / Model',
|
|
323
|
+
name: 'kokoroVoice',
|
|
324
|
+
type: 'string',
|
|
325
|
+
default: 'af_bella',
|
|
326
|
+
description: 'Voice ID (e.g. af_bella, af_sarah, am_adam). Arabic might require specific model ID.',
|
|
327
|
+
displayOptions: {
|
|
328
|
+
show: {
|
|
329
|
+
engine: ['kokoro'],
|
|
330
|
+
},
|
|
331
|
+
},
|
|
332
|
+
},
|
|
333
|
+
{
|
|
334
|
+
displayName: 'Speed',
|
|
335
|
+
name: 'kokoroSpeed',
|
|
336
|
+
type: 'number',
|
|
337
|
+
default: 1.0,
|
|
338
|
+
displayOptions: {
|
|
339
|
+
show: {
|
|
340
|
+
engine: ['kokoro'],
|
|
341
|
+
},
|
|
342
|
+
},
|
|
343
|
+
},
|
|
344
|
+
// ----------------------------------
|
|
290
345
|
// Coqui Server Settings
|
|
291
346
|
// ----------------------------------
|
|
292
347
|
{
|
|
293
|
-
displayName: 'Server URL',
|
|
348
|
+
displayName: 'Base Server URL',
|
|
294
349
|
name: 'coquiUrl',
|
|
295
350
|
type: 'string',
|
|
296
|
-
default: 'http://localhost:5002
|
|
297
|
-
description: 'URL of
|
|
351
|
+
default: 'http://localhost:5002',
|
|
352
|
+
description: 'Base URL of Coqui server (e.g. http://localhost:5002 or http://host.docker.internal:5002).',
|
|
298
353
|
displayOptions: {
|
|
299
354
|
show: {
|
|
300
355
|
engine: ['coqui'],
|
|
@@ -302,23 +357,56 @@ export class TTSBigBoss implements INodeType {
|
|
|
302
357
|
},
|
|
303
358
|
},
|
|
304
359
|
{
|
|
305
|
-
displayName: 'Speaker
|
|
360
|
+
displayName: 'Speaker',
|
|
306
361
|
name: 'coquiSpeaker',
|
|
362
|
+
type: 'options',
|
|
363
|
+
typeOptions: {
|
|
364
|
+
loadOptionsMethod: 'getCoquiSpeakers',
|
|
365
|
+
loadOptionsDependsOn: ['coquiUrl'],
|
|
366
|
+
},
|
|
367
|
+
default: '',
|
|
368
|
+
description: 'Select a speaker ID loaded from the server.',
|
|
369
|
+
displayOptions: {
|
|
370
|
+
show: {
|
|
371
|
+
engine: ['coqui'],
|
|
372
|
+
},
|
|
373
|
+
},
|
|
374
|
+
},
|
|
375
|
+
{
|
|
376
|
+
displayName: 'Use Custom WAV Path',
|
|
377
|
+
name: 'coquiUseWav',
|
|
378
|
+
type: 'boolean',
|
|
379
|
+
default: false,
|
|
380
|
+
description: 'Check to use a local WAV file path instead of a Speaker ID (for cloning).',
|
|
381
|
+
displayOptions: {
|
|
382
|
+
show: {
|
|
383
|
+
engine: ['coqui'],
|
|
384
|
+
},
|
|
385
|
+
},
|
|
386
|
+
},
|
|
387
|
+
{
|
|
388
|
+
displayName: 'WAV Path',
|
|
389
|
+
name: 'coquiWavPath',
|
|
307
390
|
type: 'string',
|
|
308
391
|
default: '',
|
|
309
|
-
description: '
|
|
392
|
+
description: 'Absolute path to the reference WAV file on the server.',
|
|
310
393
|
displayOptions: {
|
|
311
394
|
show: {
|
|
312
395
|
engine: ['coqui'],
|
|
396
|
+
coquiUseWav: [true],
|
|
313
397
|
},
|
|
314
398
|
},
|
|
315
399
|
},
|
|
316
400
|
{
|
|
317
401
|
displayName: 'Language',
|
|
318
402
|
name: 'coquiLang',
|
|
319
|
-
type: '
|
|
403
|
+
type: 'options',
|
|
404
|
+
typeOptions: {
|
|
405
|
+
loadOptionsMethod: 'getCoquiLanguages',
|
|
406
|
+
loadOptionsDependsOn: ['coquiUrl'],
|
|
407
|
+
},
|
|
320
408
|
default: 'en',
|
|
321
|
-
description: '
|
|
409
|
+
description: 'Select language.',
|
|
322
410
|
displayOptions: {
|
|
323
411
|
show: {
|
|
324
412
|
engine: ['coqui'],
|
|
@@ -328,6 +416,63 @@ export class TTSBigBoss implements INodeType {
|
|
|
328
416
|
],
|
|
329
417
|
};
|
|
330
418
|
|
|
419
|
+
methods = {
|
|
420
|
+
loadOptions: {
|
|
421
|
+
async getCoquiSpeakers(this: ILoadOptionsFunctions): Promise<INodePropertyOptions[]> {
|
|
422
|
+
const baseUrl = this.getNodeParameter('coquiUrl') as string;
|
|
423
|
+
// clean url
|
|
424
|
+
const cleanUrl = baseUrl.replace(/\/$/, '');
|
|
425
|
+
const targetUrl = `${cleanUrl}/api/speakers`; // Assumption: endpoints exist
|
|
426
|
+
|
|
427
|
+
try {
|
|
428
|
+
const data = await httpRequest(targetUrl);
|
|
429
|
+
// Assume data is [ {name: "id", ...} ] or [ "id", "id" ] or { "speakers": [...] }
|
|
430
|
+
const json = JSON.parse(data.toString());
|
|
431
|
+
let speakers: any[] = [];
|
|
432
|
+
|
|
433
|
+
if (Array.isArray(json)) speakers = json;
|
|
434
|
+
else if (json.speakers) speakers = json.speakers;
|
|
435
|
+
else if (typeof json === 'object') speakers = Object.keys(json);
|
|
436
|
+
|
|
437
|
+
return speakers.map((s: any) => {
|
|
438
|
+
const name = typeof s === 'string' ? s : (s.name || s.id);
|
|
439
|
+
const value = typeof s === 'string' ? s : (s.id || s.name);
|
|
440
|
+
return { name, value };
|
|
441
|
+
});
|
|
442
|
+
} catch (e: any) {
|
|
443
|
+
return [{ name: `Error loading: ${e.message}. Check URL & Connection.`, value: '' }];
|
|
444
|
+
}
|
|
445
|
+
},
|
|
446
|
+
async getCoquiLanguages(this: ILoadOptionsFunctions): Promise<INodePropertyOptions[]> {
|
|
447
|
+
const baseUrl = this.getNodeParameter('coquiUrl') as string;
|
|
448
|
+
const cleanUrl = baseUrl.replace(/\/$/, '');
|
|
449
|
+
const targetUrl = `${cleanUrl}/api/languages`;
|
|
450
|
+
|
|
451
|
+
try {
|
|
452
|
+
const data = await httpRequest(targetUrl);
|
|
453
|
+
const json = JSON.parse(data.toString());
|
|
454
|
+
let langs: any[] = [];
|
|
455
|
+
|
|
456
|
+
if (Array.isArray(json)) langs = json;
|
|
457
|
+
else if (json.languages) langs = json.languages;
|
|
458
|
+
|
|
459
|
+
return langs.map((l: any) => {
|
|
460
|
+
const name = typeof l === 'string' ? l : (l.name || l.code);
|
|
461
|
+
const value = typeof l === 'string' ? l : (l.code || l.name);
|
|
462
|
+
return { name, value };
|
|
463
|
+
});
|
|
464
|
+
} catch (e) {
|
|
465
|
+
// Fallback defaults if api fails
|
|
466
|
+
return [
|
|
467
|
+
{ name: 'English (en)', value: 'en' },
|
|
468
|
+
{ name: 'Arabic (ar)', value: 'ar' },
|
|
469
|
+
{ name: 'Examples (Fix URL to load)', value: 'en' }
|
|
470
|
+
];
|
|
471
|
+
}
|
|
472
|
+
},
|
|
473
|
+
},
|
|
474
|
+
};
|
|
475
|
+
|
|
331
476
|
async execute(this: IExecuteFunctions): Promise<INodeExecutionData[][]> {
|
|
332
477
|
const items = this.getInputData();
|
|
333
478
|
const returnData: INodeExecutionData[] = [];
|
|
@@ -369,6 +514,28 @@ export class TTSBigBoss implements INodeType {
|
|
|
369
514
|
srtBuffer = Buffer.from(result.srt, 'utf8');
|
|
370
515
|
}
|
|
371
516
|
|
|
517
|
+
} else if (engine === 'kokoro') {
|
|
518
|
+
// ----------------------------------
|
|
519
|
+
// KOKORO EXECUTION
|
|
520
|
+
// ----------------------------------
|
|
521
|
+
const url = this.getNodeParameter('kokoroUrl', i) as string;
|
|
522
|
+
const voice = this.getNodeParameter('kokoroVoice', i) as string;
|
|
523
|
+
const speed = this.getNodeParameter('kokoroSpeed', i) as number;
|
|
524
|
+
|
|
525
|
+
// Standard OpenAI 'createSpeech' payload
|
|
526
|
+
const payload = {
|
|
527
|
+
model: 'kokoro', // or whatever the server expects
|
|
528
|
+
input: text,
|
|
529
|
+
voice: voice,
|
|
530
|
+
speed: speed,
|
|
531
|
+
response_format: 'mp3'
|
|
532
|
+
};
|
|
533
|
+
|
|
534
|
+
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
535
|
+
|
|
536
|
+
const duration = getAudioDuration(audioBuffer, 'mp3');
|
|
537
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
538
|
+
|
|
372
539
|
} else if (engine === 'piper_local') {
|
|
373
540
|
// ----------------------------------
|
|
374
541
|
// PIPER LOCAL AUTOMATION
|
|
@@ -386,8 +553,6 @@ export class TTSBigBoss implements INodeType {
|
|
|
386
553
|
|
|
387
554
|
// 3. Execute
|
|
388
555
|
const outFile = path.join(tempDir, `piper_out_${uuidv4()}.wav`);
|
|
389
|
-
// Piper command: echo "text" | piper --model model.onnx --output_file out.wav
|
|
390
|
-
// We use child_process.spawn to pipe text safely
|
|
391
556
|
|
|
392
557
|
await new Promise<void>((resolve, reject) => {
|
|
393
558
|
const piperProc = child_process.spawn(piperBinPath, [
|
|
@@ -404,9 +569,8 @@ export class TTSBigBoss implements INodeType {
|
|
|
404
569
|
|
|
405
570
|
piperProc.on('close', (code) => {
|
|
406
571
|
if (code === 0) resolve();
|
|
407
|
-
// Check for the specific JSON error in stderr
|
|
408
572
|
if (errData.includes('json.exception.parse_error')) {
|
|
409
|
-
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted
|
|
573
|
+
reject(new Error(`Piper Config Error: The downloaded JSON configuration for model '${piperModel}' seems corrupted. Try deleting the file at ${configPath}.`));
|
|
410
574
|
} else {
|
|
411
575
|
reject(new Error(`Piper failed (exit ${code}): ${errData}`));
|
|
412
576
|
}
|
|
@@ -418,7 +582,8 @@ export class TTSBigBoss implements INodeType {
|
|
|
418
582
|
if (!fs.existsSync(outFile)) throw new Error('Piper did not produce output file');
|
|
419
583
|
|
|
420
584
|
audioBuffer = fs.readFileSync(outFile);
|
|
421
|
-
|
|
585
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
586
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
422
587
|
|
|
423
588
|
fs.unlinkSync(outFile);
|
|
424
589
|
|
|
@@ -426,44 +591,28 @@ export class TTSBigBoss implements INodeType {
|
|
|
426
591
|
// ----------------------------------
|
|
427
592
|
// COQUI SEVER EXECUTION
|
|
428
593
|
// ----------------------------------
|
|
429
|
-
|
|
430
|
-
|
|
594
|
+
let url = this.getNodeParameter('coquiUrl', i) as string;
|
|
595
|
+
url = url.replace(/\/$/, '') + '/api/tts'; // Append standard endpoint
|
|
596
|
+
|
|
597
|
+
const speakerSelection = this.getNodeParameter('coquiSpeaker', i) as string;
|
|
598
|
+
const useWav = this.getNodeParameter('coquiUseWav', i, false) as boolean;
|
|
599
|
+
const wavPath = this.getNodeParameter('coquiWavPath', i, '') as string;
|
|
431
600
|
const lang = this.getNodeParameter('coquiLang', i) as string;
|
|
432
601
|
|
|
433
|
-
// Construct Payload
|
|
434
|
-
// Standard XTTS/Coqui API expects: text, speaker_id, language_id
|
|
435
602
|
const payload: any = {
|
|
436
603
|
text: text,
|
|
437
604
|
language_id: lang,
|
|
438
605
|
};
|
|
439
|
-
if (speaker) payload.speaker_id = speaker;
|
|
440
|
-
|
|
441
|
-
// Allow http and https
|
|
442
|
-
const requestModule = url.startsWith('https') ? https : http;
|
|
443
606
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
}
|
|
450
|
-
}, (res: any) => {
|
|
451
|
-
const chunks: any[] = [];
|
|
452
|
-
res.on('data', (d: any) => chunks.push(d));
|
|
453
|
-
res.on('end', () => {
|
|
454
|
-
if (res.statusCode >= 200 && res.statusCode < 300) {
|
|
455
|
-
resolve(Buffer.concat(chunks));
|
|
456
|
-
} else {
|
|
457
|
-
reject(new Error(`Coqui Server Error ${res.statusCode}: ${Buffer.concat(chunks).toString()}`));
|
|
458
|
-
}
|
|
459
|
-
});
|
|
460
|
-
});
|
|
461
|
-
req.on('error', reject);
|
|
462
|
-
req.write(JSON.stringify(payload));
|
|
463
|
-
req.end();
|
|
464
|
-
});
|
|
607
|
+
if (useWav && wavPath) {
|
|
608
|
+
payload.speaker_wav = wavPath;
|
|
609
|
+
} else if (speakerSelection) {
|
|
610
|
+
payload.speaker_id = speakerSelection;
|
|
611
|
+
}
|
|
465
612
|
|
|
466
|
-
|
|
613
|
+
audioBuffer = await httpRequest(url, 'POST', payload);
|
|
614
|
+
const duration = getAudioDuration(audioBuffer, 'wav');
|
|
615
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
467
616
|
|
|
468
617
|
} else {
|
|
469
618
|
// ----------------------------------
|
|
@@ -479,7 +628,6 @@ export class TTSBigBoss implements INodeType {
|
|
|
479
628
|
.replace(/"{text}"/g, `"${text.replace(/"/g, '\\"')}"`) // Basic escape
|
|
480
629
|
.replace(/{text}/g, `"${text.replace(/"/g, '\\"')}"`);
|
|
481
630
|
|
|
482
|
-
// Handle Clone Input
|
|
483
631
|
if (useClone) {
|
|
484
632
|
const cloneProp = this.getNodeParameter('cloneInputProperty', i) as string;
|
|
485
633
|
const cloneData = await this.helpers.getBinaryDataBuffer(i, cloneProp);
|
|
@@ -490,7 +638,6 @@ export class TTSBigBoss implements INodeType {
|
|
|
490
638
|
.replace(/{reference_audio}/g, `"${cloneFile}"`);
|
|
491
639
|
}
|
|
492
640
|
|
|
493
|
-
// Execute
|
|
494
641
|
await new Promise((resolve, reject) => {
|
|
495
642
|
child_process.exec(cmd, (error, stdout, stderr) => {
|
|
496
643
|
if (error) {
|
|
@@ -506,9 +653,8 @@ export class TTSBigBoss implements INodeType {
|
|
|
506
653
|
}
|
|
507
654
|
|
|
508
655
|
audioBuffer = fs.readFileSync(outFile);
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
srtBuffer = Buffer.from(generateHeuristicSRT(text, audioBuffer.length), 'utf8');
|
|
656
|
+
const duration = getAudioDuration(audioBuffer);
|
|
657
|
+
srtBuffer = Buffer.from(generateHeuristicSRT(text, duration), 'utf8');
|
|
512
658
|
|
|
513
659
|
// Cleanup
|
|
514
660
|
if (fs.existsSync(outFile)) fs.unlinkSync(outFile);
|
|
@@ -540,7 +686,7 @@ export class TTSBigBoss implements INodeType {
|
|
|
540
686
|
|
|
541
687
|
returnData.push(newItem);
|
|
542
688
|
|
|
543
|
-
} catch (error) {
|
|
689
|
+
} catch (error: any) {
|
|
544
690
|
if (this.continueOnFail()) {
|
|
545
691
|
returnData.push({ json: { error: error.message }, binary: {} });
|
|
546
692
|
continue;
|
|
@@ -560,8 +706,8 @@ async function runEdgeTTS(text: string, voice: string, rate: string, pitch: stri
|
|
|
560
706
|
return new Promise((resolve, reject) => {
|
|
561
707
|
const ws = new WebSocket(EDGE_URL, {
|
|
562
708
|
headers: {
|
|
563
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
564
|
-
'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
|
|
709
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0', // Updated UA to Edge
|
|
710
|
+
'Origin': 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold', // Keep origin for now, usually required
|
|
565
711
|
'TrustedClientToken': '6A5AA1D4EAFF4E9FB37E23D68491D6F4'
|
|
566
712
|
}
|
|
567
713
|
});
|
|
@@ -707,41 +853,43 @@ function ticksToTime(ticks: number): string {
|
|
|
707
853
|
return `${h}:${m}:${s},${mili}`;
|
|
708
854
|
}
|
|
709
855
|
|
|
710
|
-
//
|
|
711
|
-
// HEURISTIC
|
|
712
|
-
//
|
|
713
|
-
function generateHeuristicSRT(text: string, byteLength: number): string {
|
|
714
|
-
// Estimate duration assuming typical MP3/WAV bitrate.
|
|
715
|
-
// Actually, system command usually produces WAV (PCM).
|
|
716
|
-
// Wrapper might produce MP3. Let's assume user command output.
|
|
717
|
-
// It is safer to assume ~15 chars per second reading speed if we don't know duration.
|
|
718
|
-
// Or assume 16000 bytes/sec for mono 16khz? Too unreliable.
|
|
719
|
-
// Let's use text length heuristic: Avg reading speed 150 wpm ~ 2.5 words/sec ~ 15 chars/sec?
|
|
720
|
-
// Let's try 15 chars / second.
|
|
721
|
-
|
|
722
|
-
const totalDurationSec = text.length / 15;
|
|
723
|
-
// Ideally we'd use 'ffprobe' to get exact duration, but let's stick to pure TS for now.
|
|
724
|
-
// If we really wanted to be robust, we'd add 'ffprobe' execution here.
|
|
725
|
-
|
|
726
|
-
const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
|
|
727
|
-
let currentStartTime = 0;
|
|
728
|
-
let srt = '';
|
|
729
|
-
let counter = 1;
|
|
730
|
-
|
|
731
|
-
const msToSrt = (ms: number) => {
|
|
732
|
-
const date = new Date(0, 0, 0, 0, 0, 0, ms);
|
|
733
|
-
return `${date.getHours().toString().padStart(2, '0')}:${date.getMinutes().toString().padStart(2, '0')}:${date.getSeconds().toString().padStart(2, '0')},${date.getMilliseconds().toString().padStart(3, '0')}`;
|
|
734
|
-
};
|
|
735
|
-
|
|
736
|
-
for (const sentence of sentences) {
|
|
737
|
-
const sentenceDuration = (sentence.length / text.length) * (totalDurationSec * 1000);
|
|
738
|
-
const endTime = currentStartTime + sentenceDuration;
|
|
739
|
-
|
|
740
|
-
srt += `${counter++}\n${msToSrt(currentStartTime)} --> ${msToSrt(endTime)}\n${sentence.trim()}\n\n`;
|
|
741
|
-
currentStartTime = endTime;
|
|
742
|
-
}
|
|
856
|
+
// ----------------------------------
|
|
857
|
+
// OLD HEURISTIC REMOVED
|
|
858
|
+
// ----------------------------------
|
|
743
859
|
|
|
744
|
-
|
|
860
|
+
async function downloadFile(url: string, dest: string): Promise<void> {
|
|
861
|
+
return new Promise((resolve, reject) => {
|
|
862
|
+
const file = fs.createWriteStream(dest);
|
|
863
|
+
file.on('error', (err) => {
|
|
864
|
+
fs.unlink(dest, () => { });
|
|
865
|
+
reject(new Error(`File write error: ${err.message}`));
|
|
866
|
+
});
|
|
867
|
+
const request = https.get(url, (response) => {
|
|
868
|
+
if (response.statusCode === 302 || response.statusCode === 301) {
|
|
869
|
+
file.close();
|
|
870
|
+
downloadFile(response.headers.location!, dest).then(resolve).catch(reject);
|
|
871
|
+
return;
|
|
872
|
+
}
|
|
873
|
+
if (response.statusCode && response.statusCode !== 200) {
|
|
874
|
+
file.close();
|
|
875
|
+
fs.unlink(dest, () => { });
|
|
876
|
+
reject(new Error(`Download failed with status code: ${response.statusCode} for URL: ${url}`));
|
|
877
|
+
return;
|
|
878
|
+
}
|
|
879
|
+
response.pipe(file);
|
|
880
|
+
file.on('finish', () => {
|
|
881
|
+
file.close((err) => {
|
|
882
|
+
if (err) reject(err);
|
|
883
|
+
else resolve();
|
|
884
|
+
});
|
|
885
|
+
});
|
|
886
|
+
});
|
|
887
|
+
request.on('error', (err) => {
|
|
888
|
+
file.close();
|
|
889
|
+
fs.unlink(dest, () => { });
|
|
890
|
+
reject(new Error(`Network error: ${err.message}`));
|
|
891
|
+
});
|
|
892
|
+
});
|
|
745
893
|
}
|
|
746
894
|
|
|
747
895
|
// --------------------------------------------------------------------------
|
|
@@ -811,25 +959,23 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
|
|
|
811
959
|
modelUrl = modelNameOrUrl;
|
|
812
960
|
modelFilename = path.basename(modelNameOrUrl);
|
|
813
961
|
} else {
|
|
814
|
-
//
|
|
962
|
+
// Correct Parsing for 'lang_REGION-voice-quality'
|
|
963
|
+
// e.g. en_US-lessac-medium -> [en_US, lessac, medium]
|
|
964
|
+
// e.g. ar_JO-kareem-medium -> [ar_JO, kareem, medium]
|
|
965
|
+
|
|
815
966
|
const parts = modelNameOrUrl.split('-');
|
|
816
967
|
if (parts.length >= 3) {
|
|
817
|
-
const langRegion = parts[0]
|
|
818
|
-
const voice = parts[
|
|
819
|
-
const quality = parts[
|
|
820
|
-
const lang = parts[0]; // en
|
|
968
|
+
const langRegion = parts[0]; // 'ar_JO' or 'en_US'
|
|
969
|
+
const voice = parts[1]; // 'kareem'
|
|
970
|
+
const quality = parts[2]; // 'medium'
|
|
821
971
|
|
|
822
|
-
//
|
|
823
|
-
|
|
824
|
-
// url path: en/en_US/lessac/medium/en_US-lessac-medium.onnx
|
|
825
|
-
|
|
826
|
-
// Handle special case: ar_JO (no lang folder? check repo)
|
|
827
|
-
// Generally structure is: lang_short/lang_long/voice/quality/filename
|
|
972
|
+
// Lang code is first part of langRegion (split by _)
|
|
973
|
+
const lang = langRegion.split('_')[0]; // 'ar' form 'ar_JO'
|
|
828
974
|
|
|
829
975
|
modelFilename = modelNameOrUrl + '.onnx';
|
|
830
|
-
modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
|
|
976
|
+
modelUrl = `https://huggingface.co/rhasspy/piper-voices/resolve/main/${lang}/${langRegion}/${voice}/${quality}/${modelFilename}?download=true`;
|
|
831
977
|
} else {
|
|
832
|
-
throw new Error(`Invalid model name format: ${modelNameOrUrl}
|
|
978
|
+
throw new Error(`Invalid model name format: ${modelNameOrUrl}.`);
|
|
833
979
|
}
|
|
834
980
|
}
|
|
835
981
|
|
|
@@ -865,45 +1011,89 @@ async function ensurePiperModel(binDir: string, modelNameOrUrl: string): Promise
|
|
|
865
1011
|
return { modelPath, configPath };
|
|
866
1012
|
}
|
|
867
1013
|
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
fs.unlink(dest, () => { }); // Cleanup
|
|
875
|
-
reject(new Error(`File write error: ${err.message}`));
|
|
876
|
-
});
|
|
1014
|
+
// --------------------------------------------------------------------------
|
|
1015
|
+
// HELPER: Determine Audio Duration for SRT
|
|
1016
|
+
// --------------------------------------------------------------------------
|
|
1017
|
+
function getAudioDuration(buffer: Buffer, hint: 'mp3' | 'wav' | null = null): number {
|
|
1018
|
+
// 1. Try generic text length if buffer empty (fallback)
|
|
1019
|
+
if (!buffer || buffer.length === 0) return -1;
|
|
877
1020
|
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
1021
|
+
// 2. Try parsing WAV header
|
|
1022
|
+
// RIFF....WAVEfmt
|
|
1023
|
+
if ((hint === 'wav') || (buffer.length > 12 && buffer.toString('ascii', 0, 4) === 'RIFF' && buffer.toString('ascii', 8, 12) === 'WAVE')) {
|
|
1024
|
+
try {
|
|
1025
|
+
// standard header is 44 bytes.
|
|
1026
|
+
const byteRate = buffer.readUInt32LE(28);
|
|
1027
|
+
if (byteRate > 0) {
|
|
1028
|
+
const dataSize = buffer.length - 44;
|
|
1029
|
+
return dataSize / byteRate;
|
|
884
1030
|
}
|
|
1031
|
+
} catch (e) { /* ignore */ }
|
|
1032
|
+
}
|
|
885
1033
|
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
return;
|
|
891
|
-
}
|
|
1034
|
+
// 3. Fallback: Char count estimation? No, we don't have text here.
|
|
1035
|
+
// Return -1 to signal "Use text length"
|
|
1036
|
+
return -1;
|
|
1037
|
+
}
|
|
892
1038
|
|
|
893
|
-
|
|
1039
|
+
// --------------------------------------------------------------------------
|
|
1040
|
+
// HEURISTIC SRT IMPLEMENTATION
|
|
1041
|
+
// --------------------------------------------------------------------------
|
|
1042
|
+
function generateHeuristicSRT(text: string, durationSeconds: number): string {
|
|
1043
|
+
// If duration unknown (-1), estimate from text length (14 chars/sec)
|
|
1044
|
+
if (durationSeconds <= 0) {
|
|
1045
|
+
durationSeconds = text.length / 14;
|
|
1046
|
+
}
|
|
894
1047
|
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
if (err) reject(err);
|
|
898
|
-
else resolve();
|
|
899
|
-
});
|
|
900
|
-
});
|
|
901
|
-
});
|
|
1048
|
+
const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
|
|
1049
|
+
const totalContentLen = text.length;
|
|
902
1050
|
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
1051
|
+
let currentStartTime = 0;
|
|
1052
|
+
let srt = '';
|
|
1053
|
+
let counter = 1;
|
|
1054
|
+
|
|
1055
|
+
const msToSrt = (ms: number) => {
|
|
1056
|
+
const totalSec = Math.floor(ms / 1000);
|
|
1057
|
+
const mili = Math.floor(ms % 1000);
|
|
1058
|
+
const h = Math.floor(totalSec / 3600);
|
|
1059
|
+
const m = Math.floor((totalSec % 3600) / 60);
|
|
1060
|
+
const s = totalSec % 60;
|
|
1061
|
+
return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')},${mili.toString().padStart(3, '0')}`;
|
|
1062
|
+
};
|
|
1063
|
+
|
|
1064
|
+
for (const sentence of sentences) {
|
|
1065
|
+
// Proportion of time = Proportion of length
|
|
1066
|
+
const sentenceRatio = sentence.length / totalContentLen;
|
|
1067
|
+
const sentenceDuration = sentenceRatio * durationSeconds;
|
|
1068
|
+
const endTime = currentStartTime + sentenceDuration;
|
|
1069
|
+
|
|
1070
|
+
srt += `${counter++}\n${msToSrt(currentStartTime * 1000)} --> ${msToSrt(endTime * 1000)}\n${sentence.trim()}\n\n`;
|
|
1071
|
+
currentStartTime = endTime;
|
|
1072
|
+
}
|
|
1073
|
+
return srt;
|
|
1074
|
+
}
|
|
1075
|
+
|
|
1076
|
+
async function httpRequest(url: string, method: string = 'GET', body: any = null): Promise<Buffer> {
|
|
1077
|
+
const requestModule = url.startsWith('https') ? https : http;
|
|
1078
|
+
return new Promise((resolve, reject) => {
|
|
1079
|
+
const req = requestModule.request(url, {
|
|
1080
|
+
method: method,
|
|
1081
|
+
headers: {
|
|
1082
|
+
'Content-Type': 'application/json',
|
|
1083
|
+
}
|
|
1084
|
+
}, (res: any) => {
|
|
1085
|
+
const chunks: any[] = [];
|
|
1086
|
+
res.on('data', (d: any) => chunks.push(d));
|
|
1087
|
+
res.on('end', () => {
|
|
1088
|
+
if (res.statusCode >= 200 && res.statusCode < 300) {
|
|
1089
|
+
resolve(Buffer.concat(chunks));
|
|
1090
|
+
} else {
|
|
1091
|
+
reject(new Error(`Server Request Failed ${res.statusCode}: ${Buffer.concat(chunks).toString()}`));
|
|
1092
|
+
}
|
|
1093
|
+
});
|
|
907
1094
|
});
|
|
1095
|
+
req.on('error', reject);
|
|
1096
|
+
if (body) req.write(JSON.stringify(body));
|
|
1097
|
+
req.end();
|
|
908
1098
|
});
|
|
909
1099
|
}
|