kingkont 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.html +56 -5
- package/main.js +1 -2
- package/package.json +1 -1
- package/server.js +45 -2
package/index.html
CHANGED
|
@@ -1343,7 +1343,15 @@
|
|
|
1343
1343
|
</div>
|
|
1344
1344
|
</div>
|
|
1345
1345
|
</label>
|
|
1346
|
-
<label id="
|
|
1346
|
+
<!-- TTS model picker row. Hidden by default; the scripts in this file toggle its display when the generation kind is 'audio'. Each button's data-tts-model value is what the click handler stores in state.ttsModel; the first button starts with the 'active' class, matching the 'qwen/qwen3-tts' default. NOTE(review): a <label> without a "for" attribute labels its first labelable descendant, so a click on the caption text may be forwarded to the first button (Qwen TTS) - confirm this is intended. -->
<label id="ttsModelRow" style="display: none;">Модель TTS
|
|
1347
|
+
<div class="seg-control" style="flex-wrap:wrap;">
|
|
1348
|
+
<button class="seg active" data-tts-model="qwen/qwen3-tts" type="button" title="Qwen TTS — мульти-язык, ready-голоса">Qwen TTS</button>
|
|
1349
|
+
<button class="seg" data-tts-model="elevenlabs/v3" type="button" title="ElevenLabs v3 — лучший EN, тоны">ElevenLabs v3</button>
|
|
1350
|
+
<button class="seg" data-tts-model="minimax/speech-02-hd" type="button" title="MiniMax Speech HD — клон-голоса">MiniMax Speech HD</button>
|
|
1351
|
+
<button class="seg" data-tts-model="google/gemini-3.1-flash-tts-preview" type="button" title="Gemini 3.1 Flash TTS">Gemini Flash TTS</button>
|
|
1352
|
+
</div>
|
|
1353
|
+
</label>
|
|
1354
|
+
<label id="voiceRow" style="display: none;">Голос
|
|
1347
1355
|
<select id="genVoice"></select>
|
|
1348
1356
|
</label>
|
|
1349
1357
|
<label id="tonesRow" style="display: none;">Тоны
|
|
@@ -1729,6 +1737,7 @@ const state = {
|
|
|
1729
1737
|
genKind: 'image',
|
|
1730
1738
|
imageModel: 'nano-banana-2', // 'nano-banana-2' | 'grok' | ...
|
|
1731
1739
|
videoModel: localStorage.getItem('videoModel') || 'seedance-2', // 'seedance-2' | 'kling-o1' | 'kling-3.0' | ...
|
|
1740
|
+
ttsModel: localStorage.getItem('ttsModel') || 'qwen/qwen3-tts', // qwen/elevenlabs/v3/minimax/speech-02-hd/gemini
|
|
1732
1741
|
videoDuration: +(localStorage.getItem('videoDuration') || 5),
|
|
1733
1742
|
videoResolution: localStorage.getItem('videoResolution') || '720p',
|
|
1734
1743
|
videoAspect: localStorage.getItem('videoAspect') || '9:16',
|
|
@@ -3760,6 +3769,8 @@ async function openGenerateForRef(fromNode, clientX, clientY, forceKind) {
|
|
|
3760
3769
|
|
|
3761
3770
|
$('videoModelRow').style.display = forceKind === 'video' ? '' : 'none';
|
|
3762
3771
|
$('voiceRow').style.display = forceKind === 'audio' ? '' : 'none';
|
|
3772
|
+
|
|
3773
|
+
$('ttsModelRow').style.display = forceKind === 'audio' ? '' : 'none';
|
|
3763
3774
|
$('tonesRow').style.display = forceKind === 'audio' ? '' : 'none';
|
|
3764
3775
|
const titleEl = $('genTitle');
|
|
3765
3776
|
if (titleEl) {
|
|
@@ -5013,6 +5024,8 @@ async function regenerateNode(node) {
|
|
|
5013
5024
|
$('videoModelRow').style.display = state.genKind === 'video' ? '' : 'none';
|
|
5014
5025
|
$('voiceRow').style.display = state.genKind === 'audio' ? '' : 'none';
|
|
5015
5026
|
|
|
5027
|
+
$('ttsModelRow').style.display = state.genKind === 'audio' ? '' : 'none';
|
|
5028
|
+
|
|
5016
5029
|
if (g.modelKey && state.genKind === 'image') {
|
|
5017
5030
|
state.imageModel = g.modelKey;
|
|
5018
5031
|
document.querySelectorAll('#genModal [data-img-model]').forEach(b =>
|
|
@@ -5028,7 +5041,9 @@ async function regenerateNode(node) {
|
|
|
5028
5041
|
syncVideoModelActive();
|
|
5029
5042
|
}
|
|
5030
5043
|
if (state.genKind === 'audio') {
|
|
5031
|
-
|
|
5044
|
+
if (g.ttsModel) state.ttsModel = g.ttsModel;
|
|
5045
|
+
syncTtsModelActive();
|
|
5046
|
+
if (state.ttsModel === 'elevenlabs/v3') await loadVoices();
|
|
5032
5047
|
if (g.voiceId) $('genVoice').value = g.voiceId;
|
|
5033
5048
|
state.activeTones = (g.tones || []).slice();
|
|
5034
5049
|
state.toneSuggestions = (g.tones || []).slice();
|
|
@@ -5175,6 +5190,7 @@ async function regenerateInto(node, kind, rawPrompt, opts = {}) {
|
|
|
5175
5190
|
|
|
5176
5191
|
const seedGen = kind === 'audio'
|
|
5177
5192
|
? { kind, prompt: resolvedPrompt, rawPrompt, model: modelId, voiceId, voiceName,
|
|
5193
|
+
ttsModel: state.ttsModel || node.generated?.ttsModel || 'qwen/qwen3-tts',
|
|
5178
5194
|
tones: [...state.activeTones], state: 'submitting' }
|
|
5179
5195
|
: { kind, prompt: resolvedPrompt, rawPrompt, modelKey, model: modelId,
|
|
5180
5196
|
refs: refs ? refs.map(r => ({ name: r.name, type: r.type, file: r.file })) : [],
|
|
@@ -5961,6 +5977,8 @@ function openPhraseFor(charInfo) {
|
|
|
5961
5977
|
|
|
5962
5978
|
$('videoModelRow').style.display = 'none';
|
|
5963
5979
|
$('voiceRow').style.display = '';
|
|
5980
|
+
|
|
5981
|
+
$('ttsModelRow').style.display = '';
|
|
5964
5982
|
$('tonesRow').style.display = '';
|
|
5965
5983
|
loadVoices().then(() => {
|
|
5966
5984
|
if (charInfo.voice) $('genVoice').value = charInfo.voice;
|
|
@@ -6142,6 +6160,8 @@ async function openGenModal(kind) {
|
|
|
6142
6160
|
|
|
6143
6161
|
$('videoModelRow').style.display = kind === 'video' ? '' : 'none';
|
|
6144
6162
|
$('voiceRow').style.display = kind === 'audio' ? '' : 'none';
|
|
6163
|
+
|
|
6164
|
+
$('ttsModelRow').style.display = kind === 'audio' ? '' : 'none';
|
|
6145
6165
|
$('tonesRow').style.display = kind === 'audio' ? '' : 'none';
|
|
6146
6166
|
// Заголовок модалки = действие
|
|
6147
6167
|
const title = $('genTitle');
|
|
@@ -6562,8 +6582,14 @@ document.querySelectorAll('#genModal [data-kind]').forEach(b => {
|
|
|
6562
6582
|
|
|
6563
6583
|
$('videoModelRow').style.display = state.genKind === 'video' ? '' : 'none';
|
|
6564
6584
|
$('voiceRow').style.display = state.genKind === 'audio' ? '' : 'none';
|
|
6585
|
+
|
|
6586
|
+
$('ttsModelRow').style.display = state.genKind === 'audio' ? '' : 'none';
|
|
6565
6587
|
$('tonesRow').style.display = state.genKind === 'audio' ? '' : 'none';
|
|
6566
|
-
if (state.genKind === 'audio') {
|
|
6588
|
+
if (state.genKind === 'audio') {
|
|
6589
|
+
syncTtsModelActive();
|
|
6590
|
+
if (state.ttsModel === 'elevenlabs/v3') loadVoices();
|
|
6591
|
+
renderTones();
|
|
6592
|
+
}
|
|
6567
6593
|
const ph = state.genKind === 'audio'
|
|
6568
6594
|
? 'Текст, который надо озвучить...'
|
|
6569
6595
|
: 'Что должно быть. Печатай @ чтобы вставить ссылку на ноду...';
|
|
@@ -6825,6 +6851,25 @@ document.querySelectorAll('#genModal [data-vid-model]').forEach(b => {
|
|
|
6825
6851
|
localStorage.setItem('videoModel', state.videoModel);
|
|
6826
6852
|
});
|
|
6827
6853
|
});
|
|
6854
|
+
// TTS model switcher: clicking a [data-tts-model] button inside #genModal makes it the only 'active' one, stores its data-tts-model value in state.ttsModel and persists it under the 'ttsModel' localStorage key.
|
|
6855
|
+
document.querySelectorAll('#genModal [data-tts-model]').forEach(b => {
|
|
6856
|
+
b.addEventListener('click', () => {
|
|
6857
|
+
document.querySelectorAll('#genModal [data-tts-model]').forEach(x => x.classList.remove('active'));
|
|
6858
|
+
b.classList.add('active');
|
|
6859
|
+
state.ttsModel = b.dataset.ttsModel;
|
|
6860
|
+
localStorage.setItem('ttsModel', state.ttsModel);
|
|
6861
|
+
// The voiceRow (ElevenLabs voice list) is only meaningful for elevenlabs/v3, so it is hidden for every other model. NOTE(review): unlike the [data-kind] handler, this path does not call loadVoices() when switching to elevenlabs/v3 - confirm the voice list is already populated at this point.
|
|
6862
|
+
const showVoice = state.ttsModel === 'elevenlabs/v3';
|
|
6863
|
+
$('voiceRow').style.display = showVoice ? '' : 'none';
|
|
6864
|
+
});
|
|
6865
|
+
});
|
|
6866
|
+
// Syncs the TTS model buttons with state.ttsModel: toggles the 'active' class so only the matching [data-tts-model] button has it, then sets voiceRow's display from the selected model. Takes no arguments and returns nothing.
function syncTtsModelActive() {
|
|
6867
|
+
document.querySelectorAll('#genModal [data-tts-model]').forEach(b =>
|
|
6868
|
+
b.classList.toggle('active', b.dataset.ttsModel === state.ttsModel));
|
|
6869
|
+
// Hide voiceRow unless the model is elevenlabs/v3 (the voice list only makes sense for that model). This assignment is unconditional, so it overrides any voiceRow display value set earlier by the caller.
|
|
6870
|
+
const showVoice = state.ttsModel === 'elevenlabs/v3';
|
|
6871
|
+
$('voiceRow').style.display = showVoice ? '' : 'none';
|
|
6872
|
+
}
|
|
6828
6873
|
// Подсветить активную video-модель при открытии modal'а
|
|
6829
6874
|
function syncVideoModelActive() {
|
|
6830
6875
|
document.querySelectorAll('#genModal [data-vid-model]').forEach(b =>
|
|
@@ -6956,6 +7001,7 @@ $('genSubmit').addEventListener('click', async () => {
|
|
|
6956
7001
|
kind: 'audio',
|
|
6957
7002
|
prompt: finalText, rawPrompt,
|
|
6958
7003
|
model: 'eleven_v3', voiceId, voiceName,
|
|
7004
|
+
ttsModel: state.ttsModel || 'qwen/qwen3-tts',
|
|
6959
7005
|
tones: [...state.activeTones],
|
|
6960
7006
|
},
|
|
6961
7007
|
};
|
|
@@ -7116,11 +7162,14 @@ async function runTTSJob(node, text, boardHandle, bKey, voiceId) {
|
|
|
7116
7162
|
n.generated = { ...(n.generated || {}), state: 'submitting' };
|
|
7117
7163
|
});
|
|
7118
7164
|
const provider = await plannedProvider('tts');
|
|
7119
|
-
|
|
7165
|
+
// ttsModel может быть сохранён в node.generated.ttsModel (при regenerate)
|
|
7166
|
+
// или в текущем глобальном state.ttsModel (новая генерация).
|
|
7167
|
+
const ttsModel = node.generated?.ttsModel || state.ttsModel || 'qwen/qwen3-tts';
|
|
7168
|
+
logJob(node.id, `→ POST /api/tts → ${provider} (model=${ttsModel} voice=${voiceId || '—'})`);
|
|
7120
7169
|
const r = await fetch('/api/tts', {
|
|
7121
7170
|
method: 'POST',
|
|
7122
7171
|
headers: { 'Content-Type': 'application/json' },
|
|
7123
|
-
body: JSON.stringify({ text, voiceId,
|
|
7172
|
+
body: JSON.stringify({ text, voiceId, ttsModel }),
|
|
7124
7173
|
});
|
|
7125
7174
|
logJob(node.id, `← via ${r.headers.get('x-provider') || '?'} HTTP ${r.status}`);
|
|
7126
7175
|
if (!r.ok) {
|
|
@@ -7785,6 +7834,8 @@ async function openGenAudioForTimeline(charInfo, track, time) {
|
|
|
7785
7834
|
|
|
7786
7835
|
$('videoModelRow').style.display = 'none';
|
|
7787
7836
|
$('voiceRow').style.display = '';
|
|
7837
|
+
|
|
7838
|
+
$('ttsModelRow').style.display = '';
|
|
7788
7839
|
$('tonesRow').style.display = '';
|
|
7789
7840
|
$('sourceRefRow').style.display = 'none';
|
|
7790
7841
|
$('charsPickRow').style.display = 'none';
|
package/main.js
CHANGED
|
@@ -415,8 +415,7 @@ ipcMain.handle('updates:check', async () => {
|
|
|
415
415
|
//
|
|
416
416
|
// Streamим stdout/stderr в renderer через 'updates:install-output' events.
|
|
417
417
|
// На EACCES (нет прав на global install) renderer покажет команду для
|
|
418
|
-
// ручного запуска с sudo.
|
|
419
|
-
const { spawn } = require('child_process');
|
|
418
|
+
// ручного запуска с sudo. spawn уже импортирован в начале файла.
|
|
420
419
|
ipcMain.handle('updates:install', async (e, target = 'latest') => {
|
|
421
420
|
return new Promise((resolve, reject) => {
|
|
422
421
|
const shell = process.env.SHELL || '/bin/bash';
|
package/package.json
CHANGED
package/server.js
CHANGED
|
@@ -743,19 +743,62 @@ async function handleMusic(req, res) {
|
|
|
743
743
|
}
|
|
744
744
|
|
|
745
745
|
// ---------- /api/tts (Chatium ИЛИ ElevenLabs v3) ----------
|
|
746
|
+
// Request body sent by the client (everything is optional except text):
|
|
747
|
+
// {
|
|
748
|
+
// text,
|
|
749
|
+
// ttsModel?: 'qwen/qwen3-tts' | 'elevenlabs/v3' | 'minimax/speech-02-hd'
|
|
750
|
+
// | 'google/gemini-3.1-flash-tts-preview',
|
|
751
|
+
// voice?: string, // voiceId/speaker, depends on the model
|
|
752
|
+
// voiceId?: string, // legacy alias for voice (ElevenLabs)
|
|
753
|
+
// modelId?: string, // legacy: 'eleven_v3' etc.
|
|
754
|
+
// // Per-provider parameters are forwarded to Chatium unchanged, but only when their key is listed in TTS_PASSTHROUGH:
|
|
755
|
+
// stability, similarity_boost, style, speed, language_code,
|
|
756
|
+
// pitch, volume, emotion, sample_rate, audio_format, language_boost,
|
|
757
|
+
// mode, speaker, voice_description, style_instruction, ...
|
|
758
|
+
// }
|
|
759
|
+
// Allow-list of request-body keys that the Chatium branch of handleTts copies verbatim into the outgoing TTS body; keys not listed here are dropped. The 'voice' and 'voiceId' entries have no effect through this set because the copy loop skips them explicitly (they are mapped to ttsBody.voice separately).
const TTS_PASSTHROUGH = new Set([
|
|
760
|
+
'voice', 'voiceId', 'voice_id', 'speaker',
|
|
761
|
+
'language', 'language_code', 'language_boost',
|
|
762
|
+
'speed', 'pitch', 'volume',
|
|
763
|
+
'stability', 'similarity_boost', 'style', 'style_instruction',
|
|
764
|
+
'audio_format', 'sample_rate', 'bitrate', 'channel', 'emotion',
|
|
765
|
+
'subtitle_enable', 'english_normalization',
|
|
766
|
+
'voice_description', 'reference_audio', 'reference_text', 'mode',
|
|
767
|
+
'previous_text', 'next_text',
|
|
768
|
+
|
+
]);
|
|
746
769
|
async function handleTts(req, res) {
|
|
747
|
-
const
|
|
770
|
+
const body = await readJson(req);
|
|
771
|
+
const text = body.text;
|
|
748
772
|
if (!text) return send(res, 400, { error: 'нужен text' });
|
|
749
773
|
const s = getSettings();
|
|
750
774
|
|
|
775
|
+
// Chatium-путь — поддерживает 4 модели (Qwen TTS, ElevenLabs v3,
|
|
776
|
+
// MiniMax Speech HD, Gemini 3.1 Flash TTS — см. spaces/api/execAudioNode.ts).
|
|
751
777
|
if (s.useChatium && s.chatium?.token && s.chatium?.base) {
|
|
752
|
-
|
|
778
|
+
const ttsBody = { kind: 'tts', text };
|
|
779
|
+
// Маппинг легаси `modelId` → `model` (старый клиент шлёт modelId='eleven_v3').
|
|
780
|
+
if (body.ttsModel) ttsBody.model = body.ttsModel;
|
|
781
|
+
else if (body.modelId === 'eleven_v3') ttsBody.model = 'elevenlabs/v3';
|
|
782
|
+
else if (body.modelId) ttsBody.model = body.modelId;
|
|
783
|
+
// voice: поддерживаем оба имени (voice или voiceId).
|
|
784
|
+
if (body.voice) ttsBody.voice = body.voice;
|
|
785
|
+
else if (body.voiceId) ttsBody.voice = body.voiceId;
|
|
786
|
+
// Per-model passthrough.
|
|
787
|
+
for (const k of Object.keys(body)) {
|
|
788
|
+
if (k === 'text' || k === 'voice' || k === 'voiceId' || k === 'ttsModel' || k === 'modelId') continue;
|
|
789
|
+
if (TTS_PASSTHROUGH.has(k)) ttsBody[k] = body[k];
|
|
790
|
+
}
|
|
791
|
+
return handleAudioViaChatium(res, s, ttsBody);
|
|
753
792
|
}
|
|
793
|
+
|
|
794
|
+
// Прямой ElevenLabs (только eleven_v3, остальные модели только через Chatium).
|
|
754
795
|
if (!s.useElevenlabs) {
|
|
755
796
|
return send(res, 503, { error: 'Аудио-коннектор отключён. Включите Chatium или ElevenLabs.' });
|
|
756
797
|
}
|
|
757
798
|
const key = process.env.ELEVENLABS_API_KEY;
|
|
758
799
|
if (!key) return send(res, 500, { error: 'ELEVENLABS_API_KEY не задан' });
|
|
800
|
+
const voiceId = body.voiceId || body.voice || 'JBFqnCBsd6RMkjVDRZzb';
|
|
801
|
+
const modelId = body.modelId || 'eleven_v3';
|
|
759
802
|
logProviderCall('POST', 'ElevenLabs', `${ELEVEN_BASE}/v1/text-to-speech/${voiceId}`, `model=${modelId} text=${text.length}ch`);
|
|
760
803
|
const r = await fetch(`${ELEVEN_BASE}/v1/text-to-speech/${voiceId}`, {
|
|
761
804
|
method: 'POST',
|