webtalk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tts-models.js ADDED
@@ -0,0 +1,97 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const { createDownloadLock, resolveDownloadLock, rejectDownloadLock, getDownloadPromise, isDownloading } = require('./download-lock');
4
+ const { downloadFile, ensureDir, isFileCorrupted } = require('./whisper-models');
5
+
6
// A downloaded file must reach at least 80% of its nominal size to be
// considered complete; the helpers turn a nominal size into that byte floor.
const mibFloor = (mebibytes) => mebibytes * 1024 * 1024 * 0.8;
const kibFloor = (kibibytes) => kibibytes * 1024 * 0.8;

/** Model files fetched from `config.ttsBaseUrl`, with display size and minimum valid byte count. */
const TTS_FILES = [
  { name: 'mimi_encoder.onnx', size: '73MB', minBytes: mibFloor(73) },
  { name: 'text_conditioner.onnx', size: '16MB', minBytes: mibFloor(16) },
  { name: 'flow_lm_main_int8.onnx', size: '76MB', minBytes: mibFloor(76) },
  { name: 'flow_lm_flow_int8.onnx', size: '10MB', minBytes: mibFloor(10) },
  { name: 'mimi_decoder_int8.onnx', size: '23MB', minBytes: mibFloor(23) },
  { name: 'tokenizer.model', size: '59KB', minBytes: kibFloor(59) }
];

/**
 * Browser-side support files fetched from absolute URLs.
 * NOTE(review): the file is saved as `sentencepiece.js` but the URL points at
 * the fasttext package build — confirm this is the intended artifact.
 */
const TTS_WEB_FILES = [
  {
    name: 'sentencepiece.js',
    url: 'https://cdn.jsdelivr.net/npm/@facebookresearch/fasttext@1.0.2/dist/fasttext.js',
    size: '4MB'
  }
];
18
+
19
/**
 * Reports whether the TTS model set is fully installed.
 *
 * Every entry of `TTS_FILES` must exist in `config.ttsModelsDir` and pass the
 * `isFileCorrupted` size check. Checking the whole list (rather than a
 * hand-copied subset) means an install that is missing one of the smaller
 * files is reported as incomplete, so the caller re-runs the download.
 *
 * @param {{ ttsModelsDir: string }} config
 * @returns {Promise<boolean>} true only when all model files are present and valid.
 */
async function checkTTSModelExists(config) {
  if (!fs.existsSync(config.ttsModelsDir)) return false;

  return TTS_FILES.every(({ name, minBytes }) => {
    const filePath = path.join(config.ttsModelsDir, name);
    return fs.existsSync(filePath) && !isFileCorrupted(filePath, minBytes);
  });
}
36
+
37
/**
 * Downloads every file in `TTS_FILES` that is missing or fails the size check.
 *
 * Files are fetched one at a time from `config.ttsBaseUrl + file.name`.
 * A failed download does not abort the run: it is logged, any partial file is
 * removed so the next run retries it, and the remaining files are attempted.
 *
 * @param {{ ttsModelsDir: string, ttsBaseUrl: string }} config
 * @returns {Promise<{ downloadedCount: number, failedCount: number }>}
 *   How many files were fetched and how many failed in this run.
 */
async function downloadTTSModels(config) {
  ensureDir(config.ttsModelsDir);

  let downloadedCount = 0;
  let failedCount = 0;

  for (const file of TTS_FILES) {
    const destPath = path.join(config.ttsModelsDir, file.name);

    if (fs.existsSync(destPath)) {
      if (!isFileCorrupted(destPath, file.minBytes)) continue;
      // Too small to be complete: discard and fetch again.
      fs.unlinkSync(destPath);
    }

    try {
      await downloadFile(config.ttsBaseUrl + file.name, destPath, 3);
      downloadedCount++;
    } catch (err) {
      failedCount++;
      console.warn(`Failed to download TTS model file ${file.name}: ${(err && err.message) || err}`);
      // Never leave a truncated file behind for later runs to trip over.
      if (fs.existsSync(destPath)) fs.unlinkSync(destPath);
    }
  }

  return { downloadedCount, failedCount };
}
61
+
62
/**
 * Downloads the browser-side support files listed in `TTS_WEB_FILES` into
 * `config.ttsDir`, skipping any file that already exists.
 *
 * Because existence is the only skip check, a failed download must not leave
 * a partial file behind; failures are logged and the partial file is removed.
 *
 * @param {{ ttsDir: string }} config
 * @returns {Promise<{ downloadedCount: number, failedCount: number }>}
 */
async function downloadTTSWebFiles(config) {
  ensureDir(config.ttsDir);

  let downloadedCount = 0;
  let failedCount = 0;

  for (const file of TTS_WEB_FILES) {
    const destPath = path.join(config.ttsDir, file.name);
    if (fs.existsSync(destPath)) continue;

    try {
      await downloadFile(file.url, destPath);
      downloadedCount++;
    } catch (err) {
      failedCount++;
      console.warn(`Failed to download TTS web file ${file.name}: ${(err && err.message) || err}`);
      if (fs.existsSync(destPath)) fs.unlinkSync(destPath);
    }
  }

  return { downloadedCount, failedCount };
}
70
+
71
/**
 * Makes sure the TTS models and web files are installed, downloading them if
 * needed, and coordinates concurrent callers through the 'tts-models' lock.
 *
 * If a download is already in progress the existing lock promise is returned.
 * Otherwise the lock is registered BEFORE any work starts, so the later
 * resolve/reject calls always target an existing lock regardless of how
 * quickly the work settles.
 *
 * @param {object} config Passed through to the check/download helpers.
 * @returns {Promise<*>} Settles when installation has been attempted; rejects
 *   with the underlying error if a step throws.
 */
async function ensureTTSModels(config) {
  const lockKey = 'tts-models';

  if (isDownloading(lockKey)) {
    return getDownloadPromise(lockKey);
  }

  createDownloadLock(lockKey);

  try {
    const exists = await checkTTSModelExists(config);
    if (!exists) {
      await downloadTTSModels(config);
    }

    await downloadTTSWebFiles(config);
    resolveDownloadLock(lockKey, true);
  } catch (err) {
    rejectDownloadLock(lockKey, err);
    throw err;
  }
}
96
+
97
+ module.exports = { ensureTTSModels, checkTTSModelExists };
package/tts-utils.js ADDED
@@ -0,0 +1,52 @@
1
+ const SAMPLE_RATE = 24000;
2
+
3
/**
 * Linearly interpolates `data` from `fromRate` to `toRate`.
 *
 * The output length is `round(data.length * toRate / fromRate)`; each output
 * sample blends the two nearest input samples, clamping the upper neighbour
 * to the last input index.
 *
 * @param {ArrayLike<number>} data Input samples.
 * @param {number} fromRate Sample rate of `data`.
 * @param {number} toRate Desired sample rate.
 * @returns {Float32Array} Resampled signal.
 */
function resample(data, fromRate, toRate) {
  const step = fromRate / toRate;
  const lastIndex = data.length - 1;
  const blank = new Float32Array(Math.round(data.length / step));

  return blank.map((_, outIndex) => {
    const position = outIndex * step;
    const left = Math.floor(position);
    const right = Math.min(left + 1, lastIndex);
    const weight = position - left;
    return data[left] * (1 - weight) + data[right] * weight;
  });
}
16
+
17
/**
 * Concatenates float sample chunks and encodes them as a mono 16-bit PCM WAV
 * file at `SAMPLE_RATE`.
 *
 * @param {Array<ArrayLike<number>>} audioBuffer Chunks of samples in [-1, 1];
 *   out-of-range values are clamped.
 * @returns {ArrayBuffer} 44-byte RIFF/WAVE header followed by little-endian samples.
 */
function encodeWAV(audioBuffer) {
  const HEADER_BYTES = 44;
  const BYTES_PER_SAMPLE = 2;

  // Flatten all chunks into one contiguous sample array.
  const sampleCount = audioBuffer.reduce((sum, chunk) => sum + chunk.length, 0);
  const samples = new Float32Array(sampleCount);
  let cursor = 0;
  for (const chunk of audioBuffer) {
    samples.set(chunk, cursor);
    cursor += chunk.length;
  }

  const dataBytes = samples.length * BYTES_PER_SAMPLE;
  const wav = new ArrayBuffer(HEADER_BYTES + dataBytes);
  const view = new DataView(wav);

  // Four-character chunk tags.
  const tags = [[0, 'RIFF'], [8, 'WAVE'], [12, 'fmt '], [36, 'data']];
  for (const [offset, tag] of tags) {
    for (let k = 0; k < tag.length; k++) {
      view.setUint8(offset + k, tag.charCodeAt(k));
    }
  }

  // Little-endian numeric header fields: [setter, byte offset, value].
  const fields = [
    ['setUint32', 4, 36 + dataBytes],    // RIFF chunk size
    ['setUint32', 16, 16],               // fmt chunk size
    ['setUint16', 20, 1],                // format tag: PCM
    ['setUint16', 22, 1],                // channels
    ['setUint32', 24, SAMPLE_RATE],      // sample rate
    ['setUint32', 28, SAMPLE_RATE * 2],  // byte rate
    ['setUint16', 32, 2],                // block align
    ['setUint16', 34, 16],               // bits per sample
    ['setUint32', 40, dataBytes]         // data chunk size
  ];
  for (const [setter, offset, value] of fields) {
    view[setter](offset, value, true);
  }

  samples.forEach((sample, index) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
    view.setInt16(HEADER_BYTES + index * BYTES_PER_SAMPLE, scaled, true);
  });

  return wav;
}
51
+
52
+ export { SAMPLE_RATE, resample, encodeWAV };
package/tts.js ADDED
@@ -0,0 +1,167 @@
1
+ import { SAMPLE_RATE, resample, encodeWAV } from './tts-utils.js';
2
+
3
/** Status values passed to the `onStatus` callback. */
const STATUS = {
  LOADING: 'loading',
  READY: 'ready',
  SPEAKING: 'speaking',
  ERROR: 'error'
};

/**
 * Browser-side text-to-speech client that drives an inference Web Worker.
 *
 * Lifecycle: `init()` checks `/api/tts-status`, creates the AudioContext and
 * worker, and resolves when the worker reports `loaded`. `generate()` streams
 * audio chunks from the worker and resolves with an object URL for the
 * encoded WAV (or `null` when no audio was produced). `destroy()` releases
 * the worker, audio context and object URL.
 */
class TTS {
  /**
   * @param {object} [options]
   * @param {string} [options.voice] Default voice name.
   * @param {Function} [options.onAudioReady] Called with the WAV object URL.
   * @param {Function} [options.onStatus] Called with (status, state).
   * @param {Function} [options.onVoicesLoaded] Called with (voices, defaultVoice).
   * @param {Function} [options.onMetrics] Called with `{ ttfb }` or `{ rtfx }`.
   * @param {Function} [options.onAudioChunk] Called after each received chunk.
   * @param {string} [options.basePath] Prefix for the worker script URL.
   * @param {string} [options.apiBasePath] Prefix for the status endpoint.
   * @param {string} [options.ttsWorkerFile] Worker script file name.
   */
  constructor(options = {}) {
    this.voice = options.voice || null;
    this.onAudioReady = options.onAudioReady || null;
    this.onStatus = options.onStatus || null;
    this.onVoicesLoaded = options.onVoicesLoaded || null;
    this.onMetrics = options.onMetrics || null;
    this.onAudioChunk = options.onAudioChunk || null;
    this.basePath = options.basePath || '';
    this.apiBasePath = options.apiBasePath || '';
    this.ttsWorkerFile = options.ttsWorkerFile || 'inference-worker.js';
    this.worker = null;
    this.audioContext = null;
    this.audioBuffer = [];
    this.currentAudioUrl = null;
    this.startTime = 0;
    this.firstChunkTime = null;
    this.ready = false;
    // Settlers of the in-flight generate() promise, if any.
    this._generateResolve = null;
    this._generateReject = null;
  }

  /**
   * Verifies model availability, then starts the worker and asks it to load.
   *
   * @returns {Promise<void>} Resolves on the worker's `loaded` message.
   *   Rejects if models are unavailable, the worker cannot be created, the
   *   worker raises an error event, or the worker reports an `error` message
   *   before it has finished loading.
   */
  async init() {
    const statusUrl = this.apiBasePath + '/api/tts-status';
    const statusResponse = await fetch(statusUrl);
    const statusData = await statusResponse.json();
    if (!statusData.available) throw new Error('TTS models not available');

    this.audioContext = new (globalThis.AudioContext || globalThis.webkitAudioContext)({ sampleRate: SAMPLE_RATE });

    return new Promise((resolve, reject) => {
      try {
        const workerPath = this.basePath + '/tts/' + this.ttsWorkerFile;
        this.worker = new Worker(workerPath, { type: 'module' });
        this.worker.onmessage = (e) => this._handleMessage(e.data, resolve, reject);
        this.worker.onerror = (e) => reject(e);
        this.worker.postMessage({ type: 'load' });
      } catch (err) {
        reject(err);
      }
    });
  }

  /**
   * Dispatches one worker message.
   *
   * @param {object} msg Message payload from the worker.
   * @param {Function} [initResolve] Resolver of the pending init() promise.
   * @param {Function} [initReject] Rejecter of the pending init() promise.
   */
  _handleMessage(msg, initResolve, initReject) {
    switch (msg.type) {
      case 'status':
        this.onStatus?.(msg.status || msg.data?.status, msg.state || msg.data?.state);
        break;
      case 'voices_loaded':
        this.onVoicesLoaded?.(msg.voices, msg.defaultVoice);
        break;
      case 'loaded':
        this.ready = true;
        initResolve?.();
        this.onStatus?.(STATUS.READY, 'ready');
        break;
      case 'audio_chunk':
        this.audioBuffer.push(new Float32Array(msg.data));
        this.onAudioChunk?.();
        if (!this.firstChunkTime) {
          this.firstChunkTime = performance.now();
          this.onMetrics?.({ ttfb: this.firstChunkTime - this.startTime });
        }
        if (msg.metrics) {
          const elapsed = (performance.now() - this.startTime) / 1000;
          const audioDur = this.audioBuffer.reduce((s, b) => s + b.length, 0) / SAMPLE_RATE;
          if (elapsed > 0) this.onMetrics?.({ rtfx: audioDur / elapsed });
        }
        break;
      case 'stream_ended':
        this._finalize();
        break;
      case 'error': {
        const error = new Error(msg.error);
        this.onStatus?.(STATUS.ERROR, 'error');
        // A failure while the model is still loading must fail init();
        // otherwise the init() promise would never settle.
        if (!this.ready) initReject?.(error);
        this._generateReject?.(error);
        this._generateReject = null;
        this._generateResolve = null;
        break;
      }
    }
  }

  /**
   * Synthesizes `text` and resolves once the worker signals end of stream.
   *
   * @param {string} text Text to speak.
   * @param {string} [voice] Voice name; falls back to the instance default.
   * @returns {Promise<string|null>} Object URL of the WAV, or null if no
   *   audio was produced. Rejects if not initialized or the worker errors.
   */
  generate(text, voice) {
    return new Promise((resolve, reject) => {
      if (!this.worker || !this.ready) return reject(new Error('TTS not initialized'));
      this.audioBuffer = [];
      this.startTime = performance.now();
      this.firstChunkTime = null;
      if (this.currentAudioUrl) {
        URL.revokeObjectURL(this.currentAudioUrl);
        this.currentAudioUrl = null;
      }
      this._generateResolve = resolve;
      this._generateReject = reject;
      const v = voice || this.voice;
      if (v && v !== 'custom') {
        this.worker.postMessage({ type: 'set_voice', data: { voiceName: v } });
      }
      this.worker.postMessage({ type: 'generate', data: { text, voice: v } });
    });
  }

  /** Asks the worker to stop the current generation. */
  stop() {
    this.worker?.postMessage({ type: 'stop' });
  }

  /**
   * Decodes an audio file, resamples its first channel to SAMPLE_RATE if
   * needed, and sends it to the worker for voice encoding.
   *
   * @param {Blob} file Audio file selected by the user.
   * @returns {Promise<void>}
   */
  async uploadVoice(file) {
    if (!this.worker || !this.audioContext) return;
    const arrayBuffer = await file.arrayBuffer();
    const audioBuffer = await this.audioContext.decodeAudioData(arrayBuffer);
    let audioData = audioBuffer.getChannelData(0);
    if (audioBuffer.sampleRate !== SAMPLE_RATE) {
      audioData = resample(audioData, audioBuffer.sampleRate, SAMPLE_RATE);
    }
    this.worker.postMessage({ type: 'encode_voice', data: { audio: audioData } });
  }

  /** Encodes the buffered chunks to WAV and settles the pending generate(). */
  _finalize() {
    const settle = this._generateResolve;
    this._generateResolve = null;
    this._generateReject = null;

    if (this.audioBuffer.length === 0) {
      settle?.(null);
      return;
    }
    const wavBuf = encodeWAV(this.audioBuffer);
    if (this.currentAudioUrl) URL.revokeObjectURL(this.currentAudioUrl);
    this.currentAudioUrl = URL.createObjectURL(new Blob([wavBuf], { type: 'audio/wav' }));
    this.onAudioReady?.(this.currentAudioUrl);
    settle?.(this.currentAudioUrl);
  }

  /** Triggers a browser download of the most recent WAV, if there is one. */
  downloadAudio() {
    if (this.currentAudioUrl) {
      const a = document.createElement('a');
      a.href = this.currentAudioUrl;
      a.download = 'tts-output.wav';
      a.click();
    }
  }

  /** @returns {object} Snapshot of readiness and resource state. */
  getStatus() {
    return {
      ready: this.ready,
      voice: this.voice,
      workerFile: this.ttsWorkerFile,
      hasWorker: !!this.worker,
      hasAudioContext: !!this.audioContext,
      audioBuffered: this.audioBuffer.length
    };
  }

  /**
   * Releases the worker, object URL and audio context. A generate() call
   * still in flight is resolved with `null` (no audio) instead of being left
   * pending forever, since the terminated worker can no longer answer.
   */
  destroy() {
    this.worker?.terminate();
    this.worker = null;
    this.ready = false;

    this._generateResolve?.(null);
    this._generateResolve = null;
    this._generateReject = null;

    if (this.currentAudioUrl) {
      URL.revokeObjectURL(this.currentAudioUrl);
      this.currentAudioUrl = null;
    }
    this.audioContext?.close();
    this.audioContext = null;
  }
}
166
+
167
+ export { TTS };
@@ -0,0 +1,161 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const https = require('https');
4
+ const { createDownloadLock, resolveDownloadLock, rejectDownloadLock, getDownloadPromise, isDownloading } = require('./download-lock');
5
+
6
/**
 * Paths, relative to a model's `resolve/main/` URL and to its local model
 * directory, that `downloadWhisperModel` attempts to fetch.
 */
const WHISPER_REQUIRED_FILES = [
  // Configuration and tokenizer assets at the repository root.
  ...[
    'config.json',
    'preprocessor_config.json',
    'tokenizer.json',
    'tokenizer_config.json',
    'vocab.json',
    'merges.txt',
    'model_quantized.onnx'
  ],
  // Encoder/decoder weights under onnx/.
  ...[
    'encoder_model.onnx',
    'decoder_model_merged_q4.onnx',
    'decoder_model_merged.onnx'
  ].map((fileName) => `onnx/${fileName}`)
];
18
+
19
/**
 * Creates `dir` (and any missing parents) unless something already exists at
 * that path.
 *
 * @param {string} dir Directory path to create.
 */
function ensureDir(dir) {
  if (fs.existsSync(dir)) return;
  fs.mkdirSync(dir, { recursive: true });
}
24
+
25
/**
 * Downloads `url` to `dest` over HTTPS, following redirects and retrying
 * failed attempts with exponential backoff (1s, 2s, 4s, ...).
 *
 * The destination file is opened only after a 200 response arrives, so
 * redirects, HTTP errors and connection failures never create or truncate
 * `dest`. Errors on the response or file stream, and connections that close
 * before the body is complete, count as a failed attempt and are retried
 * instead of leaving the promise pending. Progress is written to stdout.
 *
 * @param {string|URL} url Source URL (https).
 * @param {string} dest Destination file path; parent directories are created.
 * @param {number} [maxRetries=3] Total number of attempts.
 * @param {number} [attempt=0] Zero-based index of the current attempt (internal).
 * @param {number} [redirectCount=0] Redirects followed so far (internal).
 * @returns {Promise<void>} Resolves after the file has been written and closed.
 */
function downloadFile(url, dest, maxRetries = 3, attempt = 0, redirectCount = 0) {
  const MAX_REDIRECTS = 10;
  const REDIRECT_CODES = [301, 302, 307, 308];

  return new Promise((resolve, reject) => {
    // Guards against settling twice when several stream events report the same failure.
    let settled = false;

    const removePartial = () => {
      try {
        if (fs.existsSync(dest)) fs.unlinkSync(dest);
      } catch (err) {
        // Best effort: the next attempt truncates the file anyway.
      }
    };

    const fail = (error) => {
      if (settled) return;
      settled = true;
      removePartial();
      reject(error);
    };

    const retryOrFail = (error) => {
      if (settled) return;
      if (attempt >= maxRetries - 1) {
        fail(error);
        return;
      }
      settled = true;
      removePartial();
      const delayMs = Math.pow(2, attempt) * 1000;
      setTimeout(() => {
        downloadFile(url, dest, maxRetries, attempt + 1, redirectCount).then(resolve, reject);
      }, delayMs);
    };

    ensureDir(path.dirname(dest));

    const request = https.get(url, (response) => {
      const { statusCode } = response;

      if (REDIRECT_CODES.includes(statusCode)) {
        response.resume(); // drain so the socket is released
        const { location } = response.headers;
        if (!location) {
          fail(new Error(`Redirect (${statusCode}) without a Location header`));
          return;
        }
        if (redirectCount >= MAX_REDIRECTS) {
          fail(new Error(`Too many redirects while downloading ${url}`));
          return;
        }
        let target;
        try {
          // Location may be relative; resolve it against the current URL.
          target = new URL(location, url).toString();
        } catch (err) {
          fail(err);
          return;
        }
        settled = true;
        downloadFile(target, dest, maxRetries, attempt, redirectCount + 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        retryOrFail(new Error(`Failed to download: ${statusCode}`));
        return;
      }

      const file = fs.createWriteStream(dest);

      const failStream = (error) => {
        if (settled) return;
        response.unpipe(file);
        response.destroy();
        file.destroy();
        retryOrFail(error);
      };

      let downloaded = 0;
      response.on('data', (chunk) => {
        downloaded += chunk.length;
        process.stdout.write(`\r ↓ ${path.basename(dest)} ... ${(downloaded / 1024 / 1024).toFixed(2)}MB`);
      });
      response.on('error', failStream);
      response.on('close', () => {
        // `complete` is false when the connection ended before the full body arrived.
        if (!response.complete) failStream(new Error('Connection closed before download completed'));
      });
      file.on('error', failStream);
      file.on('finish', () => {
        // Resolve only once the descriptor is closed so callers can stat/read the file.
        file.close((closeErr) => {
          if (closeErr) {
            retryOrFail(closeErr);
            return;
          }
          if (settled) return;
          settled = true;
          process.stdout.write(' ✓\n');
          resolve();
        });
      });

      response.pipe(file);
    });

    request.on('error', retryOrFail);
  });
}
72
+
73
/**
 * Heuristic check for an unusable download.
 *
 * A file is treated as corrupted when it cannot be stat'ed, when it is empty
 * (an empty file is what an interrupted download leaves behind), or when
 * `minSizeBytes` is given and the file is smaller than that.
 *
 * @param {string} filePath File to inspect.
 * @param {number|null} [minSizeBytes=null] Minimum acceptable size in bytes.
 * @returns {boolean} true if the file should be discarded and re-downloaded.
 */
function isFileCorrupted(filePath, minSizeBytes = null) {
  try {
    const { size } = fs.statSync(filePath);
    if (size === 0) return true;
    return minSizeBytes !== null && size < minSizeBytes;
  } catch (err) {
    // Missing or unreadable file.
    return true;
  }
}
84
+
85
/**
 * Reports whether a Whisper model is usable from local disk.
 *
 * Requires the model directory, the ONNX encoder, and at least one of the two
 * merged decoder variants to exist; the encoder must then pass a 40 MiB
 * minimum-size check and at least one decoder a 100 MiB check.
 *
 * @param {string} modelName Model identifier, used as a sub-path of `config.modelsDir`.
 * @param {{ modelsDir: string }} config
 * @returns {Promise<boolean>}
 */
async function checkWhisperModelExists(modelName, config) {
  const ENCODER_MIN_BYTES = 40 * 1024 * 1024;
  const DECODER_MIN_BYTES = 100 * 1024 * 1024;

  const modelDir = path.join(config.modelsDir, modelName);
  if (!fs.existsSync(modelDir)) return false;

  const inOnnxDir = (fileName) => path.join(modelDir, 'onnx', fileName);
  const encoderPath = inOnnxDir('encoder_model.onnx');
  const decoderCandidates = [
    inOnnxDir('decoder_model_merged_q4.onnx'),
    inOnnxDir('decoder_model_merged.onnx')
  ];

  if (!fs.existsSync(encoderPath)) return false;
  if (!decoderCandidates.some((candidate) => fs.existsSync(candidate))) return false;

  if (isFileCorrupted(encoderPath, ENCODER_MIN_BYTES)) return false;
  return decoderCandidates.some(
    (candidate) => isFileCorrupted(candidate, DECODER_MIN_BYTES) === false
  );
}
104
+
105
/**
 * Downloads every file in `WHISPER_REQUIRED_FILES` for `modelName` that is
 * missing or flagged by `isFileCorrupted`.
 *
 * Files come from `${config.whisperBaseUrl}${modelName}/resolve/main/<file>`.
 * A failed file does not abort the run: it is logged, any partial file is
 * removed, and the remaining files are still attempted.
 *
 * @param {string} modelName Model identifier (also the local sub-directory).
 * @param {{ modelsDir: string, whisperBaseUrl: string }} config
 * @returns {Promise<{ downloadedCount: number, failedCount: number }>}
 *   How many files were fetched and how many failed in this run.
 */
async function downloadWhisperModel(modelName, config) {
  const modelDir = path.join(config.modelsDir, modelName);
  ensureDir(modelDir);

  const baseUrl = `${config.whisperBaseUrl}${modelName}/resolve/main/`;

  let downloadedCount = 0;
  let failedCount = 0;

  for (const file of WHISPER_REQUIRED_FILES) {
    const destPath = path.join(modelDir, file);

    if (fs.existsSync(destPath)) {
      if (!isFileCorrupted(destPath)) continue;
      fs.unlinkSync(destPath);
    }

    try {
      await downloadFile(baseUrl + file, destPath, 3);
      downloadedCount++;
    } catch (err) {
      failedCount++;
      console.warn(`Failed to download Whisper file ${file} for ${modelName}: ${(err && err.message) || err}`);
      if (fs.existsSync(destPath)) fs.unlinkSync(destPath);
    }
  }

  return { downloadedCount, failedCount };
}
136
+
137
/**
 * Makes sure the Whisper model `modelName` is installed, downloading it if
 * needed, and coordinates concurrent callers through a per-model lock.
 *
 * If a download for the same model is already running, the existing lock
 * promise is returned. Otherwise the lock is registered BEFORE any work
 * starts, so the later resolve/reject calls always target an existing lock.
 *
 * @param {string} modelName Model identifier.
 * @param {object} config Passed through to the check/download helpers.
 * @returns {Promise<*>} Settles when installation has been attempted; rejects
 *   with the underlying error if a step throws.
 */
async function ensureModel(modelName, config) {
  const lockKey = `whisper-${modelName}`;

  if (isDownloading(lockKey)) {
    return getDownloadPromise(lockKey);
  }

  createDownloadLock(lockKey);

  try {
    const exists = await checkWhisperModelExists(modelName, config);
    if (!exists) {
      await downloadWhisperModel(modelName, config);
    }
    resolveDownloadLock(lockKey, true);
  } catch (err) {
    rejectDownloadLock(lockKey, err);
    throw err;
  }
}
160
+
161
+ module.exports = { ensureModel, downloadFile, ensureDir, isFileCorrupted };
@@ -0,0 +1,32 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+
4
/**
 * Source text prepended to the worker script by `patchWorker`.
 *
 * It wraps `self.fetch` so that requests for
 * `huggingface.co/<org>/<model>/resolve/main/<file>` are served from the
 * local `/models/<org>/<model>/<file>` route; every other request is passed
 * through unchanged. The request URL is read from a string, a Request-like
 * object (`.url`) or a URL object (stringified) — fetch accepts all three,
 * and reading only `.url` would throw on a URL object.
 *
 * The identifier `originalFetch` doubles as the marker `patchWorker` uses to
 * detect an already-patched file, so it must stay in this text.
 */
const FETCH_PATCH = `const originalFetch = self.fetch;
self.fetch = function(input, init) {
  const url = typeof input === 'string' ? input : String((input && input.url) || input);
  if (url.includes('huggingface.co') && url.includes('/resolve/main/')) {
    const match = url.match(/huggingface\\.co\\/([^\\/]+\\/[^\\/]+)\\/resolve\\/main\\/(.*)/);
    if (match) {
      const [, modelName, filePath] = match;
      return originalFetch('/models/' + modelName + '/' + filePath, init);
    }
  }
  return originalFetch(input, init);
};
`;
17
+
18
/**
 * Prepends `FETCH_PATCH` to the worker script so model requests are served
 * locally.
 *
 * If a backup copy exists it is first restored over the worker file. A file
 * that already contains the `originalFetch` marker is left unchanged.
 * Failures (missing worker file, permissions, ...) are logged rather than
 * thrown so start-up continues, but they are no longer silent.
 *
 * @param {{ assetsDir: string, workerFile: string, workerBackup: string }} config
 * @returns {boolean} true if the patch was written; false if the file was
 *   already patched or patching failed.
 */
function patchWorker(config) {
  const workerPath = path.join(config.assetsDir, config.workerFile);
  const backupPath = path.join(config.assetsDir, config.workerBackup);

  try {
    if (fs.existsSync(backupPath)) {
      fs.copyFileSync(backupPath, workerPath);
    }
    const content = fs.readFileSync(workerPath, 'utf8');
    if (content.includes('originalFetch')) return false;
    fs.writeFileSync(workerPath, FETCH_PATCH + content);
    return true;
  } catch (err) {
    console.warn(`Failed to patch worker ${workerPath}: ${(err && err.message) || err}`);
    return false;
  }
}
31
+
32
+ module.exports = { patchWorker };