webtalk 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.gitattributes ADDED
@@ -0,0 +1,35 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
@@ -0,0 +1,26 @@
1
+ name: Publish to npm
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main # every push to main triggers the publish job below
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - name: Set up Node.js
15
+ uses: actions/setup-node@v4
16
+ with:
17
+ node-version: '18'
18
+ registry-url: 'https://registry.npmjs.org' # setup-node writes an .npmrc for this registry that reads NODE_AUTH_TOKEN
19
+
20
+ - name: Install dependencies
21
+ run: npm ci # installs strictly from the lockfile, which must therefore be committed
22
+
23
+ - name: Publish to npm
24
+ run: npm publish # NOTE(review): npm refuses to re-publish an existing version, so a push without a version bump fails here - confirm that is intended
25
+ env:
26
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} # repository secret; presumably an npm access token with publish rights
package/README.md ADDED
@@ -0,0 +1 @@
1
+ coming soon
package/app.html ADDED
@@ -0,0 +1,519 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Webtalk - Whisper STT + Pocket TTS</title>
7
+ <link rel="icon" type="image/png" href="/logo.png">
8
+ <style>
9
+ * { box-sizing: border-box; margin: 0; padding: 0; }
10
+ body {
11
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
12
+ background: #0a0a0f;
13
+ color: #e0e0e0;
14
+ min-height: 100vh;
15
+ }
16
+ .header {
17
+ background: #15151f;
18
+ border-bottom: 1px solid #252535;
19
+ padding: 15px 20px;
20
+ display: flex;
21
+ align-items: center;
22
+ justify-content: space-between;
23
+ }
24
+ .logo {
25
+ display: flex;
26
+ align-items: center;
27
+ gap: 10px;
28
+ font-size: 20px;
29
+ font-weight: 600;
30
+ color: #00d4aa;
31
+ }
32
+ .logo-icon {
33
+ width: 32px;
34
+ height: 32px;
35
+ }
36
+ .subtitle {
37
+ color: #666;
38
+ font-size: 12px;
39
+ font-weight: normal;
40
+ }
41
+ .container {
42
+ display: grid;
43
+ grid-template-columns: 1fr 1fr;
44
+ gap: 0;
45
+ height: calc(100vh - 65px);
46
+ }
47
+ .panel {
48
+ padding: 20px;
49
+ overflow-y: auto;
50
+ }
51
+ .panel-left {
52
+ border-right: 1px solid #252535;
53
+ }
54
+ .panel-header {
55
+ display: flex;
56
+ align-items: center;
57
+ gap: 8px;
58
+ margin-bottom: 20px;
59
+ padding-bottom: 15px;
60
+ border-bottom: 1px solid #252535;
61
+ }
62
+ .panel-header h2 {
63
+ font-size: 18px;
64
+ color: #00d4aa;
65
+ }
66
+ .panel-header .icon {
67
+ width: 24px;
68
+ height: 24px;
69
+ }
70
+ .section {
71
+ background: #15151f;
72
+ border-radius: 12px;
73
+ padding: 16px;
74
+ margin-bottom: 16px;
75
+ border: 1px solid #252535;
76
+ }
77
+ .section-title {
78
+ font-size: 13px;
79
+ color: #888;
80
+ text-transform: uppercase;
81
+ letter-spacing: 0.5px;
82
+ margin-bottom: 12px;
83
+ }
84
+ button {
85
+ background: #00d4aa;
86
+ border: none;
87
+ border-radius: 8px;
88
+ padding: 12px 20px;
89
+ color: #0a0a0f;
90
+ font-weight: 600;
91
+ cursor: pointer;
92
+ transition: all 0.2s;
93
+ font-size: 14px;
94
+ }
95
+ button:hover:not(:disabled) {
96
+ background: #00e6b8;
97
+ transform: translateY(-1px);
98
+ }
99
+ button:disabled {
100
+ opacity: 0.5;
101
+ cursor: not-allowed;
102
+ }
103
+ button.secondary {
104
+ background: #252535;
105
+ color: #e0e0e0;
106
+ }
107
+ button.secondary:hover:not(:disabled) {
108
+ background: #303045;
109
+ }
110
+ button.danger {
111
+ background: #ff6b6b;
112
+ color: white;
113
+ }
114
+ textarea, select {
115
+ width: 100%;
116
+ background: #0f0f15;
117
+ border: 1px solid #303040;
118
+ border-radius: 8px;
119
+ padding: 12px;
120
+ color: #e0e0e0;
121
+ font-size: 14px;
122
+ font-family: inherit;
123
+ }
124
+ textarea {
125
+ min-height: 100px;
126
+ resize: vertical;
127
+ }
128
+ textarea:focus, select:focus {
129
+ outline: none;
130
+ border-color: #00d4aa;
131
+ }
132
+ .controls {
133
+ display: flex;
134
+ gap: 10px;
135
+ flex-wrap: wrap;
136
+ margin-top: 12px;
137
+ }
138
+ .status {
139
+ padding: 8px 12px;
140
+ border-radius: 6px;
141
+ font-size: 13px;
142
+ background: #1a1a25;
143
+ color: #888;
144
+ margin-top: 10px;
145
+ }
146
+ .status.loading { background: #1a2a35; color: #00d4aa; }
147
+ .status.generating { background: #1a2535; color: #4a9eff; }
148
+ .status.ready { background: #1a2f25; color: #00d4aa; }
149
+ .status.recording { background: #351a1a; color: #ff6b6b; }
150
+ .status.error { background: #351a1a; color: #ff6b6b; }
151
+
152
+ .output-box {
153
+ background: #0f0f15;
154
+ border: 1px solid #303040;
155
+ border-radius: 8px;
156
+ padding: 12px;
157
+ min-height: 80px;
158
+ font-size: 14px;
159
+ line-height: 1.5;
160
+ }
161
+ .output-box:empty::before {
162
+ content: 'Transcribed text will appear here...';
163
+ color: #555;
164
+ font-style: italic;
165
+ }
166
+ .output-box.has-content:empty::before {
167
+ content: none;
168
+ }
169
+
170
+ .transfer-btn {
171
+ position: absolute;
172
+ left: 50%;
173
+ top: 50%;
174
+ transform: translate(-50%, -50%);
175
+ background: #252535;
176
+ border: 2px solid #00d4aa;
177
+ border-radius: 50%;
178
+ width: 48px;
179
+ height: 48px;
180
+ display: flex;
181
+ align-items: center;
182
+ justify-content: center;
183
+ cursor: pointer;
184
+ z-index: 10;
185
+ transition: all 0.2s;
186
+ }
187
+ .transfer-btn:hover {
188
+ background: #00d4aa;
189
+ }
190
+ .transfer-btn svg {
191
+ width: 20px;
192
+ height: 20px;
193
+ fill: #00d4aa;
194
+ }
195
+ .transfer-btn:hover svg {
196
+ fill: #0a0a0f;
197
+ }
198
+
199
+ .audio-player {
200
+ width: 100%;
201
+ margin-top: 12px;
202
+ }
203
+
204
+ .voice-controls {
205
+ display: flex;
206
+ gap: 10px;
207
+ align-items: center;
208
+ flex-wrap: wrap;
209
+ }
210
+
211
+ .metrics {
212
+ display: flex;
213
+ gap: 15px;
214
+ margin-top: 10px;
215
+ font-size: 12px;
216
+ color: #666;
217
+ }
218
+ .metric-value {
219
+ color: #00d4aa;
220
+ font-weight: 600;
221
+ }
222
+
223
+ @media (max-width: 900px) {
224
+ .container {
225
+ grid-template-columns: 1fr;
226
+ grid-template-rows: auto auto;
227
+ }
228
+ .panel-left {
229
+ border-right: none;
230
+ border-bottom: 1px solid #252535;
231
+ }
232
+ .transfer-btn {
233
+ position: static;
234
+ transform: none;
235
+ margin: 10px auto;
236
+ }
237
+ }
238
+
239
+ .hidden { display: none !important; }
240
+
241
+ #whisper-container {
242
+ min-height: 200px;
243
+ }
244
+
245
+ .loading-overlay {
246
+ position: fixed;
247
+ top: 0; left: 0; right: 0; bottom: 0;
248
+ background: rgba(10, 10, 15, 0.9);
249
+ display: flex;
250
+ flex-direction: column;
251
+ align-items: center;
252
+ justify-content: center;
253
+ z-index: 1000;
254
+ }
255
+ .loading-overlay.hidden {
256
+ display: none;
257
+ }
258
+ .spinner {
259
+ width: 50px;
260
+ height: 50px;
261
+ border: 3px solid #252535;
262
+ border-top-color: #00d4aa;
263
+ border-radius: 50%;
264
+ animation: spin 1s linear infinite;
265
+ }
266
+ @keyframes spin {
267
+ to { transform: rotate(360deg); }
268
+ }
269
+ .loading-text {
270
+ margin-top: 20px;
271
+ color: #888;
272
+ }
273
+ </style>
274
+ </head>
275
+ <body>
276
+ <div class="loading-overlay" id="loading-screen">
277
+ <div class="spinner"></div>
278
+ <div class="loading-text">Loading models...</div>
279
+ </div>
280
+
281
+ <header class="header">
282
+ <div class="logo">
283
+ <svg class="logo-icon" viewBox="0 0 32 32" fill="none">
284
+ <circle cx="16" cy="16" r="14" stroke="#00d4aa" stroke-width="2"/>
285
+ <path d="M10 16h12M16 10v12" stroke="#00d4aa" stroke-width="2" stroke-linecap="round"/>
286
+ </svg>
287
+ Webtalk
288
+ <span class="subtitle">Whisper STT + Pocket TTS</span>
289
+ </div>
290
+ <div style="color: #666; font-size: 13px;">
291
+ Local AI Speech Processing
292
+ </div>
293
+ </header>
294
+
295
+ <div class="container">
296
+ <!-- Whisper STT Panel -->
297
+ <div class="panel panel-left">
298
+ <div class="panel-header">
299
+ <svg class="icon" viewBox="0 0 24 24" fill="none" stroke="#00d4aa" stroke-width="2">
300
+ <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
301
+ <path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
302
+ <line x1="12" y1="19" x2="12" y2="23"/>
303
+ <line x1="8" y1="23" x2="16" y2="23"/>
304
+ </svg>
305
+ <h2>Speech to Text (Whisper)</h2>
306
+ </div>
307
+
308
+ <div class="section">
309
+ <div class="section-title">Audio Input</div>
310
+ <div class="controls">
311
+ <button id="record-btn" onclick="startRecording()">
312
+ Start Recording
313
+ </button>
314
+ <button id="stop-record-btn" class="danger" onclick="stopRecording()" disabled>
315
+ Stop
316
+ </button>
317
+ <input type="file" id="audio-upload" accept="audio/*" hidden onchange="handleAudioUpload(event)">
318
+ <button class="secondary" onclick="document.getElementById('audio-upload').click()">
319
+ Upload Audio
320
+ </button>
321
+ </div>
322
+ <div id="stt-status" class="status">Ready</div>
323
+ </div>
324
+
325
+ <div class="section">
326
+ <div class="section-title">Transcription</div>
327
+ <div id="transcription-output" class="output-box"></div>
328
+ <div class="controls">
329
+ <button class="secondary" onclick="copyTranscription()">Copy</button>
330
+ <button class="secondary" onclick="clearTranscription()">Clear</button>
331
+ </div>
332
+ </div>
333
+
334
+ <div id="root" style="display: none;"></div>
335
+ </div>
336
+
337
+ <!-- Pocket TTS Panel -->
338
+ <div class="panel">
339
+ <div class="panel-header">
340
+ <svg class="icon" viewBox="0 0 24 24" fill="none" stroke="#00d4aa" stroke-width="2">
341
+ <polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
342
+ <path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/>
343
+ </svg>
344
+ <h2>Text to Speech (Pocket TTS)</h2>
345
+ </div>
346
+
347
+ <div class="section">
348
+ <div class="section-title">Voice Selection</div>
349
+ <div class="voice-controls">
350
+ <select id="voice-select" style="flex: 1;">
351
+ <option value="">Loading voices...</option>
352
+ </select>
353
+ <input type="file" id="voice-upload" accept="audio/*" hidden onchange="handleVoiceUpload(event)">
354
+ <button class="secondary" onclick="document.getElementById('voice-upload').click()">
355
+ Custom Voice
356
+ </button>
357
+ </div>
358
+ </div>
359
+
360
+ <div class="section">
361
+ <div class="section-title">Text Input</div>
362
+ <textarea id="tts-input" placeholder="Enter text to synthesize..."></textarea>
363
+ <div class="controls">
364
+ <button id="generate-btn" onclick="generateSpeech()" disabled>
365
+ Generate Speech
366
+ </button>
367
+ <button id="stop-tts-btn" class="danger" onclick="stopTTS()" disabled>
368
+ Stop
369
+ </button>
370
+ <button class="secondary" onclick="useTranscription()">
371
+ Use Transcription ↑
372
+ </button>
373
+ </div>
374
+ <div id="tts-status" class="status">Loading models...</div>
375
+ <div class="metrics" id="tts-metrics" style="display: none;">
376
+ <div>RTFx: <span class="metric-value" id="rtfx">--</span></div>
377
+ <div>TTFB: <span class="metric-value" id="ttfb">--</span>ms</div>
378
+ </div>
379
+ </div>
380
+
381
+ <div class="section" id="audio-output-section" style="display: none;">
382
+ <div class="section-title">Generated Audio</div>
383
+ <audio id="tts-audio" class="audio-player" controls></audio>
384
+ <div class="controls">
385
+ <button class="secondary" onclick="downloadAudio()">Download WAV</button>
386
+ </div>
387
+ </div>
388
+ </div>
389
+ </div>
390
+
391
+ <script type="module">
392
+ import { STT, TTS, debug } from '/webtalk/sdk.js';
393
+
394
+ window.webtalkSDK = { STT, TTS, debug }; // expose the imported SDK names on window (presumably for console/debug access - nothing else in this script reads it)
395
+
396
+ const sttStatus = document.getElementById('stt-status'); // status bar of the speech-to-text panel
397
+ const ttsStatus = document.getElementById('tts-status'); // status bar of the text-to-speech panel
398
+
399
+ // === STT ===
400
+ const stt = new STT({
401
+ language: 'en',
402
+ onTranscript: (text) => {
403
+ document.getElementById('transcription-output').textContent = text;
404
+ },
405
+ onPartial: (text) => {
406
+ document.getElementById('transcription-output').textContent = text;
407
+ },
408
+ onStatus: (state, msg) => {
409
+ sttStatus.textContent = msg;
410
+ const classMap = { loading: 'loading', ready: 'ready', recording: 'recording', transcribing: 'loading' };
411
+ sttStatus.className = 'status ' + (classMap[state] || 'ready');
412
+ if (state === 'ready') document.getElementById('loading-screen').classList.add('hidden');
413
+ }
414
+ });
415
+
416
+ stt.init().then(() => {
417
+ document.getElementById('loading-screen').classList.add('hidden');
418
+ }).catch(err => {
419
+ sttStatus.textContent = 'STT error: ' + err.message;
420
+ sttStatus.className = 'status error';
421
+ document.getElementById('loading-screen').classList.add('hidden');
422
+ });
423
+
424
+ window.startRecording = async () => { // exposed on window for the inline onclick handler of #record-btn
425
+ await stt.startRecording(); // the buttons below flip only after this resolves; a rejection leaves them unchanged
426
+ document.getElementById('record-btn').disabled = true;
427
+ document.getElementById('stop-record-btn').disabled = false;
428
+ };
429
+
430
+ window.stopRecording = async () => { // exposed on window for the inline onclick handler of #stop-record-btn
431
+ document.getElementById('record-btn').disabled = false; // buttons are reset before the SDK call is awaited
432
+ document.getElementById('stop-record-btn').disabled = true;
433
+ await stt.stopRecording();
434
+ };
435
+
436
+ window.handleAudioUpload = (e) => {
437
+ const file = e.target.files[0];
438
+ if (file) stt.transcribeBlob(file);
439
+ };
440
+
441
+ window.copyTranscription = () => { // onclick handler of the "Copy" button
442
+ navigator.clipboard.writeText(document.getElementById('transcription-output').textContent); // the returned promise is not awaited, so a clipboard failure is not reported here
443
+ };
444
+
445
+ window.clearTranscription = () => { // onclick handler of the "Clear" button
446
+ document.getElementById('transcription-output').textContent = ''; // an empty box lets the .output-box:empty placeholder show again
447
+ };
448
+
449
+ const tts = new TTS({
450
+ onStatus: (status, state) => {
451
+ ttsStatus.textContent = status;
452
+ ttsStatus.className = 'status ' + (state || 'ready');
453
+ },
454
+ onVoicesLoaded: (voices, defaultVoice) => {
455
+ const select = document.getElementById('voice-select');
456
+ select.innerHTML = '';
457
+ voices.forEach(voice => {
458
+ const option = document.createElement('option');
459
+ option.value = voice;
460
+ option.textContent = voice.charAt(0).toUpperCase() + voice.slice(1);
461
+ if (voice === defaultVoice) option.selected = true;
462
+ select.appendChild(option);
463
+ });
464
+ const custom = document.createElement('option');
465
+ custom.value = 'custom';
466
+ custom.textContent = 'Custom Voice';
467
+ select.appendChild(custom);
468
+ },
469
+ onAudioChunk: () => {
470
+ document.getElementById('audio-output-section').style.display = 'block';
471
+ },
472
+ onMetrics: (metrics) => {
473
+ document.getElementById('tts-metrics').style.display = 'flex';
474
+ if (metrics.rtfx) document.getElementById('rtfx').textContent = metrics.rtfx.toFixed(2) + 'x';
475
+ if (metrics.ttfb) document.getElementById('ttfb').textContent = Math.round(metrics.ttfb);
476
+ },
477
+ onAudioReady: (audioUrl) => {
478
+ if (audioUrl) document.getElementById('tts-audio').src = audioUrl;
479
+ document.getElementById('generate-btn').disabled = false;
480
+ document.getElementById('stop-tts-btn').disabled = true;
481
+ }
482
+ });
483
+
484
+ tts.init().then(() => {
485
+ document.getElementById('generate-btn').disabled = false;
486
+ }).catch(err => {
487
+ ttsStatus.textContent = 'TTS error: ' + err.message;
488
+ ttsStatus.className = 'status error';
489
+ });
490
+
491
+ window.generateSpeech = async () => { // onclick handler of #generate-btn
492
+ const text = document.getElementById('tts-input').value.trim();
493
+ if (!text) return; // nothing to synthesize
494
+ document.getElementById('generate-btn').disabled = true; // busy state: "Generate" locked, "Stop" unlocked
495
+ document.getElementById('stop-tts-btn').disabled = false;
496
+ document.getElementById('tts-metrics').style.display = 'none'; // hidden until onMetrics shows the row again
497
+ await tts.generate(text, document.getElementById('voice-select').value); // NOTE(review): the buttons are only reset by onAudioReady/stopTTS - confirm the SDK fires onAudioReady when generate() fails
498
+ };
499
+
500
+ window.stopTTS = () => { // onclick handler of #stop-tts-btn
501
+ tts.stop();
502
+ document.getElementById('generate-btn').disabled = false; // back to idle: "Generate" unlocked, "Stop" locked
503
+ document.getElementById('stop-tts-btn').disabled = true;
504
+ };
505
+
506
+ window.handleVoiceUpload = (e) => {
507
+ const file = e.target.files[0];
508
+ if (file) tts.uploadVoice(file);
509
+ };
510
+
511
+ window.useTranscription = () => {
512
+ const t = document.getElementById('transcription-output').textContent;
513
+ if (t) document.getElementById('tts-input').value = t;
514
+ };
515
+
516
+ window.downloadAudio = () => tts.downloadAudio(); // onclick handler of the "Download WAV" button; delegates to the TTS instance
517
+ </script>
518
+ </body>
519
+ </html>