webtalk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +35 -0
- package/.github/workflows/publish.yml +26 -0
- package/README.md +1 -0
- package/app.html +519 -0
- package/assets/index-ClpvH5Vn.js +40 -0
- package/assets/index-DUYekU7u.css +1 -0
- package/assets/worker-BPxxCWVT.js +2679 -0
- package/config.js +36 -0
- package/debug.js +21 -0
- package/download-lock.js +26 -0
- package/hot-reload.js +78 -0
- package/middleware.js +62 -0
- package/package.json +33 -0
- package/persistent-state.js +62 -0
- package/sdk.js +22 -0
- package/serve-static.js +45 -0
- package/server.js +177 -0
- package/setup-npm-publishing.sh +140 -0
- package/stt.js +141 -0
- package/test.mp3 +0 -0
- package/tts/EventEmitter.js +59 -0
- package/tts/PCMPlayerWorklet.js +563 -0
- package/tts/inference-worker.js +1121 -0
- package/tts/onnx-streaming.js +721 -0
- package/tts-models.js +97 -0
- package/tts-utils.js +52 -0
- package/tts.js +167 -0
- package/whisper-models.js +161 -0
- package/worker-patch.js +32 -0
package/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
|
2
|
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
|
3
|
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
4
|
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
|
5
|
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
6
|
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
|
7
|
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
|
8
|
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
|
9
|
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
|
10
|
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
11
|
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
|
12
|
+
*.model filter=lfs diff=lfs merge=lfs -text
|
|
13
|
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
14
|
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
|
15
|
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
|
16
|
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
|
17
|
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
|
18
|
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
|
19
|
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
20
|
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
|
21
|
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
|
22
|
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
|
23
|
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
|
24
|
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
|
25
|
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
|
26
|
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
27
|
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
28
|
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
|
29
|
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
|
30
|
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
|
31
|
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
|
32
|
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
|
33
|
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
|
34
|
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
|
35
|
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Publish to npm
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Set up Node.js
|
|
15
|
+
uses: actions/setup-node@v4
|
|
16
|
+
with:
|
|
17
|
+
node-version: '18'
|
|
18
|
+
registry-url: 'https://registry.npmjs.org'
|
|
19
|
+
|
|
20
|
+
- name: Install dependencies
|
|
21
|
+
run: npm ci
|
|
22
|
+
|
|
23
|
+
- name: Publish to npm
|
|
24
|
+
run: npm publish
|
|
25
|
+
env:
|
|
26
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/README.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
coming soon
|
package/app.html
ADDED
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Webtalk - Whisper STT + Pocket TTS</title>
|
|
7
|
+
<link rel="icon" type="image/png" href="/logo.png">
|
|
8
|
+
<style>
|
|
9
|
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
10
|
+
body {
|
|
11
|
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
|
12
|
+
background: #0a0a0f;
|
|
13
|
+
color: #e0e0e0;
|
|
14
|
+
min-height: 100vh;
|
|
15
|
+
}
|
|
16
|
+
.header {
|
|
17
|
+
background: #15151f;
|
|
18
|
+
border-bottom: 1px solid #252535;
|
|
19
|
+
padding: 15px 20px;
|
|
20
|
+
display: flex;
|
|
21
|
+
align-items: center;
|
|
22
|
+
justify-content: space-between;
|
|
23
|
+
}
|
|
24
|
+
.logo {
|
|
25
|
+
display: flex;
|
|
26
|
+
align-items: center;
|
|
27
|
+
gap: 10px;
|
|
28
|
+
font-size: 20px;
|
|
29
|
+
font-weight: 600;
|
|
30
|
+
color: #00d4aa;
|
|
31
|
+
}
|
|
32
|
+
.logo-icon {
|
|
33
|
+
width: 32px;
|
|
34
|
+
height: 32px;
|
|
35
|
+
}
|
|
36
|
+
.subtitle {
|
|
37
|
+
color: #666;
|
|
38
|
+
font-size: 12px;
|
|
39
|
+
font-weight: normal;
|
|
40
|
+
}
|
|
41
|
+
.container {
|
|
42
|
+
display: grid;
|
|
43
|
+
grid-template-columns: 1fr 1fr;
|
|
44
|
+
gap: 0;
|
|
45
|
+
height: calc(100vh - 65px);
|
|
46
|
+
}
|
|
47
|
+
.panel {
|
|
48
|
+
padding: 20px;
|
|
49
|
+
overflow-y: auto;
|
|
50
|
+
}
|
|
51
|
+
.panel-left {
|
|
52
|
+
border-right: 1px solid #252535;
|
|
53
|
+
}
|
|
54
|
+
.panel-header {
|
|
55
|
+
display: flex;
|
|
56
|
+
align-items: center;
|
|
57
|
+
gap: 8px;
|
|
58
|
+
margin-bottom: 20px;
|
|
59
|
+
padding-bottom: 15px;
|
|
60
|
+
border-bottom: 1px solid #252535;
|
|
61
|
+
}
|
|
62
|
+
.panel-header h2 {
|
|
63
|
+
font-size: 18px;
|
|
64
|
+
color: #00d4aa;
|
|
65
|
+
}
|
|
66
|
+
.panel-header .icon {
|
|
67
|
+
width: 24px;
|
|
68
|
+
height: 24px;
|
|
69
|
+
}
|
|
70
|
+
.section {
|
|
71
|
+
background: #15151f;
|
|
72
|
+
border-radius: 12px;
|
|
73
|
+
padding: 16px;
|
|
74
|
+
margin-bottom: 16px;
|
|
75
|
+
border: 1px solid #252535;
|
|
76
|
+
}
|
|
77
|
+
.section-title {
|
|
78
|
+
font-size: 13px;
|
|
79
|
+
color: #888;
|
|
80
|
+
text-transform: uppercase;
|
|
81
|
+
letter-spacing: 0.5px;
|
|
82
|
+
margin-bottom: 12px;
|
|
83
|
+
}
|
|
84
|
+
button {
|
|
85
|
+
background: #00d4aa;
|
|
86
|
+
border: none;
|
|
87
|
+
border-radius: 8px;
|
|
88
|
+
padding: 12px 20px;
|
|
89
|
+
color: #0a0a0f;
|
|
90
|
+
font-weight: 600;
|
|
91
|
+
cursor: pointer;
|
|
92
|
+
transition: all 0.2s;
|
|
93
|
+
font-size: 14px;
|
|
94
|
+
}
|
|
95
|
+
button:hover:not(:disabled) {
|
|
96
|
+
background: #00e6b8;
|
|
97
|
+
transform: translateY(-1px);
|
|
98
|
+
}
|
|
99
|
+
button:disabled {
|
|
100
|
+
opacity: 0.5;
|
|
101
|
+
cursor: not-allowed;
|
|
102
|
+
}
|
|
103
|
+
button.secondary {
|
|
104
|
+
background: #252535;
|
|
105
|
+
color: #e0e0e0;
|
|
106
|
+
}
|
|
107
|
+
button.secondary:hover:not(:disabled) {
|
|
108
|
+
background: #303045;
|
|
109
|
+
}
|
|
110
|
+
button.danger {
|
|
111
|
+
background: #ff6b6b;
|
|
112
|
+
color: white;
|
|
113
|
+
}
|
|
114
|
+
textarea, select {
|
|
115
|
+
width: 100%;
|
|
116
|
+
background: #0f0f15;
|
|
117
|
+
border: 1px solid #303040;
|
|
118
|
+
border-radius: 8px;
|
|
119
|
+
padding: 12px;
|
|
120
|
+
color: #e0e0e0;
|
|
121
|
+
font-size: 14px;
|
|
122
|
+
font-family: inherit;
|
|
123
|
+
}
|
|
124
|
+
textarea {
|
|
125
|
+
min-height: 100px;
|
|
126
|
+
resize: vertical;
|
|
127
|
+
}
|
|
128
|
+
textarea:focus, select:focus {
|
|
129
|
+
outline: none;
|
|
130
|
+
border-color: #00d4aa;
|
|
131
|
+
}
|
|
132
|
+
.controls {
|
|
133
|
+
display: flex;
|
|
134
|
+
gap: 10px;
|
|
135
|
+
flex-wrap: wrap;
|
|
136
|
+
margin-top: 12px;
|
|
137
|
+
}
|
|
138
|
+
.status {
|
|
139
|
+
padding: 8px 12px;
|
|
140
|
+
border-radius: 6px;
|
|
141
|
+
font-size: 13px;
|
|
142
|
+
background: #1a1a25;
|
|
143
|
+
color: #888;
|
|
144
|
+
margin-top: 10px;
|
|
145
|
+
}
|
|
146
|
+
.status.loading { background: #1a2a35; color: #00d4aa; }
|
|
147
|
+
.status.generating { background: #1a2535; color: #4a9eff; }
|
|
148
|
+
.status.ready { background: #1a2f25; color: #00d4aa; }
|
|
149
|
+
.status.recording { background: #351a1a; color: #ff6b6b; }
|
|
150
|
+
.status.error { background: #351a1a; color: #ff6b6b; }
|
|
151
|
+
|
|
152
|
+
.output-box {
|
|
153
|
+
background: #0f0f15;
|
|
154
|
+
border: 1px solid #303040;
|
|
155
|
+
border-radius: 8px;
|
|
156
|
+
padding: 12px;
|
|
157
|
+
min-height: 80px;
|
|
158
|
+
font-size: 14px;
|
|
159
|
+
line-height: 1.5;
|
|
160
|
+
}
|
|
161
|
+
.output-box:empty::before {
|
|
162
|
+
content: 'Transcribed text will appear here...';
|
|
163
|
+
color: #555;
|
|
164
|
+
font-style: italic;
|
|
165
|
+
}
|
|
166
|
+
.output-box.has-content:empty::before {
|
|
167
|
+
content: none;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
.transfer-btn {
|
|
171
|
+
position: absolute;
|
|
172
|
+
left: 50%;
|
|
173
|
+
top: 50%;
|
|
174
|
+
transform: translate(-50%, -50%);
|
|
175
|
+
background: #252535;
|
|
176
|
+
border: 2px solid #00d4aa;
|
|
177
|
+
border-radius: 50%;
|
|
178
|
+
width: 48px;
|
|
179
|
+
height: 48px;
|
|
180
|
+
display: flex;
|
|
181
|
+
align-items: center;
|
|
182
|
+
justify-content: center;
|
|
183
|
+
cursor: pointer;
|
|
184
|
+
z-index: 10;
|
|
185
|
+
transition: all 0.2s;
|
|
186
|
+
}
|
|
187
|
+
.transfer-btn:hover {
|
|
188
|
+
background: #00d4aa;
|
|
189
|
+
}
|
|
190
|
+
.transfer-btn svg {
|
|
191
|
+
width: 20px;
|
|
192
|
+
height: 20px;
|
|
193
|
+
fill: #00d4aa;
|
|
194
|
+
}
|
|
195
|
+
.transfer-btn:hover svg {
|
|
196
|
+
fill: #0a0a0f;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
.audio-player {
|
|
200
|
+
width: 100%;
|
|
201
|
+
margin-top: 12px;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
.voice-controls {
|
|
205
|
+
display: flex;
|
|
206
|
+
gap: 10px;
|
|
207
|
+
align-items: center;
|
|
208
|
+
flex-wrap: wrap;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
.metrics {
|
|
212
|
+
display: flex;
|
|
213
|
+
gap: 15px;
|
|
214
|
+
margin-top: 10px;
|
|
215
|
+
font-size: 12px;
|
|
216
|
+
color: #666;
|
|
217
|
+
}
|
|
218
|
+
.metric-value {
|
|
219
|
+
color: #00d4aa;
|
|
220
|
+
font-weight: 600;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
@media (max-width: 900px) {
|
|
224
|
+
.container {
|
|
225
|
+
grid-template-columns: 1fr;
|
|
226
|
+
grid-template-rows: auto auto;
|
|
227
|
+
}
|
|
228
|
+
.panel-left {
|
|
229
|
+
border-right: none;
|
|
230
|
+
border-bottom: 1px solid #252535;
|
|
231
|
+
}
|
|
232
|
+
.transfer-btn {
|
|
233
|
+
position: static;
|
|
234
|
+
transform: none;
|
|
235
|
+
margin: 10px auto;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
.hidden { display: none !important; }
|
|
240
|
+
|
|
241
|
+
#whisper-container {
|
|
242
|
+
min-height: 200px;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
.loading-overlay {
|
|
246
|
+
position: fixed;
|
|
247
|
+
top: 0; left: 0; right: 0; bottom: 0;
|
|
248
|
+
background: rgba(10, 10, 15, 0.9);
|
|
249
|
+
display: flex;
|
|
250
|
+
flex-direction: column;
|
|
251
|
+
align-items: center;
|
|
252
|
+
justify-content: center;
|
|
253
|
+
z-index: 1000;
|
|
254
|
+
}
|
|
255
|
+
.loading-overlay.hidden {
|
|
256
|
+
display: none;
|
|
257
|
+
}
|
|
258
|
+
.spinner {
|
|
259
|
+
width: 50px;
|
|
260
|
+
height: 50px;
|
|
261
|
+
border: 3px solid #252535;
|
|
262
|
+
border-top-color: #00d4aa;
|
|
263
|
+
border-radius: 50%;
|
|
264
|
+
animation: spin 1s linear infinite;
|
|
265
|
+
}
|
|
266
|
+
@keyframes spin {
|
|
267
|
+
to { transform: rotate(360deg); }
|
|
268
|
+
}
|
|
269
|
+
.loading-text {
|
|
270
|
+
margin-top: 20px;
|
|
271
|
+
color: #888;
|
|
272
|
+
}
|
|
273
|
+
</style>
|
|
274
|
+
</head>
|
|
275
|
+
<body>
|
|
276
|
+
<div class="loading-overlay" id="loading-screen">
|
|
277
|
+
<div class="spinner"></div>
|
|
278
|
+
<div class="loading-text">Loading models...</div>
|
|
279
|
+
</div>
|
|
280
|
+
|
|
281
|
+
<header class="header">
|
|
282
|
+
<div class="logo">
|
|
283
|
+
<svg class="logo-icon" viewBox="0 0 32 32" fill="none">
|
|
284
|
+
<circle cx="16" cy="16" r="14" stroke="#00d4aa" stroke-width="2"/>
|
|
285
|
+
<path d="M10 16h12M16 10v12" stroke="#00d4aa" stroke-width="2" stroke-linecap="round"/>
|
|
286
|
+
</svg>
|
|
287
|
+
Webtalk
|
|
288
|
+
<span class="subtitle">Whisper STT + Pocket TTS</span>
|
|
289
|
+
</div>
|
|
290
|
+
<div style="color: #666; font-size: 13px;">
|
|
291
|
+
Local AI Speech Processing
|
|
292
|
+
</div>
|
|
293
|
+
</header>
|
|
294
|
+
|
|
295
|
+
<div class="container">
|
|
296
|
+
<!-- Whisper STT Panel -->
|
|
297
|
+
<div class="panel panel-left">
|
|
298
|
+
<div class="panel-header">
|
|
299
|
+
<svg class="icon" viewBox="0 0 24 24" fill="none" stroke="#00d4aa" stroke-width="2">
|
|
300
|
+
<path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"/>
|
|
301
|
+
<path d="M19 10v2a7 7 0 0 1-14 0v-2"/>
|
|
302
|
+
<line x1="12" y1="19" x2="12" y2="23"/>
|
|
303
|
+
<line x1="8" y1="23" x2="16" y2="23"/>
|
|
304
|
+
</svg>
|
|
305
|
+
<h2>Speech to Text (Whisper)</h2>
|
|
306
|
+
</div>
|
|
307
|
+
|
|
308
|
+
<div class="section">
|
|
309
|
+
<div class="section-title">Audio Input</div>
|
|
310
|
+
<div class="controls">
|
|
311
|
+
<button id="record-btn" onclick="startRecording()">
|
|
312
|
+
Start Recording
|
|
313
|
+
</button>
|
|
314
|
+
<button id="stop-record-btn" class="danger" onclick="stopRecording()" disabled>
|
|
315
|
+
Stop
|
|
316
|
+
</button>
|
|
317
|
+
<input type="file" id="audio-upload" accept="audio/*" hidden onchange="handleAudioUpload(event)">
|
|
318
|
+
<button class="secondary" onclick="document.getElementById('audio-upload').click()">
|
|
319
|
+
Upload Audio
|
|
320
|
+
</button>
|
|
321
|
+
</div>
|
|
322
|
+
<div id="stt-status" class="status">Ready</div>
|
|
323
|
+
</div>
|
|
324
|
+
|
|
325
|
+
<div class="section">
|
|
326
|
+
<div class="section-title">Transcription</div>
|
|
327
|
+
<div id="transcription-output" class="output-box"></div>
|
|
328
|
+
<div class="controls">
|
|
329
|
+
<button class="secondary" onclick="copyTranscription()">Copy</button>
|
|
330
|
+
<button class="secondary" onclick="clearTranscription()">Clear</button>
|
|
331
|
+
</div>
|
|
332
|
+
</div>
|
|
333
|
+
|
|
334
|
+
<div id="root" style="display: none;"></div>
|
|
335
|
+
</div>
|
|
336
|
+
|
|
337
|
+
<!-- Pocket TTS Panel -->
|
|
338
|
+
<div class="panel">
|
|
339
|
+
<div class="panel-header">
|
|
340
|
+
<svg class="icon" viewBox="0 0 24 24" fill="none" stroke="#00d4aa" stroke-width="2">
|
|
341
|
+
<polygon points="11 5 6 9 2 9 2 15 6 15 11 19 11 5"/>
|
|
342
|
+
<path d="M19.07 4.93a10 10 0 0 1 0 14.14M15.54 8.46a5 5 0 0 1 0 7.07"/>
|
|
343
|
+
</svg>
|
|
344
|
+
<h2>Text to Speech (Pocket TTS)</h2>
|
|
345
|
+
</div>
|
|
346
|
+
|
|
347
|
+
<div class="section">
|
|
348
|
+
<div class="section-title">Voice Selection</div>
|
|
349
|
+
<div class="voice-controls">
|
|
350
|
+
<select id="voice-select" style="flex: 1;">
|
|
351
|
+
<option value="">Loading voices...</option>
|
|
352
|
+
</select>
|
|
353
|
+
<input type="file" id="voice-upload" accept="audio/*" hidden onchange="handleVoiceUpload(event)">
|
|
354
|
+
<button class="secondary" onclick="document.getElementById('voice-upload').click()">
|
|
355
|
+
Custom Voice
|
|
356
|
+
</button>
|
|
357
|
+
</div>
|
|
358
|
+
</div>
|
|
359
|
+
|
|
360
|
+
<div class="section">
|
|
361
|
+
<div class="section-title">Text Input</div>
|
|
362
|
+
<textarea id="tts-input" placeholder="Enter text to synthesize..."></textarea>
|
|
363
|
+
<div class="controls">
|
|
364
|
+
<button id="generate-btn" onclick="generateSpeech()" disabled>
|
|
365
|
+
Generate Speech
|
|
366
|
+
</button>
|
|
367
|
+
<button id="stop-tts-btn" class="danger" onclick="stopTTS()" disabled>
|
|
368
|
+
Stop
|
|
369
|
+
</button>
|
|
370
|
+
<button class="secondary" onclick="useTranscription()">
|
|
371
|
+
Use Transcription ↑
|
|
372
|
+
</button>
|
|
373
|
+
</div>
|
|
374
|
+
<div id="tts-status" class="status">Loading models...</div>
|
|
375
|
+
<div class="metrics" id="tts-metrics" style="display: none;">
|
|
376
|
+
<div>RTFx: <span class="metric-value" id="rtfx">--</span></div>
|
|
377
|
+
<div>TTFB: <span class="metric-value" id="ttfb">--</span>ms</div>
|
|
378
|
+
</div>
|
|
379
|
+
</div>
|
|
380
|
+
|
|
381
|
+
<div class="section" id="audio-output-section" style="display: none;">
|
|
382
|
+
<div class="section-title">Generated Audio</div>
|
|
383
|
+
<audio id="tts-audio" class="audio-player" controls></audio>
|
|
384
|
+
<div class="controls">
|
|
385
|
+
<button class="secondary" onclick="downloadAudio()">Download WAV</button>
|
|
386
|
+
</div>
|
|
387
|
+
</div>
|
|
388
|
+
</div>
|
|
389
|
+
</div>
|
|
390
|
+
|
|
391
|
+
<script type="module">
|
|
392
|
+
import { STT, TTS, debug } from '/webtalk/sdk.js';
|
|
393
|
+
|
|
394
|
+
// Publish the imported SDK entry points as a single global object.
window.webtalkSDK = { STT, TTS, debug };

// Look up both status banners once; the STT and TTS callbacks in this
// module write their text and CSS class.
const [sttStatus, ttsStatus] = ['stt-status', 'tts-status'].map(
  (id) => document.getElementById(id),
);
|
|
398
|
+
|
|
399
|
+
// === STT ===
|
|
400
|
+
// Speech-to-text instance for the left-hand panel. Its callbacks copy
// transcripts and status changes from the SDK into the page.
const stt = new STT({
  language: 'en',

  // Final transcript: replaces whatever the output box currently shows.
  onTranscript: (text) => {
    document.getElementById('transcription-output').textContent = text;
  },

  // Partial transcript: written to the same box, so each update overwrites
  // the previous one.
  onPartial: (text) => {
    document.getElementById('transcription-output').textContent = text;
  },

  // Status change: show the message and pick the matching `.status.*` style.
  onStatus: (state, msg) => {
    // 'error' is mapped explicitly so a failure is not painted with the
    // "ready" style by the fallback below.
    // NOTE(review): assumes the SDK reports failures with state 'error' —
    // confirm against stt.js.
    const classMap = {
      loading: 'loading',
      ready: 'ready',
      recording: 'recording',
      transcribing: 'loading',
      error: 'error',
    };
    sttStatus.textContent = msg;
    sttStatus.className = 'status ' + (classMap[state] || 'ready');

    // The first 'ready' state also dismisses the full-page loading overlay.
    if (state === 'ready') {
      document.getElementById('loading-screen').classList.add('hidden');
    }
  },
});
|
|
415
|
+
|
|
416
|
+
// Start STT initialisation. A failure is reported in the STT status banner;
// the loading overlay is dismissed on both the success and the failure path.
stt
  .init()
  .catch((err) => {
    sttStatus.textContent = `STT error: ${err.message}`;
    sttStatus.className = 'status error';
  })
  .finally(() => {
    document.getElementById('loading-screen').classList.add('hidden');
  });
|
|
423
|
+
|
|
424
|
+
/**
 * Click handler for the "Start Recording" button.
 *
 * Disables the record button right away so a second click cannot start
 * another recording while the first start is still pending, then enables
 * "Stop" once `stt.startRecording()` has resolved.
 *
 * If the start fails, the record button is re-enabled and the error is shown
 * in the STT status banner. The error is rethrown so the returned promise
 * still rejects exactly as it did before.
 * NOTE(review): presumably the typical failure is a denied microphone
 * permission — confirm against stt.js.
 *
 * @returns {Promise<void>}
 */
window.startRecording = async () => {
  const recordBtn = document.getElementById('record-btn');
  const stopBtn = document.getElementById('stop-record-btn');

  recordBtn.disabled = true;
  try {
    await stt.startRecording();
  } catch (err) {
    // Put the UI back into its idle state; otherwise the user cannot retry.
    recordBtn.disabled = false;
    sttStatus.textContent = 'STT error: ' + ((err && err.message) || String(err));
    sttStatus.className = 'status error';
    throw err;
  }
  stopBtn.disabled = false;
};
|
|
429
|
+
|
|
430
|
+
// Click handler for the recording "Stop" button: puts both recording buttons
// back into their idle state first, then asks the STT instance to stop.
window.stopRecording = async () => {
  const idleState = { 'record-btn': false, 'stop-record-btn': true };
  for (const [id, disabled] of Object.entries(idleState)) {
    document.getElementById(id).disabled = disabled;
  }
  await stt.stopRecording();
};
|
|
435
|
+
|
|
436
|
+
/**
 * `change` handler for the hidden audio file input: passes the first selected
 * file to `stt.transcribeBlob()`.
 *
 * @param {Event} e - change event whose target is the file input.
 */
window.handleAudioUpload = (e) => {
  const input = e.target;
  const file = input.files[0];

  // Clear the input so choosing the same file again fires another `change`
  // event. The File object captured above stays usable after the reset.
  input.value = '';

  if (file) stt.transcribeBlob(file);
};
|
|
440
|
+
|
|
441
|
+
// Click handler for "Copy": writes the current transcription text to the
// clipboard.
window.copyTranscription = () => {
  const outputBox = document.getElementById('transcription-output');
  const transcript = outputBox.textContent;
  navigator.clipboard.writeText(transcript);
};
|
|
444
|
+
|
|
445
|
+
// Click handler for "Clear": empties the transcription output box.
window.clearTranscription = () => {
  const outputBox = document.getElementById('transcription-output');
  outputBox.textContent = '';
};
|
|
448
|
+
|
|
449
|
+
// Text-to-speech instance for the right-hand panel. Each callback pushes an
// SDK event into the matching part of the page.
const tts = new TTS({
  // Show the status text; the reported state doubles as the CSS modifier
  // ('ready' when no state is given).
  onStatus(status, state) {
    const modifier = state || 'ready';
    ttsStatus.textContent = status;
    ttsStatus.className = `status ${modifier}`;
  },

  // Rebuild the voice dropdown: one capitalised entry per voice, with the
  // default pre-selected, followed by a fixed "Custom Voice" entry.
  onVoicesLoaded(voices, defaultVoice) {
    const select = document.getElementById('voice-select');

    const addOption = (value, label, isSelected) => {
      const option = document.createElement('option');
      option.value = value;
      option.textContent = label;
      if (isSelected) option.selected = true;
      select.appendChild(option);
    };

    select.innerHTML = '';
    voices.forEach((voice) => {
      const label = voice.charAt(0).toUpperCase() + voice.slice(1);
      addOption(voice, label, voice === defaultVoice);
    });
    addOption('custom', 'Custom Voice', false);
  },

  // Reveal the "Generated Audio" section whenever an audio chunk is reported.
  onAudioChunk() {
    const section = document.getElementById('audio-output-section');
    section.style.display = 'block';
  },

  // Show the metrics row and update whichever values are present (truthy).
  onMetrics(metrics) {
    document.getElementById('tts-metrics').style.display = 'flex';
    const { rtfx, ttfb } = metrics;
    if (rtfx) {
      document.getElementById('rtfx').textContent = `${rtfx.toFixed(2)}x`;
    }
    if (ttfb) {
      document.getElementById('ttfb').textContent = Math.round(ttfb);
    }
  },

  // Generation finished: load the audio into the player when a URL is given
  // and return the Generate/Stop buttons to their idle state.
  onAudioReady(audioUrl) {
    if (audioUrl) {
      document.getElementById('tts-audio').src = audioUrl;
    }
    const generateBtn = document.getElementById('generate-btn');
    generateBtn.disabled = false;
    const stopBtn = document.getElementById('stop-tts-btn');
    stopBtn.disabled = true;
  },
});
|
|
483
|
+
|
|
484
|
+
// Start TTS initialisation. On success the Generate button becomes usable;
// on failure the error is reported in the TTS status banner.
(async () => {
  try {
    await tts.init();
    document.getElementById('generate-btn').disabled = false;
  } catch (err) {
    ttsStatus.textContent = `TTS error: ${err.message}`;
    ttsStatus.className = 'status error';
  }
})();
|
|
490
|
+
|
|
491
|
+
/**
 * Click handler for "Generate Speech".
 *
 * Does nothing when the text area is empty after trimming. Otherwise it puts
 * the buttons into their "generating" state, hides the metrics row and calls
 * `tts.generate()` with the text and the selected voice.
 *
 * If generation fails, the buttons are returned to idle and the error is
 * shown in the TTS status banner, so the panel is not left with Generate
 * disabled. The error is rethrown so the returned promise still rejects
 * exactly as it did before.
 *
 * @returns {Promise<void>}
 */
window.generateSpeech = async () => {
  const text = document.getElementById('tts-input').value.trim();
  if (!text) return;

  const generateBtn = document.getElementById('generate-btn');
  const stopBtn = document.getElementById('stop-tts-btn');

  generateBtn.disabled = true;
  stopBtn.disabled = false;
  document.getElementById('tts-metrics').style.display = 'none';

  try {
    await tts.generate(text, document.getElementById('voice-select').value);
  } catch (err) {
    // On the success path the buttons are reset by the onAudioReady callback;
    // that callback is not expected to run after a failure, so reset here.
    generateBtn.disabled = false;
    stopBtn.disabled = true;
    ttsStatus.textContent = 'TTS error: ' + ((err && err.message) || String(err));
    ttsStatus.className = 'status error';
    throw err;
  }
};
|
|
499
|
+
|
|
500
|
+
// Click handler for the TTS "Stop" button: stops the TTS instance, then
// returns the Generate/Stop buttons to their idle state.
window.stopTTS = () => {
  tts.stop();

  const generateBtn = document.getElementById('generate-btn');
  generateBtn.disabled = false;

  const stopBtn = document.getElementById('stop-tts-btn');
  stopBtn.disabled = true;
};
|
|
505
|
+
|
|
506
|
+
/**
 * `change` handler for the hidden custom-voice file input: passes the first
 * selected file to `tts.uploadVoice()`.
 *
 * @param {Event} e - change event whose target is the file input.
 */
window.handleVoiceUpload = (e) => {
  const input = e.target;
  const file = input.files[0];

  // Clear the input so choosing the same file again fires another `change`
  // event. The File object captured above stays usable after the reset.
  input.value = '';

  if (file) tts.uploadVoice(file);
};
|
|
510
|
+
|
|
511
|
+
// Click handler for "Use Transcription": copies the transcription text into
// the TTS text area. An empty transcription leaves the text area untouched.
window.useTranscription = () => {
  const transcript = document.getElementById('transcription-output').textContent;
  if (!transcript) return;
  document.getElementById('tts-input').value = transcript;
};
|
|
515
|
+
|
|
516
|
+
// Click handler for "Download WAV": delegates to the TTS instance and passes
// its return value through.
window.downloadAudio = () => {
  return tts.downloadAudio();
};
|
|
517
|
+
</script>
|
|
518
|
+
</body>
|
|
519
|
+
</html>
|