parakeet.js 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,327 @@
1
+ import React, { useState, useRef, useEffect } from 'react';
2
+ import { ParakeetModel, getParakeetModel } from 'parakeet.js';
3
+ import './App.css';
4
+
5
+ export default function App() {
6
+ const repoId = 'ysdede/parakeet-tdt-0.6b-v2-onnx';
7
+ const [backend, setBackend] = useState('webgpu-hybrid');
8
+ const [quant, setQuant] = useState('fp32');
9
+ const [preprocessor, setPreprocessor] = useState('nemo128');
10
+ const [status, setStatus] = useState('Idle');
11
+ const [progress, setProgress] = useState('');
12
+ const [progressText, setProgressText] = useState('');
13
+ const [progressPct, setProgressPct] = useState(null);
14
+ const [text, setText] = useState('');
15
+ const [latestMetrics, setLatestMetrics] = useState(null);
16
+ const [transcriptions, setTranscriptions] = useState([]);
17
+ const [isTranscribing, setIsTranscribing] = useState(false);
18
+ const [verboseLog, setVerboseLog] = useState(false);
19
+ const [decoderInt8, setDecoderInt8] = useState(true);
20
+ const [frameStride, setFrameStride] = useState(1);
21
+ const [dumpDetail, setDumpDetail] = useState(false);
22
+ const maxCores = navigator.hardwareConcurrency || 8;
23
+ const [cpuThreads, setCpuThreads] = useState(Math.max(1, maxCores - 2));
24
+ const modelRef = useRef(null);
25
+ const fileInputRef = useRef(null);
26
+
27
+ // Auto-adjust quant preset when backend changes
28
+ useEffect(() => {
29
+ if (backend.startsWith('webgpu')) {
30
+ setQuant('fp32');
31
+ } else if (backend === 'wasm') {
32
+ setQuant('int8');
33
+ }
34
+ }, [backend]);
35
+
36
+ async function loadModel() {
37
+ setStatus('Loading model…');
38
+ setProgress('');
39
+ setProgressText('');
40
+ setProgressPct(0);
41
+ console.time('LoadModel');
42
+
43
+ try {
44
+ const progressCallback = ({ loaded, total, file }) => {
45
+ const pct = total > 0 ? Math.round((loaded / total) * 100) : 0;
46
+ setProgressText(`${file}: ${pct}%`);
47
+ setProgressPct(pct);
48
+ };
49
+
50
+ // 1. Download all model files from HuggingFace Hub
51
+ const modelUrls = await getParakeetModel(repoId, {
52
+ quantization: quant,
53
+ preprocessor,
54
+ backend, // Pass backend to enable automatic fp32 selection for WebGPU
55
+ decoderInt8,
56
+ progress: progressCallback
57
+ });
58
+
59
+ // Show compiling sessions stage
60
+ setStatus('Creating sessions…');
61
+ setProgressText('Compiling model (this may take ~10 s)…');
62
+ setProgressPct(null);
63
+
64
+ // 2. Create the model instance with all file URLs
65
+ modelRef.current = await ParakeetModel.fromUrls({
66
+ ...modelUrls.urls,
67
+ filenames: modelUrls.filenames,
68
+ backend,
69
+ verbose: verboseLog,
70
+ decoderOnWasm: decoderInt8, // if we selected int8 decoder, keep it on WASM
71
+ decoderInt8,
72
+ cpuThreads,
73
+ });
74
+
75
+ // 3. Warm-up and verify
76
+ setStatus('Warming up & verifying…');
77
+ setProgressText('Running a test transcription…');
78
+ const expectedText = 'it is not life as we know or understand it';
79
+
80
+ try {
81
+ const audioRes = await fetch('/assets/life_Jim.wav');
82
+ const buf = await audioRes.arrayBuffer();
83
+ const audioCtx = new AudioContext({ sampleRate: 16000 });
84
+ const decoded = await audioCtx.decodeAudioData(buf);
85
+ const pcm = decoded.getChannelData(0);
86
+
87
+ const { utterance_text } = await modelRef.current.transcribe(pcm, 16000);
88
+
89
+ // Normalize both texts: lowercase and remove punctuation
90
+ const normalize = (str) => str.toLowerCase().replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g,"");
91
+
92
+ if (normalize(utterance_text).includes(normalize(expectedText))) {
93
+ console.log('[App] Model verification successful.');
94
+ setStatus('Model ready ✔');
95
+ } else {
96
+ console.error(`[App] Model verification failed! Expected: "${expectedText}", Got: "${utterance_text}"`);
97
+ setStatus('Model verification failed!');
98
+ }
99
+ } catch (err) {
100
+ console.error('[App] Warm-up transcription failed', err);
101
+ setStatus('Warm-up failed!');
102
+ }
103
+
104
+ console.timeEnd('LoadModel');
105
+ // setStatus('Model ready ✔'); // Status is now set by verification
106
+ setProgressText('');
107
+ setProgressPct(null);
108
+ } catch (e) {
109
+ console.error(e);
110
+ setStatus(`Failed: ${e.message}`);
111
+ setProgress('');
112
+ }
113
+ }
114
+
115
+ async function transcribeFile(e) {
116
+ if (!modelRef.current) return alert('Load model first');
117
+ const file = e.target.files?.[0];
118
+ if (!file) return;
119
+
120
+ setIsTranscribing(true);
121
+ setStatus(`Transcribing "${file.name}"…`);
122
+
123
+ try {
124
+ const buf = await file.arrayBuffer();
125
+ const audioCtx = new AudioContext({ sampleRate: 16000 });
126
+ const decoded = await audioCtx.decodeAudioData(buf);
127
+ const pcm = decoded.getChannelData(0);
128
+
129
+ console.time(`Transcribe-${file.name}`);
130
+ const res = await modelRef.current.transcribe(pcm, 16_000, {
131
+ returnTimestamps: true,
132
+ returnConfidences: true , frameStride
133
+ });
134
+ console.timeEnd(`Transcribe-${file.name}`);
135
+
136
+ if (dumpDetail) {
137
+ console.log('[Parakeet] Detailed transcription output', res);
138
+ }
139
+ setLatestMetrics(res.metrics);
140
+ // Add to transcriptions list
141
+ const newTranscription = {
142
+ id: Date.now(),
143
+ filename: file.name,
144
+ text: res.utterance_text,
145
+ timestamp: new Date().toLocaleTimeString(),
146
+ duration: pcm.length / 16000, // duration in seconds
147
+ wordCount: res.words?.length || 0,
148
+ confidence: res.confidence_scores?.overall_log_prob || null,
149
+ metrics: res.metrics
150
+ };
151
+
152
+ setTranscriptions(prev => [newTranscription, ...prev]);
153
+ setText(res.utterance_text); // Show latest transcription
154
+ setStatus('Model ready ✔'); // Ready for next file
155
+
156
+ } catch (error) {
157
+ console.error('Transcription failed:', error);
158
+ setStatus('Transcription failed');
159
+ alert(`Failed to transcribe "${file.name}": ${error.message}`);
160
+ } finally {
161
+ setIsTranscribing(false);
162
+ // Clear the file input so the same file can be selected again
163
+ if (fileInputRef.current) {
164
+ fileInputRef.current.value = '';
165
+ }
166
+ }
167
+ }
168
+
169
+ function clearTranscriptions() {
170
+ setTranscriptions([]);
171
+ setText('');
172
+ }
173
+
174
+ return (
175
+ <div className="app">
176
+ <h2>Parakeet JS React Demo</h2>
177
+
178
+ <div className="controls">
179
+ <p>
180
+ <strong>Model:</strong> {repoId}
181
+ </p>
182
+ </div>
183
+
184
+ <div className="controls">
185
+ <label>
186
+ Backend:
187
+ <select value={backend} onChange={e=>setBackend(e.target.value)}>
188
+ <option value="webgpu-hybrid">WebGPU (Hybrid)</option>
189
+ <option value="webgpu-strict">WebGPU (Strict)</option>
190
+ <option value="wasm">WASM (CPU)</option>
191
+ </select>
192
+ </label>
193
+ {' '}
194
+ <label>
195
+ Quant:
196
+ <select value={quant} onChange={e=>setQuant(e.target.value)}>
197
+ <option value="int8">int8 (faster)</option>
198
+ <option value="fp32">fp32 (higher quality)</option>
199
+ </select>
200
+ </label>
201
+ {' '}
202
+ {backend.startsWith('webgpu') && (
203
+ <label style={{ fontSize:'0.9em' }}>
204
+ <input type="checkbox" checked={decoderInt8} onChange={e=>setDecoderInt8(e.target.checked)} />
205
+ Decoder INT8 on CPU
206
+ </label>
207
+ )}
208
+ {' '}
209
+ <label>
210
+ Preprocessor:
211
+ <select value={preprocessor} onChange={e=>setPreprocessor(e.target.value)}>
212
+ <option value="nemo80">nemo80 (smaller)</option>
213
+ <option value="nemo128">nemo128 (default)</option>
214
+ </select>
215
+ </label>
216
+ {' '}
217
+ <label>
218
+ Stride:
219
+ <select value={frameStride} onChange={e=>setFrameStride(Number(e.target.value))}>
220
+ <option value={1}>1</option>
221
+ <option value={2}>2</option>
222
+ <option value={4}>4</option>
223
+ </select>
224
+ </label>
225
+ {' '}
226
+ <label>
227
+ <input type="checkbox" checked={verboseLog} onChange={e => setVerboseLog(e.target.checked)} />
228
+ Verbose Log
229
+ </label>
230
+ {' '}
231
+ <label style={{fontSize:'0.9em'}}>
232
+ <input type="checkbox" checked={dumpDetail} onChange={e=>setDumpDetail(e.target.checked)} />
233
+ Dump result to console
234
+ </label>
235
+ {(backend === 'wasm' || decoderInt8) && (
236
+ <label style={{fontSize:'0.9em'}}>
237
+ Threads:
238
+ <input type="number" min="1" max={maxCores} value={cpuThreads} onChange={e=>setCpuThreads(Number(e.target.value))} style={{width:'4rem'}} />
239
+ </label>
240
+ )}
241
+ <button
242
+ onClick={loadModel}
243
+ disabled={!status.toLowerCase().includes('fail') && status !== 'Idle'}
244
+ className="primary"
245
+ >
246
+ {status === 'Model ready ✔' ? 'Model Loaded' : 'Load Model'}
247
+ </button>
248
+ </div>
249
+
250
+ {typeof SharedArrayBuffer === 'undefined' && backend === 'wasm' && (
251
+ <div style={{
252
+ marginBottom: '1rem',
253
+ padding: '0.5rem',
254
+ backgroundColor: '#fff3cd',
255
+ border: '1px solid #ffeaa7',
256
+ borderRadius: '4px',
257
+ fontSize: '0.9em'
258
+ }}>
259
+ ⚠️ <strong>Performance Note:</strong> SharedArrayBuffer is not available.
260
+ WASM will run single-threaded. For better performance, serve over HTTPS
261
+ with proper headers or use WebGPU.
262
+ </div>
263
+ )}
264
+
265
+ <div className="controls">
266
+ <input
267
+ ref={fileInputRef}
268
+ type="file"
269
+ accept="audio/*"
270
+ onChange={transcribeFile}
271
+ disabled={status !== 'Model ready ✔' || isTranscribing}
272
+ />
273
+ {transcriptions.length > 0 && (
274
+ <button
275
+ onClick={clearTranscriptions}
276
+ style={{ marginLeft: '1rem', padding: '0.25rem 0.5rem' }}
277
+ >
278
+ Clear History
279
+ </button>
280
+ )}
281
+ </div>
282
+
283
+ <p>Status: {status}</p>
284
+ {progressPct!==null && (
285
+ <div className="progress-wrapper">
286
+ <div className="progress-bar"><div style={{ width: `${progressPct}%` }} /></div>
287
+ <p className="progress-text">{progressText}</p>
288
+ </div>
289
+ )}
290
+
291
+ {/* Latest transcription */}
292
+ <div className="controls">
293
+ <h3>Latest Transcription:</h3>
294
+ <textarea
295
+ value={text}
296
+ readOnly
297
+ className="textarea"
298
+ placeholder="Transcribed text will appear here..."
299
+ />
300
+ </div>
301
+
302
+ {/* Latest transcription performace info */}
303
+ {latestMetrics && (
304
+ <div className="performance">
305
+ <strong>RTF:</strong> {latestMetrics.rtf?.toFixed(2)}x &nbsp;|&nbsp; Total: {latestMetrics.total_ms} ms<br/>
306
+ Preprocess {latestMetrics.preprocess_ms} ms · Encode {latestMetrics.encode_ms} ms · Decode {latestMetrics.decode_ms} ms · Tokenize {latestMetrics.tokenize_ms} ms
307
+ </div>
308
+ )}
309
+
310
+ {/* Transcription history */}
311
+ {transcriptions.length > 0 && (
312
+ <div className="history">
313
+ <h3>Transcription History ({transcriptions.length} files):</h3>
314
+ <div style={{ maxHeight: '400px', overflowY: 'auto', border: '1px solid #ddd', borderRadius: '4px' }}>
315
+ {transcriptions.map((trans) => (
316
+ <div className="history-item" key={trans.id}>
317
+ <div className="history-meta"><strong>{trans.filename}</strong><span>{trans.timestamp}</span></div>
318
+ <div className="history-stats">Duration: {trans.duration.toFixed(1)}s | Words: {trans.wordCount}{trans.confidence && ` | Confidence: ${trans.confidence.toFixed(2)}`}{trans.metrics && ` | RTF: ${trans.metrics.rtf?.toFixed(2)}x`}</div>
319
+ <div className="history-text">{trans.text}</div>
320
+ </div>
321
+ ))}
322
+ </div>
323
+ </div>
324
+ )}
325
+ </div>
326
+ );
327
+ }
@@ -0,0 +1,6 @@
1
+ import React from 'react';
2
+ import { createRoot } from 'react-dom/client';
3
+ import App from './App.jsx';
4
+
5
+ const root = createRoot(document.getElementById('root'));
6
+ root.render(<App />);
@@ -0,0 +1,41 @@
1
+ import { defineConfig } from 'vite';
2
+ import react from '@vitejs/plugin-react';
3
+ import fs from 'fs';
4
+ import path from 'path';
5
+
6
+ // Optional HTTPS setup - only if certificates exist
7
+ let httpsConfig = false;
8
+ try {
9
+ const keyPath = path.resolve('./localhost-key.pem');
10
+ const certPath = path.resolve('./localhost.pem');
11
+
12
+ if (fs.existsSync(keyPath) && fs.existsSync(certPath)) {
13
+ httpsConfig = {
14
+ key: fs.readFileSync(keyPath),
15
+ cert: fs.readFileSync(certPath),
16
+ };
17
+ console.log('✅ HTTPS enabled with local certificates');
18
+ } else {
19
+ console.log('ℹ️ No local certificates found, running on HTTP');
20
+ }
21
+ } catch (err) {
22
+ console.log('ℹ️ HTTPS setup failed, running on HTTP:', err.message);
23
+ }
24
+
25
+ export default defineConfig({
26
+ plugins: [react()],
27
+ server: {
28
+ port: 5173,
29
+ ...(httpsConfig && { https: httpsConfig }),
30
+ headers: {
31
+ 'Cross-Origin-Opener-Policy': 'same-origin',
32
+ 'Cross-Origin-Embedder-Policy': 'require-corp',
33
+ },
34
+ },
35
+ optimizeDeps: {
36
+ include: ['onnxruntime-web'],
37
+ },
38
+ define: {
39
+ global: 'globalThis',
40
+ },
41
+ });
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "parakeet.js",
3
+ "version": "0.0.1",
4
+ "description": "NVIDIA Parakeet speech recognition for the browser (WebGPU/WASM) powered by ONNX Runtime Web.",
5
+ "type": "module",
6
+ "exports": {
7
+ ".": "./src/index.js"
8
+ },
9
+ "keywords": [
10
+ "parakeet",
11
+ "speech",
12
+ "onnx",
13
+ "webgpu",
14
+ "wasm",
15
+ "transcription"
16
+ ],
17
+ "dependencies": {
18
+ "onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4"
19
+ },
20
+ "author": "Yunus Seyhan Dede",
21
+ "license": "MIT",
22
+ "homepage": "https://github.com/ysdede/parakeet.js",
23
+ "repository": {
24
+ "type": "git",
25
+ "url": "git+https://github.com/ysdede/parakeet.js.git"
26
+ },
27
+ "bugs": {
28
+ "url": "https://github.com/ysdede/parakeet.js/issues"
29
+ }
30
+ }
package/src/backend.js ADDED
@@ -0,0 +1,99 @@
1
// Back-end initialisation helper for ONNX Runtime Web.
// At runtime the caller can specify preferred backend ("webgpu", "wasm").
// The function resolves once ONNX Runtime is ready and returns the `ort` module.

/**
 * Initialise ONNX Runtime Web and pick the execution provider.
 * If WebGPU is requested but not supported, we transparently fall back to WASM.
 *
 * Side effects: mutates the global `ort.env.wasm` settings (paths, threads,
 * SIMD, proxy) and stashes the final backend choice on `ort._selectedBackend`.
 *
 * @param {Object} opts
 * @param {('webgpu'|'wasm')} [opts.backend='webgpu'] Desired backend.
 * @param {string} [opts.wasmPaths] Optional path prefix for WASM binaries.
 *   NOTE(review): accepted but not currently read in this body — confirm
 *   whether it should override the CDN default below.
 * @param {number} [opts.numThreads] WASM thread count; defaults to
 *   `navigator.hardwareConcurrency` (or 4) when SharedArrayBuffer exists.
 * @returns {Promise<typeof import('onnxruntime-web').default>}
 */
export async function initOrt({ backend = 'webgpu', wasmPaths, numThreads } = {}) {
  // Dynamic import to handle Vite bundling issues
  let ort;

  try {
    const ortModule = await import('onnxruntime-web');
    // Some bundlers expose the API on `default`, others on the namespace itself.
    ort = ortModule.default || ortModule;

    // Debug: Check the structure of ort
    console.log('[Parakeet.js] ORT structure:', {
      hasDefault: !!ortModule.default,
      hasEnv: !!ort.env,
      hasWasm: !!ort.env?.wasm,
      hasWebgpu: !!ort.env?.webgpu,
      keys: Object.keys(ort).slice(0, 10) // Show first 10 keys
    });

    // If still no env, try accessing it differently
    if (!ort.env) {
      console.log('[Parakeet.js] Trying alternative access patterns...');
      console.log('[Parakeet.js] ortModule keys:', Object.keys(ortModule));

      // Sometimes the module structure is nested
      if (ortModule.ort) {
        ort = ortModule.ort;
        console.log('[Parakeet.js] Found ort in ortModule.ort');
      }
    }
  } catch (e) {
    console.error('[Parakeet.js] Failed to import onnxruntime-web:', e);
    throw new Error('Failed to load ONNX Runtime Web. Please check your network connection.');
  }

  if (!ort || !ort.env) {
    throw new Error('ONNX Runtime Web loaded but env is not available. This might be a bundling issue.');
  }

  // Set up WASM paths first (needed for all backends)
  if (!ort.env.wasm.wasmPaths) {
    // Use the same version as in package.json
    const ver = '1.22.0-dev.20250409-89f8206ba4';
    ort.env.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ver}/dist/`;
  }

  // Configure WASM for better performance
  // NOTE(review): this check only matches 'wasm'/'webgpu' exactly; callers
  // passing values like 'webgpu-hybrid' would skip it — confirm callers
  // normalize the backend string first.
  if (backend === 'wasm' || backend === 'webgpu') {
    // Enable multi-threading if supported (requires cross-origin isolation)
    if (typeof SharedArrayBuffer !== 'undefined') {
      ort.env.wasm.numThreads = numThreads || navigator.hardwareConcurrency || 4;
      ort.env.wasm.simd = true;
      console.log(`[Parakeet.js] WASM configured with ${ort.env.wasm.numThreads} threads, SIMD enabled`);
    } else {
      console.warn('[Parakeet.js] SharedArrayBuffer not available - using single-threaded WASM');
      ort.env.wasm.numThreads = 1;
    }

    // Enable other WASM optimizations
    ort.env.wasm.proxy = false; // Direct execution for better performance
  }

  if (backend === 'webgpu') {
    // Check WebGPU support properly
    const webgpuSupported = 'gpu' in navigator;
    console.log(`[Parakeet.js] WebGPU supported: ${webgpuSupported}`);

    if (webgpuSupported) {
      try {
        // In newer versions of ONNX Runtime Web, WebGPU initialization is automatic
        // No need to call ort.env.webgpu.init() manually
        console.log('[Parakeet.js] WebGPU will be initialized automatically when creating session');
      } catch (error) {
        console.warn('[Parakeet.js] WebGPU initialization failed:', error);
        console.warn('[Parakeet.js] Falling back to WASM');
        backend = 'wasm';
      }
    } else {
      console.warn('[Parakeet.js] WebGPU not supported – falling back to WASM');
      backend = 'wasm';
    }
  }

  // Store the final backend choice for use in model selection
  ort._selectedBackend = backend;

  // Return the ort module for use in creating sessions and tensors
  return ort;
}