parakeet.js 0.0.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +118 -21
- package/package.json +9 -2
- package/src/backend.js +106 -98
- package/src/hub.js +16 -6
- package/src/index.js +36 -28
- package/src/models.js +138 -0
- package/src/parakeet.js +158 -32
- package/src/preprocessor.js +12 -2
- package/src/tokenizer.js +24 -6
- package/.gitmodules +0 -3
- package/examples/hf-spaces-demo/.gitattributes +0 -35
- package/examples/hf-spaces-demo/README.md +0 -92
- package/examples/hf-spaces-demo/package-lock.json +0 -17690
- package/examples/hf-spaces-demo/package.json +0 -41
- package/examples/hf-spaces-demo/public/favicon.ico +0 -0
- package/examples/hf-spaces-demo/public/index.html +0 -43
- package/examples/hf-spaces-demo/public/logo192.png +0 -0
- package/examples/hf-spaces-demo/public/logo512.png +0 -0
- package/examples/hf-spaces-demo/public/manifest.json +0 -25
- package/examples/hf-spaces-demo/public/robots.txt +0 -3
- package/examples/hf-spaces-demo/src/App.css +0 -170
- package/examples/hf-spaces-demo/src/App.js +0 -307
- package/examples/hf-spaces-demo/src/App.test.js +0 -8
- package/examples/hf-spaces-demo/src/index.css +0 -13
- package/examples/hf-spaces-demo/src/index.js +0 -17
- package/examples/hf-spaces-demo/src/logo.svg +0 -1
- package/examples/hf-spaces-demo/src/reportWebVitals.js +0 -13
- package/examples/hf-spaces-demo/src/setupTests.js +0 -5
- package/examples/react-demo/index.html +0 -12
- package/examples/react-demo/package.json +0 -20
- package/examples/react-demo/src/App.css +0 -134
- package/examples/react-demo/src/App.jsx +0 -325
- package/examples/react-demo/src/main.jsx +0 -6
- package/examples/react-demo/vite.config.js +0 -41
- package/examples/react-demo-dev/index.html +0 -12
- package/examples/react-demo-dev/package-lock.json +0 -1417
- package/examples/react-demo-dev/package.json +0 -20
- package/examples/react-demo-dev/public/assets/life_Jim.wav +0 -0
- package/examples/react-demo-dev/src/App.css +0 -134
- package/examples/react-demo-dev/src/App.jsx +0 -326
- package/examples/react-demo-dev/src/main.jsx +0 -6
- package/examples/react-demo-dev/vite.config.js +0 -41
- package/publish.ps1 +0 -65
package/README.md
CHANGED
|
@@ -7,7 +7,27 @@ Runs entirely in the browser on **WebGPU** or **WASM** via
|
|
|
7
7
|
> **Parakeet.js** offers a high-performance, browser-first implementation for NVIDIA's Parakeet-TDT speech-to-text models, running entirely client-side via WebGPU and WASM. Powered by ONNX Runtime Web, this library makes it simple to integrate state-of-the-art transcription into any web application.
|
|
8
8
|
|
|
9
9
|
> **Status:** Early preview — API is subject to change while things stabilise.
|
|
10
|
-
> **Note:** Currently
|
|
10
|
+
> **Note:** Currently supports Parakeet-TDT v2 (English) and v3 (Multilingual) model architectures.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## What's New (v0.3.x)
|
|
15
|
+
|
|
16
|
+
### 🌍 Parakeet TDT v3 Multilingual Support
|
|
17
|
+
- Added support for **Parakeet TDT 0.6B v3** with 13 languages: English, French, German, Spanish, Italian, Portuguese, Dutch, Polish, Russian, Ukrainian, Japanese, Korean, Chinese
|
|
18
|
+
- Both v2 (English-only) and v3 (Multilingual) models now work out of the box
|
|
19
|
+
- Use model keys for easier loading: `'parakeet-tdt-0.6b-v2'` or `'parakeet-tdt-0.6b-v3'`
|
|
20
|
+
|
|
21
|
+
### 🎛️ Model Configuration API
|
|
22
|
+
- New `MODELS` export with model metadata (supported languages, vocab size, etc.)
|
|
23
|
+
- `getModelConfig()` for programmatic model introspection
|
|
24
|
+
- `supportsLanguage()` helper to check language compatibility
|
|
25
|
+
|
|
26
|
+
### 🧪 Demo App Improvements
|
|
27
|
+
- **Model selector** dropdown to switch between v2 and v3
|
|
28
|
+
- **Language selector** (context-aware, shows only supported languages)
|
|
29
|
+
- **Quick Test** feature with HuggingFace speech datasets (People's Speech, MLS)
|
|
30
|
+
- **Reference text** display for comparing transcription accuracy
|
|
11
31
|
|
|
12
32
|
---
|
|
13
33
|
|
|
@@ -29,23 +49,29 @@ yarn add parakeet.js onnxruntime-web
|
|
|
29
49
|
|
|
30
50
|
We host ready-to-use ONNX exports on the HuggingFace Hub:
|
|
31
51
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
52
|
+
| Model | Languages | Repo ID |
|
|
53
|
+
|-------|-----------|---------|
|
|
54
|
+
| Parakeet TDT 0.6B v2 | English | `istupakov/parakeet-tdt-0.6b-v2-onnx` |
|
|
55
|
+
| Parakeet TDT 0.6B v3 | 13 languages | `istupakov/parakeet-tdt-0.6b-v3-onnx` |
|
|
35
56
|
|
|
36
57
|
The helper `getParakeetModel()` downloads all required files and caches them in **IndexedDB**:
|
|
37
58
|
|
|
38
59
|
```js
|
|
39
|
-
import { getParakeetModel } from 'parakeet.js';
|
|
60
|
+
import { getParakeetModel, MODELS } from 'parakeet.js';
|
|
40
61
|
|
|
41
|
-
|
|
42
|
-
const { urls, filenames } = await getParakeetModel(
|
|
43
|
-
backend: 'webgpu',
|
|
44
|
-
encoderQuant: 'fp32', // 'fp32' or 'int8'
|
|
45
|
-
decoderQuant: 'int8', // 'fp32' or 'int8'
|
|
46
|
-
preprocessor: 'nemo128',
|
|
62
|
+
// Option 1: Use model key (recommended)
|
|
63
|
+
const { urls, filenames, modelConfig } = await getParakeetModel('parakeet-tdt-0.6b-v3', {
|
|
64
|
+
backend: 'webgpu',
|
|
47
65
|
progress: ({file,loaded,total}) => console.log(file, loaded/total)
|
|
48
66
|
});
|
|
67
|
+
|
|
68
|
+
// Option 2: Use repo ID directly
|
|
69
|
+
const { urls, filenames } = await getParakeetModel('istupakov/parakeet-tdt-0.6b-v2-onnx', {
|
|
70
|
+
backend: 'webgpu',
|
|
71
|
+
encoderQuant: 'fp32',
|
|
72
|
+
decoderQuant: 'int8',
|
|
73
|
+
preprocessor: 'nemo128',
|
|
74
|
+
});
|
|
49
75
|
```
|
|
50
76
|
|
|
51
77
|
Returned structure:
|
|
@@ -109,7 +135,7 @@ Extra options:
|
|
|
109
135
|
|
|
110
136
|
| Option | Default | Description |
|
|
111
137
|
|--------|---------|-------------|
|
|
112
|
-
| `temperature` | 1.
|
|
138
|
+
| `temperature` | 1.0 | Softmax temperature for decoding (1.0 = greedy, >1.0 = sampling) |
|
|
113
139
|
| `frameStride` | 1 | Advance decoder by *n* encoder frames per step |
|
|
114
140
|
|
|
115
141
|
### Result schema
|
|
@@ -170,25 +196,83 @@ if (utterance_text.toLowerCase().includes(expected)) {
|
|
|
170
196
|
|
|
171
197
|
---
|
|
172
198
|
|
|
199
|
+
## Model Configuration API
|
|
200
|
+
|
|
201
|
+
Query model metadata programmatically:
|
|
202
|
+
|
|
203
|
+
```js
|
|
204
|
+
import { MODELS, LANGUAGE_NAMES, getModelConfig, supportsLanguage } from 'parakeet.js';
|
|
205
|
+
|
|
206
|
+
// List all available models
|
|
207
|
+
console.log(Object.keys(MODELS));
|
|
208
|
+
// ['parakeet-tdt-0.6b-v2', 'parakeet-tdt-0.6b-v3']
|
|
209
|
+
|
|
210
|
+
// Get model config
|
|
211
|
+
const config = getModelConfig('parakeet-tdt-0.6b-v3');
|
|
212
|
+
console.log(config.languages); // ['en', 'fr', 'de', 'es', ...]
|
|
213
|
+
console.log(config.displayName); // 'Parakeet TDT 0.6B v3 (Multilingual)'
|
|
214
|
+
|
|
215
|
+
// Check language support
|
|
216
|
+
supportsLanguage('parakeet-tdt-0.6b-v3', 'fr'); // true
|
|
217
|
+
supportsLanguage('parakeet-tdt-0.6b-v2', 'fr'); // false
|
|
218
|
+
|
|
219
|
+
// Get language display names
|
|
220
|
+
console.log(LANGUAGE_NAMES['fr']); // 'French'
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
173
225
|
## Using the React demo as a template
|
|
174
226
|
|
|
175
|
-
Located at `examples/react-demo
|
|
227
|
+
Located at `examples/react-demo` (production) and `examples/react-demo-dev` (development).
|
|
176
228
|
|
|
177
229
|
Quick start:
|
|
178
230
|
|
|
179
231
|
```bash
|
|
180
|
-
cd examples/react-demo
|
|
232
|
+
cd examples/react-demo-dev
|
|
181
233
|
npm i
|
|
182
234
|
npm run dev # Vite => http://localhost:5173
|
|
183
235
|
```
|
|
184
236
|
|
|
185
|
-
|
|
237
|
+
### Demo Features
|
|
238
|
+
|
|
239
|
+
The development demo (`react-demo-dev`) includes advanced features:
|
|
240
|
+
|
|
241
|
+
- **Model Selector**: Switch between v2 (English) and v3 (Multilingual)
|
|
242
|
+
- **Language Selector**: Context-aware dropdown showing only supported languages
|
|
243
|
+
- **Quick Test**: Load random samples from HuggingFace speech datasets
|
|
244
|
+
- **Reference Text**: Compare transcription against ground truth
|
|
245
|
+
|
|
246
|
+
### Speech Dataset Utilities (Demo Only)
|
|
247
|
+
|
|
248
|
+
The demo includes reusable utilities for testing with HuggingFace datasets:
|
|
249
|
+
|
|
250
|
+
```js
|
|
251
|
+
// Located in: examples/react-demo-dev/src/utils/speechDatasets.js
|
|
252
|
+
import { fetchRandomSample, hasTestSamples, SPEECH_DATASETS } from './utils/speechDatasets';
|
|
253
|
+
|
|
254
|
+
// Check if test samples are available for a language
|
|
255
|
+
if (hasTestSamples('fr')) {
|
|
256
|
+
// Fetch a random French audio sample with transcription
|
|
257
|
+
const sample = await fetchRandomSample('fr', {
|
|
258
|
+
targetSampleRate: 16000,
|
|
259
|
+
onProgress: ({ message }) => console.log(message),
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
console.log(sample.transcription); // Ground truth text
|
|
263
|
+
console.log(sample.pcm); // Float32Array audio
|
|
264
|
+
console.log(sample.duration); // Duration in seconds
|
|
265
|
+
}
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
**Supported languages for testing:** English (People's Speech), French, German, Spanish, Italian, Portuguese, Dutch, Polish (Multilingual LibriSpeech)
|
|
269
|
+
|
|
270
|
+
### Key Files
|
|
186
271
|
|
|
187
272
|
| File | Purpose |
|
|
188
273
|
|------|---------|
|
|
189
|
-
| `App.jsx` | Complete end-to-end reference UI
|
|
190
|
-
| `
|
|
191
|
-
| `hub.js` | Lightweight HuggingFace Hub helper — downloads and caches model binaries. |
|
|
274
|
+
| `App.jsx` | Complete end-to-end reference UI with model/language selection, performance metrics, and transcription history |
|
|
275
|
+
| `utils/speechDatasets.js` | Reusable utilities for fetching test samples from HuggingFace datasets |
|
|
192
276
|
|
|
193
277
|
Copy-paste the `loadModel()` and `transcribeFile()` functions into your app, adjust UI bindings, and you are ready to go.
|
|
194
278
|
|
|
@@ -223,7 +307,19 @@ The demo is also available locally at `examples/hf-spaces-demo` and can be deplo
|
|
|
223
307
|
|
|
224
308
|
## Changelog
|
|
225
309
|
|
|
226
|
-
|
|
310
|
+
### v0.3.x (January 2026)
|
|
311
|
+
- ✨ **Multilingual Support**: Added Parakeet TDT 0.6B v3 with 13 languages
|
|
312
|
+
- 🎛️ **Model Config API**: New `MODELS`, `LANGUAGE_NAMES`, `getModelConfig()`, `supportsLanguage()` exports
|
|
313
|
+
- 🧪 **Demo Enhancements**: Model/language selectors, HuggingFace dataset testing
|
|
314
|
+
- 🔧 **TDT Decoding Fix**: Aligned decoding logic with NeMo framework for improved accuracy
|
|
315
|
+
- ๐ **Streaming Support**: Added incremental transcription capabilities
|
|
316
|
+
|
|
317
|
+
### v0.2.x
|
|
318
|
+
- Initial WebGPU/WASM hybrid backend
|
|
319
|
+
- IndexedDB model caching
|
|
320
|
+
- Performance instrumentation (RTF, timing metrics)
|
|
321
|
+
|
|
322
|
+
See `OPTIMIZATION_PLAN.md` for detailed performance notes.
|
|
227
323
|
|
|
228
324
|
---
|
|
229
325
|
|
|
@@ -232,9 +328,10 @@ See `OPTIMIZATION_PLAN.md` for a timeline of performance tweaks and planned feat
|
|
|
232
328
|
This project builds upon the excellent work of:
|
|
233
329
|
|
|
234
330
|
- **[istupakov](https://github.com/istupakov)** - For providing the [ONNX-ASR](https://github.com/istupakov/onnx-asr) repository, which served as the foundation and starting point for this JavaScript implementation
|
|
235
|
-
- **[istupakov/parakeet-tdt-0.6b-v2-onnx](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v2-onnx)** -
|
|
331
|
+
- **[istupakov/parakeet-tdt-0.6b-v2-onnx](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v2-onnx)** - English model exports
|
|
332
|
+
- **[istupakov/parakeet-tdt-0.6b-v3-onnx](https://huggingface.co/istupakov/parakeet-tdt-0.6b-v3-onnx)** - Multilingual model exports
|
|
236
333
|
- **ONNX Runtime Web** - For powering the browser-based inference engine
|
|
237
|
-
- **
|
|
334
|
+
- **HuggingFace Datasets** - People's Speech, Multilingual LibriSpeech for testing
|
|
238
335
|
|
|
239
336
|
The Python-based ONNX-ASR project provided crucial insights into model handling, preprocessing pipelines, and served as a reference implementation during the development of this browser-compatible version.
|
|
240
337
|
|
package/package.json
CHANGED
|
@@ -1,18 +1,25 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "parakeet.js",
|
|
3
|
-
"version": "0.0
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"description": "NVIDIA Parakeet speech recognition for the browser (WebGPU/WASM) powered by ONNX Runtime Web.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
7
7
|
".": "./src/index.js"
|
|
8
8
|
},
|
|
9
|
+
"files": [
|
|
10
|
+
"src",
|
|
11
|
+
"README.md",
|
|
12
|
+
"LICENSE"
|
|
13
|
+
],
|
|
9
14
|
"keywords": [
|
|
10
15
|
"parakeet",
|
|
11
16
|
"speech",
|
|
12
17
|
"onnx",
|
|
13
18
|
"webgpu",
|
|
14
19
|
"wasm",
|
|
15
|
-
"transcription"
|
|
20
|
+
"transcription",
|
|
21
|
+
"multilingual",
|
|
22
|
+
"asr"
|
|
16
23
|
],
|
|
17
24
|
"dependencies": {
|
|
18
25
|
"onnxruntime-web": "1.22.0-dev.20250409-89f8206ba4"
|
package/src/backend.js
CHANGED
|
@@ -1,99 +1,107 @@
|
|
|
1
|
-
// Back-end initialisation helper for ONNX Runtime Web.
|
|
2
|
-
// At runtime the caller can specify preferred backend ("webgpu", "wasm").
|
|
3
|
-
// The function resolves once ONNX Runtime is ready and returns the `ort` module.
|
|
4
|
-
|
|
5
|
-
/**
|
|
6
|
-
* Initialise ONNX Runtime Web and pick the execution provider.
|
|
7
|
-
* If WebGPU is requested but not supported, we transparently fall back to WASM.
|
|
8
|
-
* @param {Object} opts
|
|
9
|
-
* @param {('webgpu'|'wasm')} [opts.backend='webgpu'] Desired backend.
|
|
10
|
-
* @param {string} [opts.wasmPaths] Optional path prefix for WASM binaries.
|
|
11
|
-
* @returns {Promise<typeof import('onnxruntime-web').default>}
|
|
12
|
-
*/
|
|
13
|
-
export async function initOrt({ backend = 'webgpu', wasmPaths, numThreads } = {}) {
|
|
14
|
-
// Dynamic import to handle Vite bundling issues
|
|
15
|
-
let ort;
|
|
16
|
-
|
|
17
|
-
try {
|
|
18
|
-
const ortModule = await import('onnxruntime-web');
|
|
19
|
-
ort = ortModule.default || ortModule;
|
|
20
|
-
|
|
21
|
-
// Debug: Check the structure of ort
|
|
22
|
-
console.log('[Parakeet.js] ORT structure:', {
|
|
23
|
-
hasDefault: !!ortModule.default,
|
|
24
|
-
hasEnv: !!ort.env,
|
|
25
|
-
hasWasm: !!ort.env?.wasm,
|
|
26
|
-
hasWebgpu: !!ort.env?.webgpu,
|
|
27
|
-
keys: Object.keys(ort).slice(0, 10) // Show first 10 keys
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
// If still no env, try accessing it differently
|
|
31
|
-
if (!ort.env) {
|
|
32
|
-
console.log('[Parakeet.js] Trying alternative access patterns...');
|
|
33
|
-
console.log('[Parakeet.js] ortModule keys:', Object.keys(ortModule));
|
|
34
|
-
|
|
35
|
-
// Sometimes the module structure is nested
|
|
36
|
-
if (ortModule.ort) {
|
|
37
|
-
ort = ortModule.ort;
|
|
38
|
-
console.log('[Parakeet.js] Found ort in ortModule.ort');
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
} catch (e) {
|
|
42
|
-
console.error('[Parakeet.js] Failed to import onnxruntime-web:', e);
|
|
43
|
-
throw new Error('Failed to load ONNX Runtime Web. Please check your network connection.');
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
if (!ort || !ort.env) {
|
|
47
|
-
throw new Error('ONNX Runtime Web loaded but env is not available. This might be a bundling issue.');
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
// Set up WASM paths first (needed for all backends)
|
|
51
|
-
if (!ort.env.wasm.wasmPaths) {
|
|
52
|
-
// Use the same version as in package.json
|
|
53
|
-
const ver = '1.22.0-dev.20250409-89f8206ba4';
|
|
54
|
-
ort.env.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ver}/dist/`;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
// Configure WASM for better performance
|
|
58
|
-
if (backend === 'wasm' || backend === 'webgpu') {
|
|
59
|
-
// Enable multi-threading if supported
|
|
60
|
-
if (typeof SharedArrayBuffer !== 'undefined') {
|
|
61
|
-
ort.env.wasm.numThreads = numThreads || navigator.hardwareConcurrency || 4;
|
|
62
|
-
ort.env.wasm.simd = true;
|
|
63
|
-
console.log(`[Parakeet.js] WASM configured with ${ort.env.wasm.numThreads} threads, SIMD enabled`);
|
|
64
|
-
} else {
|
|
65
|
-
console.warn('[Parakeet.js] SharedArrayBuffer not available - using single-threaded WASM');
|
|
66
|
-
ort.env.wasm.numThreads = 1;
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
// Enable other WASM optimizations
|
|
70
|
-
ort.env.wasm.proxy = false; // Direct execution for better performance
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
if (backend === 'webgpu') {
|
|
74
|
-
// Check WebGPU support properly
|
|
75
|
-
const webgpuSupported = 'gpu' in navigator;
|
|
76
|
-
console.log(`[Parakeet.js] WebGPU supported: ${webgpuSupported}`);
|
|
77
|
-
|
|
78
|
-
if (webgpuSupported) {
|
|
79
|
-
try {
|
|
80
|
-
// In newer versions of ONNX Runtime Web, WebGPU initialization is automatic
|
|
81
|
-
// No need to call ort.env.webgpu.init() manually
|
|
82
|
-
console.log('[Parakeet.js] WebGPU will be initialized automatically when creating session');
|
|
83
|
-
} catch (error) {
|
|
84
|
-
console.warn('[Parakeet.js] WebGPU initialization failed:', error);
|
|
85
|
-
console.warn('[Parakeet.js] Falling back to WASM');
|
|
86
|
-
backend = 'wasm';
|
|
87
|
-
}
|
|
88
|
-
} else {
|
|
89
|
-
console.warn('[Parakeet.js] WebGPU not supported โ falling back to WASM');
|
|
90
|
-
backend = 'wasm';
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// Store the final backend choice for use in model selection
|
|
95
|
-
ort._selectedBackend = backend;
|
|
96
|
-
|
|
97
|
-
//
|
|
98
|
-
|
|
1
|
+
// Back-end initialisation helper for ONNX Runtime Web.
|
|
2
|
+
// At runtime the caller can specify preferred backend ("webgpu", "wasm").
|
|
3
|
+
// The function resolves once ONNX Runtime is ready and returns the `ort` module.
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Initialise ONNX Runtime Web and pick the execution provider.
|
|
7
|
+
* If WebGPU is requested but not supported, we transparently fall back to WASM.
|
|
8
|
+
* @param {Object} opts
|
|
9
|
+
* @param {('webgpu'|'wasm')} [opts.backend='webgpu'] Desired backend.
|
|
10
|
+
* @param {string} [opts.wasmPaths] Optional path prefix for WASM binaries.
|
|
11
|
+
* @returns {Promise<typeof import('onnxruntime-web').default>}
|
|
12
|
+
*/
|
|
13
|
+
export async function initOrt({ backend = 'webgpu', wasmPaths, numThreads } = {}) {
|
|
14
|
+
// Dynamic import to handle Vite bundling issues
|
|
15
|
+
let ort;
|
|
16
|
+
|
|
17
|
+
try {
|
|
18
|
+
const ortModule = await import('onnxruntime-web');
|
|
19
|
+
ort = ortModule.default || ortModule;
|
|
20
|
+
|
|
21
|
+
// Debug: Check the structure of ort
|
|
22
|
+
console.log('[Parakeet.js] ORT structure:', {
|
|
23
|
+
hasDefault: !!ortModule.default,
|
|
24
|
+
hasEnv: !!ort.env,
|
|
25
|
+
hasWasm: !!ort.env?.wasm,
|
|
26
|
+
hasWebgpu: !!ort.env?.webgpu,
|
|
27
|
+
keys: Object.keys(ort).slice(0, 10) // Show first 10 keys
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
// If still no env, try accessing it differently
|
|
31
|
+
if (!ort.env) {
|
|
32
|
+
console.log('[Parakeet.js] Trying alternative access patterns...');
|
|
33
|
+
console.log('[Parakeet.js] ortModule keys:', Object.keys(ortModule));
|
|
34
|
+
|
|
35
|
+
// Sometimes the module structure is nested
|
|
36
|
+
if (ortModule.ort) {
|
|
37
|
+
ort = ortModule.ort;
|
|
38
|
+
console.log('[Parakeet.js] Found ort in ortModule.ort');
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
} catch (e) {
|
|
42
|
+
console.error('[Parakeet.js] Failed to import onnxruntime-web:', e);
|
|
43
|
+
throw new Error('Failed to load ONNX Runtime Web. Please check your network connection.');
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
if (!ort || !ort.env) {
|
|
47
|
+
throw new Error('ONNX Runtime Web loaded but env is not available. This might be a bundling issue.');
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Set up WASM paths first (needed for all backends)
|
|
51
|
+
if (!ort.env.wasm.wasmPaths) {
|
|
52
|
+
// Use the same version as in package.json
|
|
53
|
+
const ver = '1.22.0-dev.20250409-89f8206ba4';
|
|
54
|
+
ort.env.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/onnxruntime-web@${ver}/dist/`;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Configure WASM for better performance
|
|
58
|
+
if (backend === 'wasm' || backend === 'webgpu') {
|
|
59
|
+
// Enable multi-threading if supported
|
|
60
|
+
if (typeof SharedArrayBuffer !== 'undefined') {
|
|
61
|
+
ort.env.wasm.numThreads = numThreads || navigator.hardwareConcurrency || 4;
|
|
62
|
+
ort.env.wasm.simd = true;
|
|
63
|
+
console.log(`[Parakeet.js] WASM configured with ${ort.env.wasm.numThreads} threads, SIMD enabled`);
|
|
64
|
+
} else {
|
|
65
|
+
console.warn('[Parakeet.js] SharedArrayBuffer not available - using single-threaded WASM');
|
|
66
|
+
ort.env.wasm.numThreads = 1;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
// Enable other WASM optimizations
|
|
70
|
+
ort.env.wasm.proxy = false; // Direct execution for better performance
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
if (backend === 'webgpu') {
|
|
74
|
+
// Check WebGPU support properly
|
|
75
|
+
const webgpuSupported = 'gpu' in navigator;
|
|
76
|
+
console.log(`[Parakeet.js] WebGPU supported: ${webgpuSupported}`);
|
|
77
|
+
|
|
78
|
+
if (webgpuSupported) {
|
|
79
|
+
try {
|
|
80
|
+
// In newer versions of ONNX Runtime Web, WebGPU initialization is automatic
|
|
81
|
+
// No need to call ort.env.webgpu.init() manually
|
|
82
|
+
console.log('[Parakeet.js] WebGPU will be initialized automatically when creating session');
|
|
83
|
+
} catch (error) {
|
|
84
|
+
console.warn('[Parakeet.js] WebGPU initialization failed:', error);
|
|
85
|
+
console.warn('[Parakeet.js] Falling back to WASM');
|
|
86
|
+
backend = 'wasm';
|
|
87
|
+
}
|
|
88
|
+
} else {
|
|
89
|
+
console.warn('[Parakeet.js] WebGPU not supported โ falling back to WASM');
|
|
90
|
+
backend = 'wasm';
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Store the final backend choice for use in model selection
|
|
95
|
+
ort._selectedBackend = backend;
|
|
96
|
+
|
|
97
|
+
// Expose ort globally so other modules (like SileroVAD) can use the same configured instance
|
|
98
|
+
if (typeof globalThis !== 'undefined') {
|
|
99
|
+
globalThis.ort = ort;
|
|
100
|
+
}
|
|
101
|
+
if (typeof self !== 'undefined') {
|
|
102
|
+
self.ort = ort;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Return the ort module for use in creating sessions and tensors
|
|
106
|
+
return ort;
|
|
99
107
|
}
|
package/src/hub.js
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
* Downloads models from HF and caches them in browser storage.
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
+
import { MODELS, getModelConfig } from './models.js';
|
|
7
|
+
|
|
6
8
|
const DB_NAME = 'parakeet-cache-db';
|
|
7
9
|
const STORE_NAME = 'file-store';
|
|
8
10
|
let dbPromise = null;
|
|
@@ -164,17 +166,24 @@ export async function getModelText(repoId, filename, options = {}) {
|
|
|
164
166
|
|
|
165
167
|
/**
|
|
166
168
|
* Convenience function to get all Parakeet model files for a given architecture.
|
|
167
|
-
* @param {string}
|
|
169
|
+
* @param {string} repoIdOrModelKey HF repo (e.g., 'nvidia/parakeet-tdt-1.1b') or model key (e.g., 'parakeet-tdt-0.6b-v3')
|
|
168
170
|
* @param {Object} [options]
|
|
169
171
|
* @param {('int8'|'fp32')} [options.encoderQuant='int8'] Encoder quantization
|
|
170
172
|
* @param {('int8'|'fp32')} [options.decoderQuant='int8'] Decoder quantization
|
|
171
|
-
* @param {('nemo80'|'nemo128')} [options.preprocessor
|
|
173
|
+
* @param {('nemo80'|'nemo128')} [options.preprocessor] Preprocessor variant (auto-detected from model config if not specified)
|
|
172
174
|
* @param {('webgpu'|'wasm')} [options.backend='webgpu'] Backend to use
|
|
173
175
|
* @param {Function} [options.progress] Progress callback
|
|
174
|
-
* @returns {Promise<{urls: object, filenames: object}>}
|
|
176
|
+
* @returns {Promise<{urls: object, filenames: object, modelConfig: object|null}>}
|
|
175
177
|
*/
|
|
176
|
-
export async function getParakeetModel(
|
|
177
|
-
|
|
178
|
+
export async function getParakeetModel(repoIdOrModelKey, options = {}) {
|
|
179
|
+
// Resolve model key to repo ID and get config
|
|
180
|
+
const modelConfig = getModelConfig(repoIdOrModelKey);
|
|
181
|
+
const repoId = modelConfig?.repoId || repoIdOrModelKey;
|
|
182
|
+
|
|
183
|
+
// Use model config defaults if available
|
|
184
|
+
const defaultPreprocessor = modelConfig?.preprocessor || 'nemo128';
|
|
185
|
+
|
|
186
|
+
const { encoderQuant = 'int8', decoderQuant = 'int8', preprocessor = defaultPreprocessor, backend = 'webgpu', progress } = options;
|
|
178
187
|
|
|
179
188
|
// Decide quantisation per component
|
|
180
189
|
let encoderQ = encoderQuant;
|
|
@@ -215,7 +224,8 @@ export async function getParakeetModel(repoId, options = {}) {
|
|
|
215
224
|
encoder: encoderName,
|
|
216
225
|
decoder: decoderName
|
|
217
226
|
},
|
|
218
|
-
quantisation: { encoder: encoderQ, decoder: decoderQ }
|
|
227
|
+
quantisation: { encoder: encoderQ, decoder: decoderQ },
|
|
228
|
+
modelConfig: modelConfig || null, // Include model config for downstream use
|
|
219
229
|
};
|
|
220
230
|
|
|
221
231
|
for (const { key, name } of filesToGet) {
|
package/src/index.js
CHANGED
|
@@ -1,29 +1,37 @@
|
|
|
1
|
-
export { ParakeetModel } from './parakeet.js';
|
|
2
|
-
export { getModelFile, getModelText, getParakeetModel } from './hub.js';
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
1
|
+
export { ParakeetModel } from './parakeet.js';
|
|
2
|
+
export { getModelFile, getModelText, getParakeetModel } from './hub.js';
|
|
3
|
+
export { MODELS, LANGUAGE_NAMES, DEFAULT_MODEL, getModelConfig, getModelKeyFromRepoId, supportsLanguage, listModels, getLanguageName } from './models.js';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Convenience factory to load from a local path.
|
|
7
|
+
*
|
|
8
|
+
* Example:
|
|
9
|
+
* import { fromUrls } from 'parakeet.js';
|
|
10
|
+
* const model = await fromUrls({ ... });
|
|
11
|
+
*/
|
|
12
|
+
export async function fromUrls(cfg) {
|
|
13
|
+
const { ParakeetModel } = await import('./parakeet.js');
|
|
14
|
+
return ParakeetModel.fromUrls(cfg);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Convenience factory to load from HuggingFace Hub.
|
|
19
|
+
*
|
|
20
|
+
* Example:
|
|
21
|
+
* import { fromHub } from 'parakeet.js';
|
|
22
|
+
* const model = await fromHub('nvidia/parakeet-tdt-1.1b', { quantization: 'int8' });
|
|
23
|
+
*
|
|
24
|
+
* // Or use a model key for known models:
|
|
25
|
+
* const model = await fromHub('parakeet-tdt-0.6b-v3', { quantization: 'int8' });
|
|
26
|
+
*/
|
|
27
|
+
export async function fromHub(repoIdOrModelKey, options = {}) {
|
|
28
|
+
const { getParakeetModel } = await import('./hub.js');
|
|
29
|
+
const { ParakeetModel } = await import('./parakeet.js');
|
|
30
|
+
const { MODELS } = await import('./models.js');
|
|
31
|
+
|
|
32
|
+
// Resolve model key to repo ID if needed
|
|
33
|
+
const repoId = MODELS[repoIdOrModelKey]?.repoId || repoIdOrModelKey;
|
|
34
|
+
|
|
35
|
+
const urls = await getParakeetModel(repoId, options);
|
|
36
|
+
return ParakeetModel.fromUrls({ ...urls, ...options });
|
|
29
37
|
}
|