parakeet.js 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +210 -0
- package/examples/react-demo/index.html +12 -0
- package/examples/react-demo/package.json +20 -0
- package/examples/react-demo/src/App.css +134 -0
- package/examples/react-demo/src/App.jsx +327 -0
- package/examples/react-demo/src/main.jsx +6 -0
- package/examples/react-demo/vite.config.js +41 -0
- package/package.json +30 -0
- package/src/backend.js +99 -0
- package/src/hub.js +242 -0
- package/src/index.js +29 -0
- package/src/parakeet.js +481 -0
- package/src/preprocessor.js +69 -0
- package/src/tokenizer.js +54 -0
package/src/hub.js
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Simplified HuggingFace Hub utilities for parakeet.js
|
|
3
|
+
* Downloads models from HF and caches them in browser storage.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Name of the IndexedDB database that holds downloaded model files.
const DB_NAME = 'parakeet-cache-db';
// Name of the object store inside that database; values are stored under explicit keys.
const STORE_NAME = 'file-store';
// Memoised promise for the opened database (assigned by getDb); null until first use.
let dbPromise = null;

// Cache for repo file listings so we only hit the HF API once per page load.
// Keys have the form `${repoId}@${revision}`; values are arrays of file names.
const repoFileCache = new Map();
|
|
12
|
+
|
|
13
|
+
/**
 * List the file names contained in a HuggingFace model repo.
 *
 * Successful listings are memoised in `repoFileCache` under `${repoId}@${revision}`.
 * Failed lookups are deliberately NOT memoised: a transient network error would
 * otherwise masquerade as "this repo has no files" for the rest of the page load.
 *
 * @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
 * @param {string} [revision='main'] Git revision
 * @returns {Promise<string[]>} The `rfilename` of every entry in the response's
 *   `siblings` array; an empty array when the lookup failed or `siblings` is absent.
 */
async function listRepoFiles(repoId, revision = 'main') {
  const cacheKey = `${repoId}@${revision}`;
  if (repoFileCache.has(cacheKey)) return repoFileCache.get(cacheKey);

  // Encode the revision so characters such as '/', '&' or '#' cannot corrupt the query string.
  const url = `https://huggingface.co/api/models/${repoId}?revision=${encodeURIComponent(revision)}`;
  try {
    const resp = await fetch(url);
    if (!resp.ok) throw new Error(`Failed to list repo files: ${resp.status}`);
    const json = await resp.json();
    const files = json.siblings?.map((s) => s.rfilename) ?? [];
    repoFileCache.set(cacheKey, files);
    return files;
  } catch (err) {
    console.warn('[Hub] Could not fetch repo file list – falling back to optimistic fetch', err);
    // Best-effort: report "unknown" as an empty list, but leave the cache untouched
    // so the next call can try the API again.
    return [];
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Open the IndexedDB database used as the model-file cache, creating the
 * object store on first use.
 *
 * The open request is issued once and its promise is memoised in `dbPromise`.
 * If opening fails the memoised promise is cleared, so a later call retries
 * instead of replaying the same rejection forever.
 *
 * @returns {Promise<IDBDatabase>} The opened database.
 * @throws {Error} (as a rejection) when the database cannot be opened; the
 *   underlying request error is attached as `cause`.
 */
function getDb() {
  if (!dbPromise) {
    const opening = new Promise((resolve, reject) => {
      const request = indexedDB.open(DB_NAME, 1);
      request.onerror = () => reject(new Error('Error opening IndexedDB', { cause: request.error }));
      request.onsuccess = () => resolve(request.result);
      request.onupgradeneeded = (event) => {
        const db = event.target.result;
        if (!db.objectStoreNames.contains(STORE_NAME)) {
          db.createObjectStore(STORE_NAME);
        }
      };
    });
    dbPromise = opening;
    // Covers both the async onerror path and a synchronous throw from indexedDB.open().
    // Callers still observe the rejection through the promise they were handed.
    opening.catch(() => {
      if (dbPromise === opening) dbPromise = null;
    });
  }
  return dbPromise;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Read one entry from the IndexedDB file store.
 *
 * @param {string} key Key the entry was stored under.
 * @returns {Promise<*>} The stored value, or `undefined` when no entry exists for `key`.
 * @throws {Error} (as a rejection) when the read request fails; the underlying
 *   request error is attached as `cause`.
 */
async function getFileFromDb(key) {
  const db = await getDb();
  return new Promise((resolve, reject) => {
    const store = db.transaction([STORE_NAME], 'readonly').objectStore(STORE_NAME);
    const request = store.get(key);
    // Reject with a real Error (stack trace + cause) rather than a bare string.
    request.onerror = () => reject(new Error('Error reading from DB', { cause: request.error }));
    request.onsuccess = () => resolve(request.result);
  });
}
|
|
60
|
+
|
|
61
|
+
/**
 * Store a value in the IndexedDB file store under `key`, replacing any existing entry.
 *
 * The returned promise settles with the transaction, not with the individual
 * put request: a request can succeed and the transaction still abort at commit
 * time (for example when the storage quota is exceeded), in which case nothing
 * was actually persisted.
 *
 * @param {string} key Key to store the value under.
 * @param {Blob} blob Value to store.
 * @returns {Promise<*>} Resolves with the result of the put request once the
 *   transaction has completed.
 * @throws {Error} (as a rejection) when the request fails or the transaction
 *   aborts; the underlying error is attached as `cause`.
 */
async function saveFileToDb(key, blob) {
  const db = await getDb();
  return new Promise((resolve, reject) => {
    const transaction = db.transaction([STORE_NAME], 'readwrite');
    const request = transaction.objectStore(STORE_NAME).put(blob, key);
    const fail = () =>
      reject(new Error('Error writing to DB', { cause: transaction.error ?? request.error }));
    transaction.oncomplete = () => resolve(request.result);
    // Request errors bubble up to the transaction, so these two handlers cover both levels.
    transaction.onerror = fail;
    transaction.onabort = fail;
  });
}
|
|
71
|
+
|
|
72
|
+
/**
 * Download a file from HuggingFace Hub with caching support.
 *
 * When IndexedDB is available the file is looked up there first and, after a
 * download, stored there. Cache failures are logged and never abort the call.
 *
 * @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
 * @param {string} filename File to download (e.g., 'encoder-model.onnx')
 * @param {Object} [options]
 * @param {string} [options.revision='main'] Git revision
 * @param {string} [options.subfolder=''] Subfolder within repo
 * @param {Function} [options.progress] Progress callback, called with
 *   `{ loaded, total, file }` after each received chunk. Only invoked while
 *   streaming a response that reports a positive content-length.
 * @returns {Promise<string>} URL to cached file (blob URL). The caller owns the
 *   URL and should release it with `URL.revokeObjectURL` when done.
 * @throws {Error} When the HTTP response status is not OK.
 */
export async function getModelFile(repoId, filename, options = {}) {
  const { revision = 'main', subfolder = '', progress } = options;

  // Construct HF URL
  const baseUrl = 'https://huggingface.co';
  const pathParts = [repoId, 'resolve', revision];
  if (subfolder) pathParts.push(subfolder);
  pathParts.push(filename);
  const url = `${baseUrl}/${pathParts.join('/')}`;

  const cacheKey = `hf-${repoId}-${revision}-${subfolder}-${filename}`;
  const canCache = typeof indexedDB !== 'undefined';

  // Check IndexedDB first
  if (canCache) {
    try {
      const cachedBlob = await getFileFromDb(cacheKey);
      if (cachedBlob) {
        console.log(`[Hub] Using cached ${filename} from IndexedDB`);
        return URL.createObjectURL(cachedBlob);
      }
    } catch (e) {
      console.warn('[Hub] IndexedDB cache check failed:', e);
    }
  }

  // Download from HF
  console.log(`[Hub] Downloading ${filename} from ${repoId}...`);
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download ${filename}: ${response.status} ${response.statusText}`);
  }

  let blob;
  if (response.body) {
    // Stream with progress. A missing or unparsable content-length yields 0,
    // which simply disables progress reporting.
    const total = Number.parseInt(response.headers.get('content-length') ?? '', 10) || 0;
    let loaded = 0;

    const reader = response.body.getReader();
    const chunks = [];

    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      chunks.push(value);
      loaded += value.length;

      if (progress && total > 0) {
        progress({ loaded, total, file: filename });
      }
    }

    // Reconstruct blob
    blob = new Blob(chunks, { type: response.headers.get('content-type') || 'application/octet-stream' });
  } else {
    // No readable stream (null-body response, or an environment without
    // streaming support): let the runtime buffer the payload instead of
    // crashing on `response.body.getReader()`.
    blob = await response.blob();
  }

  // Cache the blob in IndexedDB
  if (canCache) {
    try {
      await saveFileToDb(cacheKey, blob);
      console.log(`[Hub] Cached ${filename} in IndexedDB`);
    } catch (e) {
      console.warn('[Hub] Failed to cache in IndexedDB:', e);
    }
  }

  return URL.createObjectURL(blob);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Download text file from HF Hub.
 *
 * Resolves the file through `getModelFile` (so the same caching applies),
 * reads the resulting blob URL as text, and always releases the blob URL
 * afterwards — including when reading fails.
 *
 * @param {string} repoId Model repo ID
 * @param {string} filename Text file to download
 * @param {Object} [options] Same as getModelFile
 * @returns {Promise<string>} File content as text
 */
export async function getModelText(repoId, filename, options = {}) {
  const blobUrl = await getModelFile(repoId, filename, options);
  try {
    const response = await fetch(blobUrl);
    return await response.text();
  } finally {
    // Blob URLs keep their blob alive until revoked; release it on every exit path.
    URL.revokeObjectURL(blobUrl);
  }
}
|
|
164
|
+
|
|
165
|
+
/**
 * Convenience function to get all Parakeet model files for a given architecture.
 *
 * Always fetches the encoder, the decoder/joint network, `vocab.txt` and the
 * preprocessor. The `<model>.data` external-data companions are optional: they
 * are requested when the repo listing contains them, or when the listing is
 * unavailable (in which case they are probed and a failure is tolerated).
 *
 * @param {string} repoId HF repo (e.g., 'nvidia/parakeet-tdt-1.1b')
 * @param {Object} [options] Also forwarded to getModelFile (e.g. `revision`, `subfolder`).
 * @param {('int8'|'fp32')} [options.quantization='int8'] Model quantization
 * @param {('nemo80'|'nemo128')} [options.preprocessor='nemo128'] Preprocessor variant
 * @param {('webgpu'|'wasm')} [options.backend='webgpu'] Backend to use. Any value
 *   starting with 'webgpu' forces an int8 encoder request to fp32.
 * @param {boolean} [options.decoderInt8=false] On WebGPU backends, use the int8
 *   decoder even though the encoder runs in fp32.
 * @param {Function} [options.progress] Progress callback
 * @returns {Promise<{urls: object, filenames: object, quantisation: object}>}
 *   `urls` maps `encoderUrl`, `decoderUrl`, `tokenizerUrl`, `preprocessorUrl`
 *   (and, when requested, `encoderDataUrl` / `decoderDataUrl`, which are `null`
 *   if the download failed) to blob URLs; `filenames` holds the encoder and
 *   decoder file names; `quantisation` holds the quantisation actually chosen
 *   for each of the two.
 * @throws {Error} When any required (non-`.data`) file cannot be downloaded.
 */
export async function getParakeetModel(repoId, options = {}) {
  const { quantization = 'int8', preprocessor = 'nemo128', backend = 'webgpu', progress, decoderInt8 = false } = options;

  // Decide quantisation per component
  let encoderQ = quantization;
  let decoderQ = quantization;

  if (backend.startsWith('webgpu')) {
    if (encoderQ === 'int8') {
      console.log('[Hub] WebGPU encoder -> forcing fp32 for compatibility');
      encoderQ = 'fp32';
    }
    // Decoder follows the encoder unless int8 was explicitly requested for it.
    decoderQ = decoderInt8 ? 'int8' : encoderQ;
  }

  const encoderSuffix = encoderQ === 'int8' ? '.int8.onnx' : '.onnx';
  const decoderSuffix = decoderQ === 'int8' ? '.int8.onnx' : '.onnx';

  const encoderName = `encoder-model${encoderSuffix}`;
  const decoderName = `decoder_joint-model${decoderSuffix}`;

  const repoFiles = await listRepoFiles(repoId, options.revision || 'main');

  // An empty listing means the lookup failed (e.g. offline) or returned nothing,
  // not that the files are absent. Probe the optional files in that case: they
  // may already be in the IndexedDB cache, and a miss is tolerated by the loop below.
  const listingKnown = repoFiles.length > 0;
  const shouldRequest = (name) => !listingKnown || repoFiles.includes(name);

  const filesToGet = [
    { key: 'encoderUrl', name: encoderName },
    { key: 'decoderUrl', name: decoderName },
    { key: 'tokenizerUrl', name: 'vocab.txt' },
    { key: 'preprocessorUrl', name: `${preprocessor}.onnx` },
  ];

  // Conditionally include external data files.
  if (shouldRequest(`${encoderName}.data`)) {
    filesToGet.push({ key: 'encoderDataUrl', name: `${encoderName}.data` });
  }

  if (shouldRequest(`${decoderName}.data`)) {
    filesToGet.push({ key: 'decoderDataUrl', name: `${decoderName}.data` });
  }

  const results = {
    urls: {},
    filenames: {
      encoder: encoderName,
      decoder: decoderName,
    },
    quantisation: { encoder: encoderQ, decoder: decoderQ },
  };

  // Sequential on purpose: one download at a time keeps progress reporting per file.
  for (const { key, name } of filesToGet) {
    try {
      const wrappedProgress = progress ? (p) => progress({ ...p, file: name }) : undefined;
      results.urls[key] = await getModelFile(repoId, name, { ...options, progress: wrappedProgress });
    } catch (e) {
      if (key.endsWith('DataUrl')) {
        console.warn(`[Hub] Optional external data file not found: ${name}. This is expected if the model is small.`);
        results.urls[key] = null;
      } else {
        throw e;
      }
    }
  }

  return results;
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
export { ParakeetModel } from './parakeet.js';
|
|
2
|
+
export { getModelFile, getModelText, getParakeetModel } from './hub.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Convenience factory to build a model from a configuration of file URLs.
 *
 * Loads `./parakeet.js` on demand and forwards `cfg` unchanged to
 * `ParakeetModel.fromUrls`.
 *
 * Example:
 *   import { fromUrls } from 'parakeet.js';
 *   const model = await fromUrls({ ... });
 *
 * @param {Object} cfg Passed through as-is. The accepted fields are defined by
 *   `ParakeetModel.fromUrls`, which is not visible from this module.
 * @returns {Promise<*>} Whatever `ParakeetModel.fromUrls` returns — presumably a
 *   ready-to-use model instance; confirm against parakeet.js.
 */
export async function fromUrls(cfg) {
  const { ParakeetModel } = await import('./parakeet.js');
  return ParakeetModel.fromUrls(cfg);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Convenience factory to load from HuggingFace Hub.
 *
 * Resolves the model files with `getParakeetModel` and hands the result,
 * merged with the caller's options, to `ParakeetModel.fromUrls`.
 *
 * Example:
 *   import { fromHub } from 'parakeet.js';
 *   const model = await fromHub('nvidia/parakeet-tdt-1.1b', { quantization: 'int8' });
 *
 * @param {string} repoId HF repo ID.
 * @param {Object} [options] Forwarded to both `getParakeetModel` and
 *   `ParakeetModel.fromUrls`; on key clashes the caller's options win.
 * @returns {Promise<*>} Whatever `ParakeetModel.fromUrls` returns.
 */
export async function fromHub(repoId, options = {}) {
  const { getParakeetModel } = await import('./hub.js');
  const { ParakeetModel } = await import('./parakeet.js');

  // getParakeetModel resolves to { urls, filenames, quantisation }, with the
  // per-file blob URLs (encoderUrl, decoderUrl, ...) nested under `urls`.
  const resolved = await getParakeetModel(repoId, options);

  // Expose the per-file URLs at the top level of the config as well as under
  // `urls`. The previously passed keys are all still present, so this is a
  // strict superset of the old config.
  // NOTE(review): assumes ParakeetModel.fromUrls reads encoderUrl/decoderUrl/...
  // from the top level of its config — confirm against parakeet.js.
  return ParakeetModel.fromUrls({ ...resolved, ...resolved.urls, ...options });
}
|