parakeet.js 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hub.js ADDED
@@ -0,0 +1,242 @@
1
+ /**
2
+ * Simplified HuggingFace Hub utilities for parakeet.js
3
+ * Downloads models from HF and caches them in browser storage.
4
+ */
5
+
6
// In-memory memo of HF repo file listings, so repeated lookups during a page
// load can be answered without calling the HF API again.
const repoFileCache = new Map();

// Names of the IndexedDB object store and database that hold cached files.
const STORE_NAME = 'file-store';
const DB_NAME = 'parakeet-cache-db';

// Promise for the opened database; null until something first asks for it.
let dbPromise = null;
12
+
13
/**
 * List the file names of a HuggingFace model repo at a given revision.
 *
 * Successful listings are memoised in `repoFileCache`. A failed lookup is NOT
 * memoised, so a transient network/API error cannot hide the repo's files for
 * the rest of the page load; the next call simply tries again.
 *
 * @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
 * @param {string} [revision='main'] Git revision
 * @returns {Promise<string[]>} File names from the repo's `siblings` list, or
 *   an empty array when the listing could not be fetched
 */
async function listRepoFiles(repoId, revision = 'main') {
  const cacheKey = `${repoId}@${revision}`;
  if (repoFileCache.has(cacheKey)) return repoFileCache.get(cacheKey);

  // Encode the revision so characters such as '/', '&' or '#' cannot corrupt the query string.
  const url = `https://huggingface.co/api/models/${repoId}?revision=${encodeURIComponent(revision)}`;
  try {
    const resp = await fetch(url);
    if (!resp.ok) throw new Error(`Failed to list repo files: ${resp.status}`);
    const json = await resp.json();
    const files = json.siblings?.map((s) => s.rfilename) ?? [];
    repoFileCache.set(cacheKey, files);
    return files;
  } catch (err) {
    console.warn('[Hub] Could not fetch repo file list – falling back to optimistic fetch', err);
    // Deliberately not cached: an empty list here means "unknown", not "repo has no files".
    return [];
  }
}
32
+
33
/**
 * Open (creating on first use) the IndexedDB database used as the file cache.
 *
 * The in-flight/opened connection is shared through `dbPromise`. If opening
 * fails, the shared promise is cleared so that a later call retries instead of
 * replaying the same rejection forever.
 *
 * @returns {Promise<IDBDatabase>} The opened database
 * @throws {Error} (as a rejection) when the database cannot be opened; the
 *   underlying IndexedDB error is attached as `cause`
 */
function getDb() {
  if (!dbPromise) {
    const opening = new Promise((resolve, reject) => {
      const request = indexedDB.open(DB_NAME, 1);
      request.onerror = () => reject(new Error('Error opening IndexedDB', { cause: request.error }));
      request.onsuccess = () => resolve(request.result);
      // Runs on first creation (version 1): make sure the object store exists.
      request.onupgradeneeded = (event) => {
        const db = event.target.result;
        if (!db.objectStoreNames.contains(STORE_NAME)) {
          db.createObjectStore(STORE_NAME);
        }
      };
    });
    dbPromise = opening;
    // Drop a failed attempt (only if it is still the current one) so the next caller retries.
    opening.catch(() => {
      if (dbPromise === opening) dbPromise = null;
    });
  }
  return dbPromise;
}
49
+
50
/**
 * Read one entry from the cache object store.
 *
 * @param {string} key Cache key the entry was stored under
 * @returns {Promise<*>} The stored value, or `undefined` when the key is absent
 * @throws {Error} (as a rejection) when the read request fails; the underlying
 *   IndexedDB error is attached as `cause`
 */
async function getFileFromDb(key) {
  const db = await getDb();
  return new Promise((resolve, reject) => {
    const transaction = db.transaction([STORE_NAME], 'readonly');
    const request = transaction.objectStore(STORE_NAME).get(key);
    // Reject with a real Error (not a bare string) so callers get a stack and the root cause.
    request.onerror = () => reject(new Error('Error reading from DB', { cause: request.error }));
    request.onsuccess = () => resolve(request.result);
  });
}
60
+
61
/**
 * Store a value in the cache object store under the given key.
 *
 * The promise settles with the outcome of the whole transaction rather than of
 * the `put` request alone: a request can succeed and the transaction still
 * abort afterwards (for example when the storage quota is exceeded), in which
 * case nothing was actually persisted.
 *
 * @param {string} key Cache key to store under
 * @param {Blob} blob Value to store
 * @returns {Promise<*>} Result of the `put` request once the transaction has completed
 * @throws {Error} (as a rejection) when the request or transaction fails or
 *   aborts; the underlying IndexedDB error is attached as `cause`
 */
async function saveFileToDb(key, blob) {
  const db = await getDb();
  return new Promise((resolve, reject) => {
    const transaction = db.transaction([STORE_NAME], 'readwrite');
    const request = transaction.objectStore(STORE_NAME).put(blob, key);
    const fail = (source) => reject(new Error('Error writing to DB', { cause: source.error }));
    request.onerror = () => fail(request);
    transaction.onerror = () => fail(transaction);
    transaction.onabort = () => fail(transaction);
    // Only report success once the write has been committed.
    transaction.oncomplete = () => resolve(request.result);
  });
}
71
+
72
/**
 * Download a file from HuggingFace Hub with caching support.
 *
 * When IndexedDB is available the cache is consulted first, and a fresh
 * download is stored afterwards. Cache failures are logged and otherwise
 * ignored, so caching is best-effort and never blocks a download.
 *
 * Every call creates a new blob URL. The caller owns it and should release it
 * with `URL.revokeObjectURL` once it is no longer needed.
 *
 * @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
 * @param {string} filename File to download (e.g., 'encoder-model.onnx')
 * @param {Object} [options]
 * @param {string} [options.revision='main'] Git revision
 * @param {string} [options.subfolder=''] Subfolder within repo
 * @param {Function} [options.progress] Progress callback, called with
 *   `{ loaded, total, file }`; only invoked when the response declares a
 *   positive content-length
 * @returns {Promise<string>} URL to cached file (blob URL)
 * @throws {Error} If the download response is not OK
 */
export async function getModelFile(repoId, filename, options = {}) {
  const { revision = 'main', subfolder = '', progress } = options;

  // Construct HF URL: <base>/<repoId>/resolve/<revision>[/<subfolder>]/<filename>
  const baseUrl = 'https://huggingface.co';
  const pathParts = [repoId, 'resolve', revision];
  if (subfolder) pathParts.push(subfolder);
  pathParts.push(filename);
  const url = `${baseUrl}/${pathParts.join('/')}`;

  const cacheKey = `hf-${repoId}-${revision}-${subfolder}-${filename}`;
  const canCache = typeof indexedDB !== 'undefined';

  // Check IndexedDB first
  if (canCache) {
    try {
      const cachedBlob = await getFileFromDb(cacheKey);
      if (cachedBlob) {
        console.log(`[Hub] Using cached ${filename} from IndexedDB`);
        return URL.createObjectURL(cachedBlob);
      }
    } catch (e) {
      console.warn('[Hub] IndexedDB cache check failed:', e);
    }
  }

  // Download from HF
  console.log(`[Hub] Downloading ${filename} from ${repoId}...`);
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to download ${filename}: ${response.status} ${response.statusText}`);
  }

  // A missing or malformed header yields 0/NaN; `total > 0` is then false and progress is skipped.
  const contentLength = response.headers.get('content-length');
  const total = contentLength ? Number.parseInt(contentLength, 10) : 0;
  const type = response.headers.get('content-type') || 'application/octet-stream';
  const report = (loaded) => {
    if (progress && total > 0) {
      progress({ loaded, total, file: filename });
    }
  };

  let blob;
  if (response.body) {
    // Stream with progress
    const reader = response.body.getReader();
    const chunks = [];
    let loaded = 0;
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
      loaded += value.length;
      report(loaded);
    }
    blob = new Blob(chunks, { type });
  } else {
    // `response.body` can be null (no streaming support): read the payload in one go.
    blob = new Blob([await response.blob()], { type });
    report(blob.size);
  }

  // Cache the blob in IndexedDB
  if (canCache) {
    try {
      await saveFileToDb(cacheKey, blob);
      console.log(`[Hub] Cached ${filename} in IndexedDB`);
    } catch (e) {
      console.warn('[Hub] Failed to cache in IndexedDB:', e);
    }
  }

  return URL.createObjectURL(blob);
}
149
+
150
/**
 * Download text file from HF Hub.
 *
 * Fetches the file through `getModelFile`, reads the resulting blob URL as
 * text, and always releases that blob URL afterwards, including when reading
 * fails.
 *
 * @param {string} repoId Model repo ID
 * @param {string} filename Text file to download
 * @param {Object} [options] Same as getModelFile
 * @returns {Promise<string>} File content as text
 */
export async function getModelText(repoId, filename, options = {}) {
  const blobUrl = await getModelFile(repoId, filename, options);
  try {
    const response = await fetch(blobUrl);
    return await response.text();
  } finally {
    // Clean up the blob URL on success and on failure so it cannot leak.
    URL.revokeObjectURL(blobUrl);
  }
}
164
+
165
/**
 * Convenience function to get all Parakeet model files for a given architecture.
 *
 * Files are downloaded one after another through `getModelFile`. External
 * weight files (`<model>.data`) are optional: they are requested when the repo
 * listing contains them, and also when the listing is unavailable (empty), in
 * which case a failed download is tolerated and recorded as `null`.
 *
 * @param {string} repoId HF repo (e.g., 'nvidia/parakeet-tdt-1.1b')
 * @param {Object} [options] Also forwarded to getModelFile (e.g. `revision`, `subfolder`)
 * @param {('int8'|'fp32')} [options.quantization='int8'] Model quantization
 * @param {('nemo80'|'nemo128')} [options.preprocessor='nemo128'] Preprocessor variant
 * @param {('webgpu'|'wasm')} [options.backend='webgpu'] Backend to use
 * @param {boolean} [options.decoderInt8=false] On WebGPU backends, use the int8 decoder
 *   even though the encoder is forced to fp32
 * @param {string} [options.revision='main'] Git revision
 * @param {Function} [options.progress] Progress callback
 * @returns {Promise<{urls: object, filenames: object, quantisation: object}>}
 *   `urls` maps encoderUrl/decoderUrl/tokenizerUrl/preprocessorUrl (and, when
 *   requested, encoderDataUrl/decoderDataUrl) to blob URLs; `filenames` and
 *   `quantisation` describe the encoder and decoder actually selected
 * @throws {Error} If any required (non-`.data`) file fails to download
 */
export async function getParakeetModel(repoId, options = {}) {
  const { quantization = 'int8', preprocessor = 'nemo128', backend = 'webgpu', progress, decoderInt8 = false } = options;

  // Decide quantisation per component
  let encoderQ = quantization;
  let decoderQ = quantization;

  if (backend.startsWith('webgpu')) {
    if (encoderQ === 'int8') {
      console.log('[Hub] WebGPU encoder -> forcing fp32 for compatibility');
      encoderQ = 'fp32';
    }
    // Decoder follows the encoder unless int8 was explicitly requested for it.
    decoderQ = decoderInt8 ? 'int8' : encoderQ;
  }

  const suffixFor = (q) => (q === 'int8' ? '.int8.onnx' : '.onnx');
  const encoderName = `encoder-model${suffixFor(encoderQ)}`;
  const decoderName = `decoder_joint-model${suffixFor(decoderQ)}`;

  const repoFiles = await listRepoFiles(repoId, options.revision || 'main');

  // listRepoFiles returns [] when the listing could not be fetched. Treat that as
  // "unknown" and try the optional files anyway; the catch below absorbs a miss.
  const listingKnown = repoFiles.length > 0;
  const mayExist = (name) => !listingKnown || repoFiles.includes(name);

  const filesToGet = [
    { key: 'encoderUrl', name: encoderName },
    { key: 'decoderUrl', name: decoderName },
    { key: 'tokenizerUrl', name: 'vocab.txt' },
    { key: 'preprocessorUrl', name: `${preprocessor}.onnx` },
  ];

  // Conditionally include external data files.
  if (mayExist(`${encoderName}.data`)) {
    filesToGet.push({ key: 'encoderDataUrl', name: `${encoderName}.data` });
  }

  if (mayExist(`${decoderName}.data`)) {
    filesToGet.push({ key: 'decoderDataUrl', name: `${decoderName}.data` });
  }

  const results = {
    urls: {},
    filenames: {
      encoder: encoderName,
      decoder: decoderName,
    },
    quantisation: { encoder: encoderQ, decoder: decoderQ },
  };

  for (const { key, name } of filesToGet) {
    try {
      const wrappedProgress = progress ? (p) => progress({ ...p, file: name }) : undefined;
      results.urls[key] = await getModelFile(repoId, name, { ...options, progress: wrappedProgress });
    } catch (e) {
      if (key.endsWith('DataUrl')) {
        console.warn(`[Hub] Optional external data file not found: ${name}. This is expected if the model is small.`);
        results.urls[key] = null;
      } else {
        throw e;
      }
    }
  }

  return results;
}
package/src/index.js ADDED
@@ -0,0 +1,29 @@
1
+ export { ParakeetModel } from './parakeet.js';
2
+ export { getModelFile, getModelText, getParakeetModel } from './hub.js';
3
+
4
/**
 * Convenience factory that builds a model from an explicit configuration.
 * The configuration is handed unchanged to `ParakeetModel.fromUrls`, whose
 * result is returned.
 *
 * Example:
 * import { fromUrls } from 'parakeet.js';
 * const model = await fromUrls({ ... });
 */
export async function fromUrls(cfg) {
  // Load the model module on demand, then delegate to its static factory.
  const parakeetModule = await import('./parakeet.js');
  return parakeetModule.ParakeetModel.fromUrls(cfg);
}
15
+
16
/**
 * Convenience factory to load from HuggingFace Hub.
 *
 * Resolves the model files with `getParakeetModel` and passes the result to
 * `ParakeetModel.fromUrls`. `getParakeetModel` returns
 * `{ urls, filenames, quantisation }`, so the individual URLs (encoderUrl,
 * decoderUrl, ...) are also spread to the top level of the config; the nested
 * `urls` object is kept too, so either shape is available.
 * NOTE(review): confirm which shape `ParakeetModel.fromUrls` reads.
 *
 * Caller-supplied `options` are spread last and win on key collisions.
 *
 * Example:
 * import { fromHub } from 'parakeet.js';
 * const model = await fromHub('nvidia/parakeet-tdt-1.1b', { quantization: 'int8' });
 */
export async function fromHub(repoId, options = {}) {
  const { getParakeetModel } = await import('./hub.js');
  const { ParakeetModel } = await import('./parakeet.js');

  const resolved = await getParakeetModel(repoId, options);
  return ParakeetModel.fromUrls({ ...resolved, ...resolved.urls, ...options });
}