parakeet.js 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/.gitmodules +3 -0
  2. package/README.md +240 -239
  3. package/examples/hf-spaces-demo/README.md +6 -9
  4. package/examples/hf-spaces-demo/package.json +1 -1
  5. package/examples/hf-spaces-demo/src/App.js +307 -316
  6. package/examples/react-demo/package.json +19 -19
  7. package/examples/react-demo/src/App.jsx +324 -326
  8. package/examples/react-demo-dev/src/App.jsx +23 -24
  9. package/package.json +1 -1
  10. package/publish.ps1 +65 -0
  11. package/src/hub.js +235 -241
  12. package/src/parakeet.js +15 -8
  13. package/src/preprocessor.js +75 -68
  14. package/docs/parakeet-transformers-js/.gitattributes +0 -2
  15. package/docs/parakeet-transformers-js/.prettierignore +0 -8
  16. package/docs/parakeet-transformers-js/.prettierrc +0 -10
  17. package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
  18. package/docs/parakeet-transformers-js/LICENSE +0 -202
  19. package/docs/parakeet-transformers-js/README.md +0 -448
  20. package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
  21. package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
  22. package/docs/parakeet-transformers-js/debug_test.js +0 -84
  23. package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
  24. package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
  25. package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
  26. package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
  27. package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
  28. package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
  29. package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
  30. package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
  31. package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
  32. package/docs/parakeet-transformers-js/js_steps.json +0 -821
  33. package/docs/parakeet-transformers-js/package-lock.json +0 -12251
  34. package/docs/parakeet-transformers-js/package.json +0 -96
  35. package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
  36. package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
  37. package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
  38. package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
  39. package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
  40. package/docs/parakeet-transformers-js/src/configs.js +0 -455
  41. package/docs/parakeet-transformers-js/src/env.js +0 -167
  42. package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
  43. package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
  44. package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
  45. package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
  46. package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
  47. package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
  48. package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
  49. package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
  50. package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
  51. package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
  52. package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
  53. package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
  54. package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
  55. package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
  56. package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
  57. package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
  58. package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
  59. package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
  60. package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
  61. package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
  62. package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
  63. package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
  64. package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
  65. package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
  66. package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
  67. package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
  68. package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
  69. package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
  70. package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
  71. package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
  72. package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
  73. package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
  74. package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
  75. package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
  76. package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
  77. package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
  78. package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
  79. package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
  80. package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
  81. package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
  82. package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
  83. package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
  84. package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
  85. package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
  86. package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
  87. package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
  88. package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
  89. package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
  90. package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
  91. package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
  92. package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
  93. package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
  94. package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
  95. package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
  96. package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
  97. package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
  98. package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
  99. package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
  100. package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
  101. package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
  102. package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
  103. package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
  104. package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
  105. package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
  106. package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
  107. package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
  108. package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
  109. package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
  110. package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
  111. package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
  112. package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
  113. package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
  114. package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
  115. package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
  116. package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
  117. package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
  118. package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
  119. package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
  120. package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
  121. package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
  122. package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
  123. package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
  124. package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
  125. package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
  126. package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
  127. package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
  128. package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
  129. package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
  130. package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
  131. package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
  132. package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
  133. package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
  134. package/docs/parakeet-transformers-js/src/models.js +0 -8644
  135. package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
  136. package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
  137. package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
  138. package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
  139. package/docs/parakeet-transformers-js/src/processors.js +0 -16
  140. package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
  141. package/docs/parakeet-transformers-js/src/transformers.js +0 -50
  142. package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
  143. package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
  144. package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
  145. package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
  146. package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
  147. package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
  148. package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
  149. package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
  150. package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
  151. package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
  152. package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
  153. package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
  154. package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
  155. package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
  156. package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
  157. package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
  158. package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
  159. package/docs/parakeet-transformers-js/tsconfig.json +0 -21
  160. package/docs/parakeet-transformers-js/webpack.config.js +0 -223
package/src/hub.js CHANGED
@@ -1,242 +1,236 @@
1
- /**
2
- * Simplified HuggingFace Hub utilities for parakeet.js
3
- * Downloads models from HF and caches them in browser storage.
4
- */
5
-
6
- const DB_NAME = 'parakeet-cache-db';
7
- const STORE_NAME = 'file-store';
8
- let dbPromise = null;
9
-
10
- // Cache for repo file listings so we only hit the HF API once per page load
11
- const repoFileCache = new Map();
12
-
13
- async function listRepoFiles(repoId, revision = 'main') {
14
- const cacheKey = `${repoId}@${revision}`;
15
- if (repoFileCache.has(cacheKey)) return repoFileCache.get(cacheKey);
16
-
17
- const url = `https://huggingface.co/api/models/${repoId}?revision=${revision}`;
18
- try {
19
- const resp = await fetch(url);
20
- if (!resp.ok) throw new Error(`Failed to list repo files: ${resp.status}`);
21
- const json = await resp.json();
22
- const files = json.siblings?.map(s => s.rfilename) || [];
23
- repoFileCache.set(cacheKey, files);
24
- return files;
25
- } catch (err) {
26
- console.warn('[Hub] Could not fetch repo file list – falling back to optimistic fetch', err);
27
- // Return empty list so caller behaves like old code (may attempt fetch and catch 404)
28
- repoFileCache.set(cacheKey, []);
29
- return [];
30
- }
31
- }
32
-
33
- function getDb() {
34
- if (!dbPromise) {
35
- dbPromise = new Promise((resolve, reject) => {
36
- const request = indexedDB.open(DB_NAME, 1);
37
- request.onerror = () => reject("Error opening IndexedDB");
38
- request.onsuccess = () => resolve(request.result);
39
- request.onupgradeneeded = (event) => {
40
- const db = event.target.result;
41
- if (!db.objectStoreNames.contains(STORE_NAME)) {
42
- db.createObjectStore(STORE_NAME);
43
- }
44
- };
45
- });
46
- }
47
- return dbPromise;
48
- }
49
-
50
- async function getFileFromDb(key) {
51
- const db = await getDb();
52
- return new Promise((resolve, reject) => {
53
- const transaction = db.transaction([STORE_NAME], 'readonly');
54
- const store = transaction.objectStore(STORE_NAME);
55
- const request = store.get(key);
56
- request.onerror = () => reject("Error reading from DB");
57
- request.onsuccess = () => resolve(request.result);
58
- });
59
- }
60
-
61
- async function saveFileToDb(key, blob) {
62
- const db = await getDb();
63
- return new Promise((resolve, reject) => {
64
- const transaction = db.transaction([STORE_NAME], 'readwrite');
65
- const store = transaction.objectStore(STORE_NAME);
66
- const request = store.put(blob, key);
67
- request.onerror = () => reject("Error writing to DB");
68
- request.onsuccess = () => resolve(request.result);
69
- });
70
- }
71
-
72
- /**
73
- * Download a file from HuggingFace Hub with caching support.
74
- * @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
75
- * @param {string} filename File to download (e.g., 'encoder-model.onnx')
76
- * @param {Object} [options]
77
- * @param {string} [options.revision='main'] Git revision
78
- * @param {string} [options.subfolder=''] Subfolder within repo
79
- * @param {Function} [options.progress] Progress callback
80
- * @returns {Promise<string>} URL to cached file (blob URL)
81
- */
82
- export async function getModelFile(repoId, filename, options = {}) {
83
- const { revision = 'main', subfolder = '', progress } = options;
84
-
85
- // Construct HF URL
86
- const baseUrl = 'https://huggingface.co';
87
- const pathParts = [repoId, 'resolve', revision];
88
- if (subfolder) pathParts.push(subfolder);
89
- pathParts.push(filename);
90
- const url = `${baseUrl}/${pathParts.join('/')}`;
91
-
92
- // Check IndexedDB first
93
- const cacheKey = `hf-${repoId}-${revision}-${subfolder}-${filename}`;
94
-
95
- if (typeof indexedDB !== 'undefined') {
96
- try {
97
- const cachedBlob = await getFileFromDb(cacheKey);
98
- if (cachedBlob) {
99
- console.log(`[Hub] Using cached ${filename} from IndexedDB`);
100
- return URL.createObjectURL(cachedBlob);
101
- }
102
- } catch (e) {
103
- console.warn('[Hub] IndexedDB cache check failed:', e);
104
- }
105
- }
106
-
107
- // Download from HF
108
- console.log(`[Hub] Downloading ${filename} from ${repoId}...`);
109
- const response = await fetch(url);
110
- if (!response.ok) {
111
- throw new Error(`Failed to download ${filename}: ${response.status} ${response.statusText}`);
112
- }
113
-
114
- // Stream with progress
115
- const contentLength = response.headers.get('content-length');
116
- const total = contentLength ? parseInt(contentLength) : 0;
117
- let loaded = 0;
118
-
119
- const reader = response.body.getReader();
120
- const chunks = [];
121
-
122
- while (true) {
123
- const { done, value } = await reader.read();
124
- if (done) break;
125
-
126
- chunks.push(value);
127
- loaded += value.length;
128
-
129
- if (progress && total > 0) {
130
- progress({ loaded, total, file: filename });
131
- }
132
- }
133
-
134
- // Reconstruct blob
135
- const blob = new Blob(chunks, { type: response.headers.get('content-type') || 'application/octet-stream' });
136
-
137
- // Cache the blob in IndexedDB
138
- if (typeof indexedDB !== 'undefined') {
139
- try {
140
- await saveFileToDb(cacheKey, blob);
141
- console.log(`[Hub] Cached ${filename} in IndexedDB`);
142
- } catch (e) {
143
- console.warn('[Hub] Failed to cache in IndexedDB:', e);
144
- }
145
- }
146
-
147
- return URL.createObjectURL(blob);
148
- }
149
-
150
- /**
151
- * Download text file from HF Hub.
152
- * @param {string} repoId Model repo ID
153
- * @param {string} filename Text file to download
154
- * @param {Object} [options] Same as getModelFile
155
- * @returns {Promise<string>} File content as text
156
- */
157
- export async function getModelText(repoId, filename, options = {}) {
158
- const blobUrl = await getModelFile(repoId, filename, options);
159
- const response = await fetch(blobUrl);
160
- const text = await response.text();
161
- URL.revokeObjectURL(blobUrl); // Clean up blob URL
162
- return text;
163
- }
164
-
165
- /**
166
- * Convenience function to get all Parakeet model files for a given architecture.
167
- * @param {string} repoId HF repo (e.g., 'nvidia/parakeet-tdt-1.1b')
168
- * @param {Object} [options]
169
- * @param {('int8'|'fp32')} [options.quantization='int8'] Model quantization
170
- * @param {('nemo80'|'nemo128')} [options.preprocessor='nemo128'] Preprocessor variant
171
- * @param {('webgpu'|'wasm')} [options.backend='webgpu'] Backend to use
172
- * @param {Function} [options.progress] Progress callback
173
- * @returns {Promise<{urls: object, filenames: object}>}
174
- */
175
- export async function getParakeetModel(repoId, options = {}) {
176
- const { quantization = 'int8', preprocessor = 'nemo128', backend = 'webgpu', progress, decoderInt8 = false } = options;
177
-
178
- // Decide quantisation per component
179
- let encoderQ = quantization;
180
- let decoderQ = quantization;
181
-
182
- if (backend.startsWith('webgpu')) {
183
- if (encoderQ === 'int8') {
184
- console.log('[Hub] WebGPU encoder -> forcing fp32 for compatibility');
185
- encoderQ = 'fp32';
186
- }
187
- if (decoderInt8) {
188
- decoderQ = 'int8';
189
- } else {
190
- decoderQ = encoderQ; // same as encoder otherwise
191
- }
192
- }
193
-
194
- const encoderSuffix = encoderQ === 'int8' ? '.int8.onnx' : '.onnx';
195
- const decoderSuffix = decoderQ === 'int8' ? '.int8.onnx' : '.onnx';
196
-
197
- const encoderName = `encoder-model${encoderSuffix}`;
198
- const decoderName = `decoder_joint-model${decoderSuffix}`;
199
-
200
- const repoFiles = await listRepoFiles(repoId, options.revision || 'main');
201
-
202
- const filesToGet = [
203
- { key: 'encoderUrl', name: encoderName },
204
- { key: 'decoderUrl', name: decoderName },
205
- { key: 'tokenizerUrl', name: 'vocab.txt' },
206
- { key: 'preprocessorUrl', name: `${preprocessor}.onnx` },
207
- ];
208
-
209
- // Conditionally include external data files only if they exist in the repo file list.
210
- if (repoFiles.includes(`${encoderName}.data`)) {
211
- filesToGet.push({ key: 'encoderDataUrl', name: `${encoderName}.data` });
212
- }
213
-
214
- if (repoFiles.includes(`${decoderName}.data`)) {
215
- filesToGet.push({ key: 'decoderDataUrl', name: `${decoderName}.data` });
216
- }
217
-
218
- const results = {
219
- urls: {},
220
- filenames: {
221
- encoder: encoderName,
222
- decoder: decoderName
223
- },
224
- quantisation: { encoder: encoderQ, decoder: decoderQ }
225
- };
226
-
227
- for (const { key, name } of filesToGet) {
228
- try {
229
- const wrappedProgress = progress ? (p) => progress({ ...p, file: name }) : undefined;
230
- results.urls[key] = await getModelFile(repoId, name, { ...options, progress: wrappedProgress });
231
- } catch (e) {
232
- if (key.endsWith('DataUrl')) {
233
- console.warn(`[Hub] Optional external data file not found: ${name}. This is expected if the model is small.`);
234
- results.urls[key] = null;
235
- } else {
236
- throw e;
237
- }
238
- }
239
- }
240
-
241
- return results;
1
+ /**
2
+ * Simplified HuggingFace Hub utilities for parakeet.js
3
+ * Downloads models from HF and caches them in browser storage.
4
+ */
5
+
6
+ const DB_NAME = 'parakeet-cache-db';
7
+ const STORE_NAME = 'file-store';
8
+ let dbPromise = null;
9
+
10
+ // Cache for repo file listings so we only hit the HF API once per page load
11
+ const repoFileCache = new Map();
12
+
13
+ async function listRepoFiles(repoId, revision = 'main') {
14
+ const cacheKey = `${repoId}@${revision}`;
15
+ if (repoFileCache.has(cacheKey)) return repoFileCache.get(cacheKey);
16
+
17
+ const url = `https://huggingface.co/api/models/${repoId}?revision=${revision}`;
18
+ try {
19
+ const resp = await fetch(url);
20
+ if (!resp.ok) throw new Error(`Failed to list repo files: ${resp.status}`);
21
+ const json = await resp.json();
22
+ const files = json.siblings?.map(s => s.rfilename) || [];
23
+ repoFileCache.set(cacheKey, files);
24
+ return files;
25
+ } catch (err) {
26
+ console.warn('[Hub] Could not fetch repo file list – falling back to optimistic fetch', err);
27
+ // Return empty list so caller behaves like old code (may attempt fetch and catch 404)
28
+ repoFileCache.set(cacheKey, []);
29
+ return [];
30
+ }
31
+ }
32
+
33
+ function getDb() {
34
+ if (!dbPromise) {
35
+ dbPromise = new Promise((resolve, reject) => {
36
+ const request = indexedDB.open(DB_NAME, 1);
37
+ request.onerror = () => reject("Error opening IndexedDB");
38
+ request.onsuccess = () => resolve(request.result);
39
+ request.onupgradeneeded = (event) => {
40
+ const db = event.target.result;
41
+ if (!db.objectStoreNames.contains(STORE_NAME)) {
42
+ db.createObjectStore(STORE_NAME);
43
+ }
44
+ };
45
+ });
46
+ }
47
+ return dbPromise;
48
+ }
49
+
50
+ async function getFileFromDb(key) {
51
+ const db = await getDb();
52
+ return new Promise((resolve, reject) => {
53
+ const transaction = db.transaction([STORE_NAME], 'readonly');
54
+ const store = transaction.objectStore(STORE_NAME);
55
+ const request = store.get(key);
56
+ request.onerror = () => reject("Error reading from DB");
57
+ request.onsuccess = () => resolve(request.result);
58
+ });
59
+ }
60
+
61
+ async function saveFileToDb(key, blob) {
62
+ const db = await getDb();
63
+ return new Promise((resolve, reject) => {
64
+ const transaction = db.transaction([STORE_NAME], 'readwrite');
65
+ const store = transaction.objectStore(STORE_NAME);
66
+ const request = store.put(blob, key);
67
+ request.onerror = () => reject("Error writing to DB");
68
+ request.onsuccess = () => resolve(request.result);
69
+ });
70
+ }
71
+
72
+ /**
73
+ * Download a file from HuggingFace Hub with caching support.
74
+ * @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
75
+ * @param {string} filename File to download (e.g., 'encoder-model.onnx')
76
+ * @param {Object} [options]
77
+ * @param {string} [options.revision='main'] Git revision
78
+ * @param {string} [options.subfolder=''] Subfolder within repo
79
+ * @param {Function} [options.progress] Progress callback
80
+ * @returns {Promise<string>} URL to cached file (blob URL)
81
+ */
82
+ export async function getModelFile(repoId, filename, options = {}) {
83
+ const { revision = 'main', subfolder = '', progress } = options;
84
+
85
+ // Construct HF URL
86
+ const baseUrl = 'https://huggingface.co';
87
+ const pathParts = [repoId, 'resolve', revision];
88
+ if (subfolder) pathParts.push(subfolder);
89
+ pathParts.push(filename);
90
+ const url = `${baseUrl}/${pathParts.join('/')}`;
91
+
92
+ // Check IndexedDB first
93
+ const cacheKey = `hf-${repoId}-${revision}-${subfolder}-${filename}`;
94
+
95
+ if (typeof indexedDB !== 'undefined') {
96
+ try {
97
+ const cachedBlob = await getFileFromDb(cacheKey);
98
+ if (cachedBlob) {
99
+ console.log(`[Hub] Using cached ${filename} from IndexedDB`);
100
+ return URL.createObjectURL(cachedBlob);
101
+ }
102
+ } catch (e) {
103
+ console.warn('[Hub] IndexedDB cache check failed:', e);
104
+ }
105
+ }
106
+
107
+ // Download from HF
108
+ console.log(`[Hub] Downloading ${filename} from ${repoId}...`);
109
+ const response = await fetch(url);
110
+ if (!response.ok) {
111
+ throw new Error(`Failed to download ${filename}: ${response.status} ${response.statusText}`);
112
+ }
113
+
114
+ // Stream with progress
115
+ const contentLength = response.headers.get('content-length');
116
+ const total = contentLength ? parseInt(contentLength) : 0;
117
+ let loaded = 0;
118
+
119
+ const reader = response.body.getReader();
120
+ const chunks = [];
121
+
122
+ while (true) {
123
+ const { done, value } = await reader.read();
124
+ if (done) break;
125
+
126
+ chunks.push(value);
127
+ loaded += value.length;
128
+
129
+ if (progress && total > 0) {
130
+ progress({ loaded, total, file: filename });
131
+ }
132
+ }
133
+
134
+ // Reconstruct blob
135
+ const blob = new Blob(chunks, { type: response.headers.get('content-type') || 'application/octet-stream' });
136
+
137
+ // Cache the blob in IndexedDB
138
+ if (typeof indexedDB !== 'undefined') {
139
+ try {
140
+ await saveFileToDb(cacheKey, blob);
141
+ console.log(`[Hub] Cached ${filename} in IndexedDB`);
142
+ } catch (e) {
143
+ console.warn('[Hub] Failed to cache in IndexedDB:', e);
144
+ }
145
+ }
146
+
147
+ return URL.createObjectURL(blob);
148
+ }
149
+
150
+ /**
151
+ * Download text file from HF Hub.
152
+ * @param {string} repoId Model repo ID
153
+ * @param {string} filename Text file to download
154
+ * @param {Object} [options] Same as getModelFile
155
+ * @returns {Promise<string>} File content as text
156
+ */
157
+ export async function getModelText(repoId, filename, options = {}) {
158
+ const blobUrl = await getModelFile(repoId, filename, options);
159
+ const response = await fetch(blobUrl);
160
+ const text = await response.text();
161
+ URL.revokeObjectURL(blobUrl); // Clean up blob URL
162
+ return text;
163
+ }
164
+
165
+ /**
166
+ * Convenience function to get all Parakeet model files for a given architecture.
167
+ * @param {string} repoId HF repo (e.g., 'nvidia/parakeet-tdt-1.1b')
168
+ * @param {Object} [options]
169
+ * @param {('int8'|'fp32')} [options.encoderQuant='int8'] Encoder quantization
170
+ * @param {('int8'|'fp32')} [options.decoderQuant='int8'] Decoder quantization
171
+ * @param {('nemo80'|'nemo128')} [options.preprocessor='nemo128'] Preprocessor variant
172
+ * @param {('webgpu'|'wasm')} [options.backend='webgpu'] Backend to use
173
+ * @param {Function} [options.progress] Progress callback
174
+ * @returns {Promise<{urls: object, filenames: object}>}
175
+ */
176
+ export async function getParakeetModel(repoId, options = {}) {
177
+ const { encoderQuant = 'int8', decoderQuant = 'int8', preprocessor = 'nemo128', backend = 'webgpu', progress } = options;
178
+
179
+ // Decide quantisation per component
180
+ let encoderQ = encoderQuant;
181
+ let decoderQ = decoderQuant;
182
+
183
+ if (backend.startsWith('webgpu') && encoderQ === 'int8') {
184
+ console.warn('[Hub] Forcing encoder to fp32 on WebGPU (int8 unsupported)');
185
+ encoderQ = 'fp32';
186
+ }
187
+
188
+ const encoderSuffix = encoderQ === 'int8' ? '.int8.onnx' : '.onnx';
189
+ const decoderSuffix = decoderQ === 'int8' ? '.int8.onnx' : '.onnx';
190
+
191
+ const encoderName = `encoder-model${encoderSuffix}`;
192
+ const decoderName = `decoder_joint-model${decoderSuffix}`;
193
+
194
+ const repoFiles = await listRepoFiles(repoId, options.revision || 'main');
195
+
196
+ const filesToGet = [
197
+ { key: 'encoderUrl', name: encoderName },
198
+ { key: 'decoderUrl', name: decoderName },
199
+ { key: 'tokenizerUrl', name: 'vocab.txt' },
200
+ { key: 'preprocessorUrl', name: `${preprocessor}.onnx` },
201
+ ];
202
+
203
+ // Conditionally include external data files only if they exist in the repo file list.
204
+ if (repoFiles.includes(`${encoderName}.data`)) {
205
+ filesToGet.push({ key: 'encoderDataUrl', name: `${encoderName}.data` });
206
+ }
207
+
208
+ if (repoFiles.includes(`${decoderName}.data`)) {
209
+ filesToGet.push({ key: 'decoderDataUrl', name: `${decoderName}.data` });
210
+ }
211
+
212
+ const results = {
213
+ urls: {},
214
+ filenames: {
215
+ encoder: encoderName,
216
+ decoder: decoderName
217
+ },
218
+ quantisation: { encoder: encoderQ, decoder: decoderQ }
219
+ };
220
+
221
+ for (const { key, name } of filesToGet) {
222
+ try {
223
+ const wrappedProgress = progress ? (p) => progress({ ...p, file: name }) : undefined;
224
+ results.urls[key] = await getModelFile(repoId, name, { ...options, progress: wrappedProgress });
225
+ } catch (e) {
226
+ if (key.endsWith('DataUrl')) {
227
+ console.warn(`[Hub] Optional external data file not found: ${name}. This is expected if the model is small.`);
228
+ results.urls[key] = null;
229
+ } else {
230
+ throw e;
231
+ }
232
+ }
233
+ }
234
+
235
+ return results;
242
236
  }
package/src/parakeet.js CHANGED
@@ -63,8 +63,6 @@ export class ParakeetModel {
63
63
  verbose = false,
64
64
  enableProfiling = false,
65
65
  enableGraphCapture,
66
- decoderOnWasm = true,
67
- decoderInt8 = false,
68
66
  cpuThreads = undefined,
69
67
  } = cfg;
70
68
 
@@ -80,8 +78,15 @@ export class ParakeetModel {
80
78
  const ort = await initOrt({ backend: ortBackend, wasmPaths, numThreads: cpuThreads });
81
79
 
82
80
  // 2. Configure session options for better performance
83
- // Determine default graph-capture: only safe on pure WASM.
84
- const graphCaptureEnabled = enableGraphCapture ?? (backend === 'wasm');
81
+ // Graph-capture is beneficial only when every node runs on the same EP and
82
+ // ORT can fully record the graph (currently true only for a “strict”
83
+ // WebGPU session). We therefore enable it *only* when the caller passes
84
+ // `enableGraphCapture:true` **and** the selected backend is the strict
85
+ // WebGPU preset. In all other scenarios (hybrid WebGPU or pure WASM)
86
+ // it is forced off to avoid the “External buffer must be provided …”
87
+ // runtime error on recent ORT builds.
88
+ const graphCaptureEnabled = !!enableGraphCapture && backend === 'webgpu-strict';
89
+ const isFullWasm = backend === 'wasm';
85
90
 
86
91
  const baseSessionOptions = {
87
92
  executionProviders: [],
@@ -139,8 +144,10 @@ export class ParakeetModel {
139
144
  }];
140
145
  }
141
146
 
142
- // If requested, run decoder entirely on WASM to avoid per-step GPU overhead.
143
- if (backend === 'webgpu-hybrid' && decoderOnWasm) {
147
+ // In hybrid mode, the decoder is always run on WASM to avoid per-step
148
+ // stalls. In pure WASM mode, both EPs are WASM anyway.
149
+ if (backend.startsWith('webgpu')) {
150
+ // Force decoder to run on WASM
144
151
  decoderSessionOptions.executionProviders = ['wasm'];
145
152
  }
146
153
 
@@ -161,10 +168,10 @@ export class ParakeetModel {
161
168
  }
162
169
 
163
170
  const tokenizerPromise = ParakeetTokenizer.fromUrl(tokenizerUrl);
164
- const preprocPromise = Promise.resolve(new OnnxPreprocessor(preprocessorUrl, { backend, wasmPaths, enableProfiling, enableGraphCapture: graphCaptureEnabled, numThreads: cpuThreads }));
171
+ const preprocPromise = Promise.resolve(new OnnxPreprocessor(preprocessorUrl, { backend, wasmPaths, enableProfiling, enableGraphCapture: isFullWasm ? false : graphCaptureEnabled, numThreads: cpuThreads }));
165
172
 
166
173
  let encoderSession, joinerSession;
167
- if (backend === 'webgpu-hybrid' && decoderOnWasm) {
174
+ if (backend === 'webgpu-hybrid') {
168
175
  // avoid parallel create to prevent double initWasm race
169
176
  encoderSession = await createSession(encoderUrl, encoderSessionOptions);
170
177
  joinerSession = await createSession(decoderUrl, decoderSessionOptions);