parakeet.js 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/README.md +240 -239
- package/examples/hf-spaces-demo/README.md +6 -9
- package/examples/hf-spaces-demo/package.json +1 -1
- package/examples/hf-spaces-demo/src/App.js +307 -316
- package/examples/react-demo/package.json +19 -19
- package/examples/react-demo/src/App.jsx +324 -326
- package/examples/react-demo-dev/src/App.jsx +23 -24
- package/package.json +1 -1
- package/publish.ps1 +65 -0
- package/src/hub.js +235 -241
- package/src/parakeet.js +15 -8
- package/src/preprocessor.js +75 -68
- package/docs/parakeet-transformers-js/.gitattributes +0 -2
- package/docs/parakeet-transformers-js/.prettierignore +0 -8
- package/docs/parakeet-transformers-js/.prettierrc +0 -10
- package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
- package/docs/parakeet-transformers-js/LICENSE +0 -202
- package/docs/parakeet-transformers-js/README.md +0 -448
- package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
- package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
- package/docs/parakeet-transformers-js/debug_test.js +0 -84
- package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
- package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
- package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
- package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
- package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
- package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
- package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
- package/docs/parakeet-transformers-js/js_steps.json +0 -821
- package/docs/parakeet-transformers-js/package-lock.json +0 -12251
- package/docs/parakeet-transformers-js/package.json +0 -96
- package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
- package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
- package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
- package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
- package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
- package/docs/parakeet-transformers-js/src/configs.js +0 -455
- package/docs/parakeet-transformers-js/src/env.js +0 -167
- package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
- package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
- package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
- package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
- package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
- package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
- package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
- package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
- package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
- package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
- package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
- package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
- package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
- package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
- package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
- package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
- package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
- package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
- package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
- package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
- package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
- package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
- package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
- package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
- package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
- package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
- package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
- package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
- package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
- package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
- package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
- package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
- package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
- package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
- package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
- package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
- package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
- package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
- package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
- package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
- package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
- package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
- package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
- package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
- package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
- package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
- package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
- package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
- package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
- package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
- package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
- package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
- package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
- package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
- package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
- package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
- package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
- package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
- package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
- package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
- package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
- package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
- package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
- package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
- package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
- package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
- package/docs/parakeet-transformers-js/src/models.js +0 -8644
- package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
- package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
- package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
- package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
- package/docs/parakeet-transformers-js/src/processors.js +0 -16
- package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
- package/docs/parakeet-transformers-js/src/transformers.js +0 -50
- package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
- package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
- package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
- package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
- package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
- package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
- package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
- package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
- package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
- package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
- package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
- package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
- package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
- package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
- package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
- package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
- package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
- package/docs/parakeet-transformers-js/tsconfig.json +0 -21
- package/docs/parakeet-transformers-js/webpack.config.js +0 -223
package/src/hub.js
CHANGED
|
@@ -1,242 +1,236 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Simplified HuggingFace Hub utilities for parakeet.js
|
|
3
|
-
* Downloads models from HF and caches them in browser storage.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
const DB_NAME = 'parakeet-cache-db';
|
|
7
|
-
const STORE_NAME = 'file-store';
|
|
8
|
-
let dbPromise = null;
|
|
9
|
-
|
|
10
|
-
// Cache for repo file listings so we only hit the HF API once per page load
|
|
11
|
-
const repoFileCache = new Map();
|
|
12
|
-
|
|
13
|
-
async function listRepoFiles(repoId, revision = 'main') {
|
|
14
|
-
const cacheKey = `${repoId}@${revision}`;
|
|
15
|
-
if (repoFileCache.has(cacheKey)) return repoFileCache.get(cacheKey);
|
|
16
|
-
|
|
17
|
-
const url = `https://huggingface.co/api/models/${repoId}?revision=${revision}`;
|
|
18
|
-
try {
|
|
19
|
-
const resp = await fetch(url);
|
|
20
|
-
if (!resp.ok) throw new Error(`Failed to list repo files: ${resp.status}`);
|
|
21
|
-
const json = await resp.json();
|
|
22
|
-
const files = json.siblings?.map(s => s.rfilename) || [];
|
|
23
|
-
repoFileCache.set(cacheKey, files);
|
|
24
|
-
return files;
|
|
25
|
-
} catch (err) {
|
|
26
|
-
console.warn('[Hub] Could not fetch repo file list – falling back to optimistic fetch', err);
|
|
27
|
-
// Return empty list so caller behaves like old code (may attempt fetch and catch 404)
|
|
28
|
-
repoFileCache.set(cacheKey, []);
|
|
29
|
-
return [];
|
|
30
|
-
}
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
function getDb() {
|
|
34
|
-
if (!dbPromise) {
|
|
35
|
-
dbPromise = new Promise((resolve, reject) => {
|
|
36
|
-
const request = indexedDB.open(DB_NAME, 1);
|
|
37
|
-
request.onerror = () => reject("Error opening IndexedDB");
|
|
38
|
-
request.onsuccess = () => resolve(request.result);
|
|
39
|
-
request.onupgradeneeded = (event) => {
|
|
40
|
-
const db = event.target.result;
|
|
41
|
-
if (!db.objectStoreNames.contains(STORE_NAME)) {
|
|
42
|
-
db.createObjectStore(STORE_NAME);
|
|
43
|
-
}
|
|
44
|
-
};
|
|
45
|
-
});
|
|
46
|
-
}
|
|
47
|
-
return dbPromise;
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
async function getFileFromDb(key) {
|
|
51
|
-
const db = await getDb();
|
|
52
|
-
return new Promise((resolve, reject) => {
|
|
53
|
-
const transaction = db.transaction([STORE_NAME], 'readonly');
|
|
54
|
-
const store = transaction.objectStore(STORE_NAME);
|
|
55
|
-
const request = store.get(key);
|
|
56
|
-
request.onerror = () => reject("Error reading from DB");
|
|
57
|
-
request.onsuccess = () => resolve(request.result);
|
|
58
|
-
});
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
async function saveFileToDb(key, blob) {
|
|
62
|
-
const db = await getDb();
|
|
63
|
-
return new Promise((resolve, reject) => {
|
|
64
|
-
const transaction = db.transaction([STORE_NAME], 'readwrite');
|
|
65
|
-
const store = transaction.objectStore(STORE_NAME);
|
|
66
|
-
const request = store.put(blob, key);
|
|
67
|
-
request.onerror = () => reject("Error writing to DB");
|
|
68
|
-
request.onsuccess = () => resolve(request.result);
|
|
69
|
-
});
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
/**
|
|
73
|
-
* Download a file from HuggingFace Hub with caching support.
|
|
74
|
-
* @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
|
|
75
|
-
* @param {string} filename File to download (e.g., 'encoder-model.onnx')
|
|
76
|
-
* @param {Object} [options]
|
|
77
|
-
* @param {string} [options.revision='main'] Git revision
|
|
78
|
-
* @param {string} [options.subfolder=''] Subfolder within repo
|
|
79
|
-
* @param {Function} [options.progress] Progress callback
|
|
80
|
-
* @returns {Promise<string>} URL to cached file (blob URL)
|
|
81
|
-
*/
|
|
82
|
-
export async function getModelFile(repoId, filename, options = {}) {
|
|
83
|
-
const { revision = 'main', subfolder = '', progress } = options;
|
|
84
|
-
|
|
85
|
-
// Construct HF URL
|
|
86
|
-
const baseUrl = 'https://huggingface.co';
|
|
87
|
-
const pathParts = [repoId, 'resolve', revision];
|
|
88
|
-
if (subfolder) pathParts.push(subfolder);
|
|
89
|
-
pathParts.push(filename);
|
|
90
|
-
const url = `${baseUrl}/${pathParts.join('/')}`;
|
|
91
|
-
|
|
92
|
-
// Check IndexedDB first
|
|
93
|
-
const cacheKey = `hf-${repoId}-${revision}-${subfolder}-${filename}`;
|
|
94
|
-
|
|
95
|
-
if (typeof indexedDB !== 'undefined') {
|
|
96
|
-
try {
|
|
97
|
-
const cachedBlob = await getFileFromDb(cacheKey);
|
|
98
|
-
if (cachedBlob) {
|
|
99
|
-
console.log(`[Hub] Using cached ${filename} from IndexedDB`);
|
|
100
|
-
return URL.createObjectURL(cachedBlob);
|
|
101
|
-
}
|
|
102
|
-
} catch (e) {
|
|
103
|
-
console.warn('[Hub] IndexedDB cache check failed:', e);
|
|
104
|
-
}
|
|
105
|
-
}
|
|
106
|
-
|
|
107
|
-
// Download from HF
|
|
108
|
-
console.log(`[Hub] Downloading ${filename} from ${repoId}...`);
|
|
109
|
-
const response = await fetch(url);
|
|
110
|
-
if (!response.ok) {
|
|
111
|
-
throw new Error(`Failed to download ${filename}: ${response.status} ${response.statusText}`);
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
// Stream with progress
|
|
115
|
-
const contentLength = response.headers.get('content-length');
|
|
116
|
-
const total = contentLength ? parseInt(contentLength) : 0;
|
|
117
|
-
let loaded = 0;
|
|
118
|
-
|
|
119
|
-
const reader = response.body.getReader();
|
|
120
|
-
const chunks = [];
|
|
121
|
-
|
|
122
|
-
while (true) {
|
|
123
|
-
const { done, value } = await reader.read();
|
|
124
|
-
if (done) break;
|
|
125
|
-
|
|
126
|
-
chunks.push(value);
|
|
127
|
-
loaded += value.length;
|
|
128
|
-
|
|
129
|
-
if (progress && total > 0) {
|
|
130
|
-
progress({ loaded, total, file: filename });
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
// Reconstruct blob
|
|
135
|
-
const blob = new Blob(chunks, { type: response.headers.get('content-type') || 'application/octet-stream' });
|
|
136
|
-
|
|
137
|
-
// Cache the blob in IndexedDB
|
|
138
|
-
if (typeof indexedDB !== 'undefined') {
|
|
139
|
-
try {
|
|
140
|
-
await saveFileToDb(cacheKey, blob);
|
|
141
|
-
console.log(`[Hub] Cached ${filename} in IndexedDB`);
|
|
142
|
-
} catch (e) {
|
|
143
|
-
console.warn('[Hub] Failed to cache in IndexedDB:', e);
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
return URL.createObjectURL(blob);
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
/**
|
|
151
|
-
* Download text file from HF Hub.
|
|
152
|
-
* @param {string} repoId Model repo ID
|
|
153
|
-
* @param {string} filename Text file to download
|
|
154
|
-
* @param {Object} [options] Same as getModelFile
|
|
155
|
-
* @returns {Promise<string>} File content as text
|
|
156
|
-
*/
|
|
157
|
-
export async function getModelText(repoId, filename, options = {}) {
|
|
158
|
-
const blobUrl = await getModelFile(repoId, filename, options);
|
|
159
|
-
const response = await fetch(blobUrl);
|
|
160
|
-
const text = await response.text();
|
|
161
|
-
URL.revokeObjectURL(blobUrl); // Clean up blob URL
|
|
162
|
-
return text;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
/**
|
|
166
|
-
* Convenience function to get all Parakeet model files for a given architecture.
|
|
167
|
-
* @param {string} repoId HF repo (e.g., 'nvidia/parakeet-tdt-1.1b')
|
|
168
|
-
* @param {Object} [options]
|
|
169
|
-
* @param {('int8'|'fp32')} [options.
|
|
170
|
-
* @param {('
|
|
171
|
-
* @param {('
|
|
172
|
-
* @param {
|
|
173
|
-
* @
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
let
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
const
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
{ key: '
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
throw e;
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
return results;
|
|
1
|
+
/**
|
|
2
|
+
* Simplified HuggingFace Hub utilities for parakeet.js
|
|
3
|
+
* Downloads models from HF and caches them in browser storage.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const DB_NAME = 'parakeet-cache-db';
|
|
7
|
+
const STORE_NAME = 'file-store';
|
|
8
|
+
let dbPromise = null;
|
|
9
|
+
|
|
10
|
+
// Cache for repo file listings so we only hit the HF API once per page load
|
|
11
|
+
const repoFileCache = new Map();
|
|
12
|
+
|
|
13
|
+
async function listRepoFiles(repoId, revision = 'main') {
|
|
14
|
+
const cacheKey = `${repoId}@${revision}`;
|
|
15
|
+
if (repoFileCache.has(cacheKey)) return repoFileCache.get(cacheKey);
|
|
16
|
+
|
|
17
|
+
const url = `https://huggingface.co/api/models/${repoId}?revision=${revision}`;
|
|
18
|
+
try {
|
|
19
|
+
const resp = await fetch(url);
|
|
20
|
+
if (!resp.ok) throw new Error(`Failed to list repo files: ${resp.status}`);
|
|
21
|
+
const json = await resp.json();
|
|
22
|
+
const files = json.siblings?.map(s => s.rfilename) || [];
|
|
23
|
+
repoFileCache.set(cacheKey, files);
|
|
24
|
+
return files;
|
|
25
|
+
} catch (err) {
|
|
26
|
+
console.warn('[Hub] Could not fetch repo file list – falling back to optimistic fetch', err);
|
|
27
|
+
// Return empty list so caller behaves like old code (may attempt fetch and catch 404)
|
|
28
|
+
repoFileCache.set(cacheKey, []);
|
|
29
|
+
return [];
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
function getDb() {
|
|
34
|
+
if (!dbPromise) {
|
|
35
|
+
dbPromise = new Promise((resolve, reject) => {
|
|
36
|
+
const request = indexedDB.open(DB_NAME, 1);
|
|
37
|
+
request.onerror = () => reject("Error opening IndexedDB");
|
|
38
|
+
request.onsuccess = () => resolve(request.result);
|
|
39
|
+
request.onupgradeneeded = (event) => {
|
|
40
|
+
const db = event.target.result;
|
|
41
|
+
if (!db.objectStoreNames.contains(STORE_NAME)) {
|
|
42
|
+
db.createObjectStore(STORE_NAME);
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
return dbPromise;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
async function getFileFromDb(key) {
|
|
51
|
+
const db = await getDb();
|
|
52
|
+
return new Promise((resolve, reject) => {
|
|
53
|
+
const transaction = db.transaction([STORE_NAME], 'readonly');
|
|
54
|
+
const store = transaction.objectStore(STORE_NAME);
|
|
55
|
+
const request = store.get(key);
|
|
56
|
+
request.onerror = () => reject("Error reading from DB");
|
|
57
|
+
request.onsuccess = () => resolve(request.result);
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
async function saveFileToDb(key, blob) {
|
|
62
|
+
const db = await getDb();
|
|
63
|
+
return new Promise((resolve, reject) => {
|
|
64
|
+
const transaction = db.transaction([STORE_NAME], 'readwrite');
|
|
65
|
+
const store = transaction.objectStore(STORE_NAME);
|
|
66
|
+
const request = store.put(blob, key);
|
|
67
|
+
request.onerror = () => reject("Error writing to DB");
|
|
68
|
+
request.onsuccess = () => resolve(request.result);
|
|
69
|
+
});
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Download a file from HuggingFace Hub with caching support.
|
|
74
|
+
* @param {string} repoId Model repo ID (e.g., 'nvidia/parakeet-tdt-1.1b')
|
|
75
|
+
* @param {string} filename File to download (e.g., 'encoder-model.onnx')
|
|
76
|
+
* @param {Object} [options]
|
|
77
|
+
* @param {string} [options.revision='main'] Git revision
|
|
78
|
+
* @param {string} [options.subfolder=''] Subfolder within repo
|
|
79
|
+
* @param {Function} [options.progress] Progress callback
|
|
80
|
+
* @returns {Promise<string>} URL to cached file (blob URL)
|
|
81
|
+
*/
|
|
82
|
+
export async function getModelFile(repoId, filename, options = {}) {
|
|
83
|
+
const { revision = 'main', subfolder = '', progress } = options;
|
|
84
|
+
|
|
85
|
+
// Construct HF URL
|
|
86
|
+
const baseUrl = 'https://huggingface.co';
|
|
87
|
+
const pathParts = [repoId, 'resolve', revision];
|
|
88
|
+
if (subfolder) pathParts.push(subfolder);
|
|
89
|
+
pathParts.push(filename);
|
|
90
|
+
const url = `${baseUrl}/${pathParts.join('/')}`;
|
|
91
|
+
|
|
92
|
+
// Check IndexedDB first
|
|
93
|
+
const cacheKey = `hf-${repoId}-${revision}-${subfolder}-${filename}`;
|
|
94
|
+
|
|
95
|
+
if (typeof indexedDB !== 'undefined') {
|
|
96
|
+
try {
|
|
97
|
+
const cachedBlob = await getFileFromDb(cacheKey);
|
|
98
|
+
if (cachedBlob) {
|
|
99
|
+
console.log(`[Hub] Using cached ${filename} from IndexedDB`);
|
|
100
|
+
return URL.createObjectURL(cachedBlob);
|
|
101
|
+
}
|
|
102
|
+
} catch (e) {
|
|
103
|
+
console.warn('[Hub] IndexedDB cache check failed:', e);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Download from HF
|
|
108
|
+
console.log(`[Hub] Downloading ${filename} from ${repoId}...`);
|
|
109
|
+
const response = await fetch(url);
|
|
110
|
+
if (!response.ok) {
|
|
111
|
+
throw new Error(`Failed to download ${filename}: ${response.status} ${response.statusText}`);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Stream with progress
|
|
115
|
+
const contentLength = response.headers.get('content-length');
|
|
116
|
+
const total = contentLength ? parseInt(contentLength) : 0;
|
|
117
|
+
let loaded = 0;
|
|
118
|
+
|
|
119
|
+
const reader = response.body.getReader();
|
|
120
|
+
const chunks = [];
|
|
121
|
+
|
|
122
|
+
while (true) {
|
|
123
|
+
const { done, value } = await reader.read();
|
|
124
|
+
if (done) break;
|
|
125
|
+
|
|
126
|
+
chunks.push(value);
|
|
127
|
+
loaded += value.length;
|
|
128
|
+
|
|
129
|
+
if (progress && total > 0) {
|
|
130
|
+
progress({ loaded, total, file: filename });
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
// Reconstruct blob
|
|
135
|
+
const blob = new Blob(chunks, { type: response.headers.get('content-type') || 'application/octet-stream' });
|
|
136
|
+
|
|
137
|
+
// Cache the blob in IndexedDB
|
|
138
|
+
if (typeof indexedDB !== 'undefined') {
|
|
139
|
+
try {
|
|
140
|
+
await saveFileToDb(cacheKey, blob);
|
|
141
|
+
console.log(`[Hub] Cached ${filename} in IndexedDB`);
|
|
142
|
+
} catch (e) {
|
|
143
|
+
console.warn('[Hub] Failed to cache in IndexedDB:', e);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
return URL.createObjectURL(blob);
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* Download text file from HF Hub.
|
|
152
|
+
* @param {string} repoId Model repo ID
|
|
153
|
+
* @param {string} filename Text file to download
|
|
154
|
+
* @param {Object} [options] Same as getModelFile
|
|
155
|
+
* @returns {Promise<string>} File content as text
|
|
156
|
+
*/
|
|
157
|
+
export async function getModelText(repoId, filename, options = {}) {
|
|
158
|
+
const blobUrl = await getModelFile(repoId, filename, options);
|
|
159
|
+
const response = await fetch(blobUrl);
|
|
160
|
+
const text = await response.text();
|
|
161
|
+
URL.revokeObjectURL(blobUrl); // Clean up blob URL
|
|
162
|
+
return text;
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Convenience function to get all Parakeet model files for a given architecture.
|
|
167
|
+
* @param {string} repoId HF repo (e.g., 'nvidia/parakeet-tdt-1.1b')
|
|
168
|
+
* @param {Object} [options]
|
|
169
|
+
* @param {('int8'|'fp32')} [options.encoderQuant='int8'] Encoder quantization
|
|
170
|
+
* @param {('int8'|'fp32')} [options.decoderQuant='int8'] Decoder quantization
|
|
171
|
+
* @param {('nemo80'|'nemo128')} [options.preprocessor='nemo128'] Preprocessor variant
|
|
172
|
+
* @param {('webgpu'|'wasm')} [options.backend='webgpu'] Backend to use
|
|
173
|
+
* @param {Function} [options.progress] Progress callback
|
|
174
|
+
* @returns {Promise<{urls: object, filenames: object}>}
|
|
175
|
+
*/
|
|
176
|
+
export async function getParakeetModel(repoId, options = {}) {
|
|
177
|
+
const { encoderQuant = 'int8', decoderQuant = 'int8', preprocessor = 'nemo128', backend = 'webgpu', progress } = options;
|
|
178
|
+
|
|
179
|
+
// Decide quantisation per component
|
|
180
|
+
let encoderQ = encoderQuant;
|
|
181
|
+
let decoderQ = decoderQuant;
|
|
182
|
+
|
|
183
|
+
if (backend.startsWith('webgpu') && encoderQ === 'int8') {
|
|
184
|
+
console.warn('[Hub] Forcing encoder to fp32 on WebGPU (int8 unsupported)');
|
|
185
|
+
encoderQ = 'fp32';
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
const encoderSuffix = encoderQ === 'int8' ? '.int8.onnx' : '.onnx';
|
|
189
|
+
const decoderSuffix = decoderQ === 'int8' ? '.int8.onnx' : '.onnx';
|
|
190
|
+
|
|
191
|
+
const encoderName = `encoder-model${encoderSuffix}`;
|
|
192
|
+
const decoderName = `decoder_joint-model${decoderSuffix}`;
|
|
193
|
+
|
|
194
|
+
const repoFiles = await listRepoFiles(repoId, options.revision || 'main');
|
|
195
|
+
|
|
196
|
+
const filesToGet = [
|
|
197
|
+
{ key: 'encoderUrl', name: encoderName },
|
|
198
|
+
{ key: 'decoderUrl', name: decoderName },
|
|
199
|
+
{ key: 'tokenizerUrl', name: 'vocab.txt' },
|
|
200
|
+
{ key: 'preprocessorUrl', name: `${preprocessor}.onnx` },
|
|
201
|
+
];
|
|
202
|
+
|
|
203
|
+
// Conditionally include external data files only if they exist in the repo file list.
|
|
204
|
+
if (repoFiles.includes(`${encoderName}.data`)) {
|
|
205
|
+
filesToGet.push({ key: 'encoderDataUrl', name: `${encoderName}.data` });
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
if (repoFiles.includes(`${decoderName}.data`)) {
|
|
209
|
+
filesToGet.push({ key: 'decoderDataUrl', name: `${decoderName}.data` });
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
const results = {
|
|
213
|
+
urls: {},
|
|
214
|
+
filenames: {
|
|
215
|
+
encoder: encoderName,
|
|
216
|
+
decoder: decoderName
|
|
217
|
+
},
|
|
218
|
+
quantisation: { encoder: encoderQ, decoder: decoderQ }
|
|
219
|
+
};
|
|
220
|
+
|
|
221
|
+
for (const { key, name } of filesToGet) {
|
|
222
|
+
try {
|
|
223
|
+
const wrappedProgress = progress ? (p) => progress({ ...p, file: name }) : undefined;
|
|
224
|
+
results.urls[key] = await getModelFile(repoId, name, { ...options, progress: wrappedProgress });
|
|
225
|
+
} catch (e) {
|
|
226
|
+
if (key.endsWith('DataUrl')) {
|
|
227
|
+
console.warn(`[Hub] Optional external data file not found: ${name}. This is expected if the model is small.`);
|
|
228
|
+
results.urls[key] = null;
|
|
229
|
+
} else {
|
|
230
|
+
throw e;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
return results;
|
|
242
236
|
}
|
package/src/parakeet.js
CHANGED
|
@@ -63,8 +63,6 @@ export class ParakeetModel {
|
|
|
63
63
|
verbose = false,
|
|
64
64
|
enableProfiling = false,
|
|
65
65
|
enableGraphCapture,
|
|
66
|
-
decoderOnWasm = true,
|
|
67
|
-
decoderInt8 = false,
|
|
68
66
|
cpuThreads = undefined,
|
|
69
67
|
} = cfg;
|
|
70
68
|
|
|
@@ -80,8 +78,15 @@ export class ParakeetModel {
|
|
|
80
78
|
const ort = await initOrt({ backend: ortBackend, wasmPaths, numThreads: cpuThreads });
|
|
81
79
|
|
|
82
80
|
// 2. Configure session options for better performance
|
|
83
|
-
//
|
|
84
|
-
|
|
81
|
+
// Graph-capture is beneficial only when every node runs on the same EP and
|
|
82
|
+
// ORT can fully record the graph (currently true only for a “strict”
|
|
83
|
+
// WebGPU session). We therefore enable it *only* when the caller passes
|
|
84
|
+
// `enableGraphCapture:true` **and** the selected backend is the strict
|
|
85
|
+
// WebGPU preset. In all other scenarios (hybrid WebGPU or pure WASM)
|
|
86
|
+
// it is forced off to avoid the “External buffer must be provided …”
|
|
87
|
+
// runtime error on recent ORT builds.
|
|
88
|
+
const graphCaptureEnabled = !!enableGraphCapture && backend === 'webgpu-strict';
|
|
89
|
+
const isFullWasm = backend === 'wasm';
|
|
85
90
|
|
|
86
91
|
const baseSessionOptions = {
|
|
87
92
|
executionProviders: [],
|
|
@@ -139,8 +144,10 @@ export class ParakeetModel {
|
|
|
139
144
|
}];
|
|
140
145
|
}
|
|
141
146
|
|
|
142
|
-
//
|
|
143
|
-
|
|
147
|
+
// In hybrid mode, the decoder is always run on WASM to avoid per-step
|
|
148
|
+
// stalls. In pure WASM mode, both EPs are WASM anyway.
|
|
149
|
+
if (backend.startsWith('webgpu')) {
|
|
150
|
+
// Force decoder to run on WASM
|
|
144
151
|
decoderSessionOptions.executionProviders = ['wasm'];
|
|
145
152
|
}
|
|
146
153
|
|
|
@@ -161,10 +168,10 @@ export class ParakeetModel {
|
|
|
161
168
|
}
|
|
162
169
|
|
|
163
170
|
const tokenizerPromise = ParakeetTokenizer.fromUrl(tokenizerUrl);
|
|
164
|
-
const preprocPromise = Promise.resolve(new OnnxPreprocessor(preprocessorUrl, { backend, wasmPaths, enableProfiling, enableGraphCapture: graphCaptureEnabled, numThreads: cpuThreads }));
|
|
171
|
+
const preprocPromise = Promise.resolve(new OnnxPreprocessor(preprocessorUrl, { backend, wasmPaths, enableProfiling, enableGraphCapture: isFullWasm ? false : graphCaptureEnabled, numThreads: cpuThreads }));
|
|
165
172
|
|
|
166
173
|
let encoderSession, joinerSession;
|
|
167
|
-
if (backend === 'webgpu-hybrid'
|
|
174
|
+
if (backend === 'webgpu-hybrid') {
|
|
168
175
|
// avoid parallel create to prevent double initWasm race
|
|
169
176
|
encoderSession = await createSession(encoderUrl, encoderSessionOptions);
|
|
170
177
|
joinerSession = await createSession(decoderUrl, decoderSessionOptions);
|