parakeet.js 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/.gitmodules +3 -0
  2. package/README.md +240 -239
  3. package/examples/hf-spaces-demo/README.md +6 -9
  4. package/examples/hf-spaces-demo/package.json +1 -1
  5. package/examples/hf-spaces-demo/src/App.js +307 -316
  6. package/examples/react-demo/package.json +19 -19
  7. package/examples/react-demo/src/App.jsx +324 -326
  8. package/examples/react-demo-dev/src/App.jsx +23 -24
  9. package/package.json +1 -1
  10. package/publish.ps1 +65 -0
  11. package/src/hub.js +235 -241
  12. package/src/parakeet.js +15 -8
  13. package/src/preprocessor.js +75 -68
  14. package/docs/parakeet-transformers-js/.gitattributes +0 -2
  15. package/docs/parakeet-transformers-js/.prettierignore +0 -8
  16. package/docs/parakeet-transformers-js/.prettierrc +0 -10
  17. package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
  18. package/docs/parakeet-transformers-js/LICENSE +0 -202
  19. package/docs/parakeet-transformers-js/README.md +0 -448
  20. package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
  21. package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
  22. package/docs/parakeet-transformers-js/debug_test.js +0 -84
  23. package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
  24. package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
  25. package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
  26. package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
  27. package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
  28. package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
  29. package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
  30. package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
  31. package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
  32. package/docs/parakeet-transformers-js/js_steps.json +0 -821
  33. package/docs/parakeet-transformers-js/package-lock.json +0 -12251
  34. package/docs/parakeet-transformers-js/package.json +0 -96
  35. package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
  36. package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
  37. package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
  38. package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
  39. package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
  40. package/docs/parakeet-transformers-js/src/configs.js +0 -455
  41. package/docs/parakeet-transformers-js/src/env.js +0 -167
  42. package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
  43. package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
  44. package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
  45. package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
  46. package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
  47. package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
  48. package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
  49. package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
  50. package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
  51. package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
  52. package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
  53. package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
  54. package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
  55. package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
  56. package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
  57. package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
  58. package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
  59. package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
  60. package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
  61. package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
  62. package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
  63. package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
  64. package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
  65. package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
  66. package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
  67. package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
  68. package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
  69. package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
  70. package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
  71. package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
  72. package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
  73. package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
  74. package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
  75. package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
  76. package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
  77. package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
  78. package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
  79. package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
  80. package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
  81. package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
  82. package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
  83. package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
  84. package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
  85. package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
  86. package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
  87. package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
  88. package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
  89. package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
  90. package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
  91. package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
  92. package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
  93. package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
  94. package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
  95. package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
  96. package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
  97. package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
  98. package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
  99. package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
  100. package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
  101. package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
  102. package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
  103. package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
  104. package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
  105. package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
  106. package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
  107. package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
  108. package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
  109. package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
  110. package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
  111. package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
  112. package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
  113. package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
  114. package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
  115. package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
  116. package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
  117. package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
  118. package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
  119. package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
  120. package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
  121. package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
  122. package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
  123. package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
  124. package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
  125. package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
  126. package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
  127. package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
  128. package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
  129. package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
  130. package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
  131. package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
  132. package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
  133. package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
  134. package/docs/parakeet-transformers-js/src/models.js +0 -8644
  135. package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
  136. package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
  137. package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
  138. package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
  139. package/docs/parakeet-transformers-js/src/processors.js +0 -16
  140. package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
  141. package/docs/parakeet-transformers-js/src/transformers.js +0 -50
  142. package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
  143. package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
  144. package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
  145. package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
  146. package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
  147. package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
  148. package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
  149. package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
  150. package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
  151. package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
  152. package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
  153. package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
  154. package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
  155. package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
  156. package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
  157. package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
  158. package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
  159. package/docs/parakeet-transformers-js/tsconfig.json +0 -21
  160. package/docs/parakeet-transformers-js/webpack.config.js +0 -223
package/examples/hf-spaces-demo/src/App.js
@@ -1,316 +1,307 @@
1
- import React, { useState, useRef, useEffect } from 'react';
2
- import { ParakeetModel, getParakeetModel } from 'parakeet.js';
3
- import './App.css';
4
-
5
- export default function App() {
6
- const repoId = 'ysdede/parakeet-tdt-0.6b-v2-onnx';
7
- const [backend, setBackend] = useState('webgpu-hybrid');
8
- const [quant, setQuant] = useState('fp32');
9
- const [preprocessor, setPreprocessor] = useState('nemo128');
10
- const [status, setStatus] = useState('Idle');
11
- const [progress, setProgress] = useState('');
12
- const [progressText, setProgressText] = useState('');
13
- const [progressPct, setProgressPct] = useState(null);
14
- const [text, setText] = useState('');
15
- const [latestMetrics, setLatestMetrics] = useState(null);
16
- const [transcriptions, setTranscriptions] = useState([]);
17
- const [isTranscribing, setIsTranscribing] = useState(false);
18
- const [verboseLog, setVerboseLog] = useState(false);
19
- const [decoderInt8, setDecoderInt8] = useState(true);
20
- const [frameStride, setFrameStride] = useState(1);
21
- const [dumpDetail, setDumpDetail] = useState(false);
22
- const maxCores = navigator.hardwareConcurrency || 8;
23
- const [cpuThreads, setCpuThreads] = useState(Math.max(1, maxCores - 2));
24
- const modelRef = useRef(null);
25
- const fileInputRef = useRef(null);
26
-
27
- // Auto-adjust quant preset when backend changes
28
- useEffect(() => {
29
- if (backend.startsWith('webgpu')) {
30
- setQuant('fp32');
31
- } else if (backend === 'wasm') {
32
- setQuant('int8');
33
- }
34
- }, [backend]);
35
-
36
- async function loadModel() {
37
- setStatus('Loading model…');
38
- setProgress('');
39
- setProgressText('');
40
- setProgressPct(0);
41
- console.time('LoadModel');
42
-
43
- try {
44
- const progressCallback = ({ loaded, total, file }) => {
45
- const pct = total > 0 ? Math.round((loaded / total) * 100) : 0;
46
- setProgressText(`${file}: ${pct}%`);
47
- setProgressPct(pct);
48
- };
49
-
50
- // 1. Download all model files from HuggingFace Hub
51
- const modelUrls = await getParakeetModel(repoId, {
52
- quantization: quant,
53
- preprocessor,
54
- backend, // Pass backend to enable automatic fp32 selection for WebGPU
55
- decoderInt8,
56
- progress: progressCallback
57
- });
58
-
59
- // Show compiling sessions stage
60
- setStatus('Creating sessions…');
61
- setProgressText('Compiling model (this may take ~10 s)…');
62
- setProgressPct(null);
63
-
64
- // 2. Create the model instance with all file URLs
65
- modelRef.current = await ParakeetModel.fromUrls({
66
- ...modelUrls.urls,
67
- filenames: modelUrls.filenames,
68
- backend,
69
- verbose: verboseLog,
70
- decoderOnWasm: decoderInt8, // if we selected int8 decoder, keep it on WASM
71
- decoderInt8,
72
- cpuThreads,
73
- });
74
-
75
- // 3. Warm-up and verify
76
- setStatus('Warming up & verifying…');
77
- setProgressText('Model ready! Upload an audio file to transcribe.');
78
- setProgressPct(null);
79
-
80
- console.timeEnd('LoadModel');
81
- setStatus('Model ready ✔');
82
- setProgressText('');
83
- } catch (e) {
84
- console.error(e);
85
- setStatus(`Failed: ${e.message}`);
86
- setProgress('');
87
- }
88
- }
89
-
90
- async function transcribeFile(e) {
91
- if (!modelRef.current) return alert('Load model first');
92
- const file = e.target.files?.[0];
93
- if (!file) return;
94
-
95
- setIsTranscribing(true);
96
- setStatus(`Transcribing "${file.name}"…`);
97
-
98
- try {
99
- const buf = await file.arrayBuffer();
100
- const audioCtx = new AudioContext({ sampleRate: 16000 });
101
- const decoded = await audioCtx.decodeAudioData(buf);
102
- const pcm = decoded.getChannelData(0);
103
-
104
- console.time(`Transcribe-${file.name}`);
105
- const res = await modelRef.current.transcribe(pcm, 16_000, {
106
- returnTimestamps: true,
107
- returnConfidences: true,
108
- frameStride
109
- });
110
- console.timeEnd(`Transcribe-${file.name}`);
111
-
112
- if (dumpDetail) {
113
- console.log('[Parakeet] Detailed transcription output', res);
114
- }
115
- setLatestMetrics(res.metrics);
116
- // Add to transcriptions list
117
- const newTranscription = {
118
- id: Date.now(),
119
- filename: file.name,
120
- text: res.utterance_text,
121
- timestamp: new Date().toLocaleTimeString(),
122
- duration: pcm.length / 16000, // duration in seconds
123
- wordCount: res.words?.length || 0,
124
- confidence: res.confidence_scores?.overall_log_prob || null,
125
- metrics: res.metrics
126
- };
127
-
128
- setTranscriptions(prev => [newTranscription, ...prev]);
129
- setText(res.utterance_text); // Show latest transcription
130
- setStatus('Model ready ✔'); // Ready for next file
131
-
132
- } catch (error) {
133
- console.error('Transcription failed:', error);
134
- setStatus('Transcription failed');
135
- alert(`Failed to transcribe "${file.name}": ${error.message}`);
136
- } finally {
137
- setIsTranscribing(false);
138
- // Clear the file input so the same file can be selected again
139
- if (fileInputRef.current) {
140
- fileInputRef.current.value = '';
141
- }
142
- }
143
- }
144
-
145
- function clearTranscriptions() {
146
- setTranscriptions([]);
147
- setText('');
148
- }
149
-
150
- return (
151
- <div className="app">
152
- <h2>🦜 Parakeet.js - HF Spaces Demo</h2>
153
- <p>NVIDIA Parakeet speech recognition for the browser using WebGPU/WASM</p>
154
-
155
- <div className="controls">
156
- <p>
157
- <strong>Model:</strong> {repoId}
158
- </p>
159
- </div>
160
-
161
- <div className="controls">
162
- <label>
163
- Backend:
164
- <select value={backend} onChange={e=>setBackend(e.target.value)}>
165
- <option value="webgpu-hybrid">WebGPU (Hybrid)</option>
166
- <option value="webgpu-strict">WebGPU (Strict)</option>
167
- <option value="wasm">WASM (CPU)</option>
168
- </select>
169
- </label>
170
- {' '}
171
- <label>
172
- Quant:
173
- <select value={quant} onChange={e=>setQuant(e.target.value)}>
174
- <option value="int8">int8 (faster)</option>
175
- <option value="fp32">fp32 (higher quality)</option>
176
- </select>
177
- </label>
178
- {' '}
179
- {backend.startsWith('webgpu') && (
180
- <label style={{ fontSize:'0.9em' }}>
181
- <input type="checkbox" checked={decoderInt8} onChange={e=>setDecoderInt8(e.target.checked)} />
182
- Decoder INT8 on CPU
183
- </label>
184
- )}
185
- {' '}
186
- <label>
187
- Preprocessor:
188
- <select value={preprocessor} onChange={e=>setPreprocessor(e.target.value)}>
189
- <option value="nemo80">nemo80 (smaller)</option>
190
- <option value="nemo128">nemo128 (default)</option>
191
- </select>
192
- </label>
193
- {' '}
194
- <label>
195
- Stride:
196
- <select value={frameStride} onChange={e=>setFrameStride(Number(e.target.value))}>
197
- <option value={1}>1</option>
198
- <option value={2}>2</option>
199
- <option value={4}>4</option>
200
- </select>
201
- </label>
202
- {' '}
203
- <label>
204
- <input type="checkbox" checked={verboseLog} onChange={e => setVerboseLog(e.target.checked)} />
205
- Verbose Log
206
- </label>
207
- {' '}
208
- <label style={{fontSize:'0.9em'}}>
209
- <input type="checkbox" checked={dumpDetail} onChange={e=>setDumpDetail(e.target.checked)} />
210
- Dump result to console
211
- </label>
212
- {(backend === 'wasm' || decoderInt8) && (
213
- <label style={{fontSize:'0.9em'}}>
214
- Threads:
215
- <input type="number" min="1" max={maxCores} value={cpuThreads} onChange={e=>setCpuThreads(Number(e.target.value))} style={{width:'4rem'}} />
216
- </label>
217
- )}
218
- <button
219
- onClick={loadModel}
220
- disabled={!status.toLowerCase().includes('fail') && status !== 'Idle'}
221
- className="primary"
222
- >
223
- {status === 'Model ready ' ? 'Model Loaded' : 'Load Model'}
224
- </button>
225
- </div>
226
-
227
- {typeof SharedArrayBuffer === 'undefined' && backend === 'wasm' && (
228
- <div style={{
229
- marginBottom: '1rem',
230
- padding: '0.5rem',
231
- backgroundColor: '#fff3cd',
232
- border: '1px solid #ffeaa7',
233
- borderRadius: '4px',
234
- fontSize: '0.9em'
235
- }}>
236
- ⚠️ <strong>Performance Note:</strong> SharedArrayBuffer is not available.
237
- WASM will run single-threaded. For better performance, use WebGPU.
238
- </div>
239
- )}
240
-
241
- <div className="controls">
242
- <input
243
- ref={fileInputRef}
244
- type="file"
245
- accept="audio/*"
246
- onChange={transcribeFile}
247
- disabled={status !== 'Model ready ✔' || isTranscribing}
248
- />
249
- {transcriptions.length > 0 && (
250
- <button
251
- onClick={clearTranscriptions}
252
- style={{ marginLeft: '1rem', padding: '0.25rem 0.5rem' }}
253
- >
254
- Clear History
255
- </button>
256
- )}
257
- </div>
258
-
259
- <p>Status: {status}</p>
260
- {progressPct!==null && (
261
- <div className="progress-wrapper">
262
- <div className="progress-bar"><div style={{ width: `${progressPct}%` }} /></div>
263
- <p className="progress-text">{progressText}</p>
264
- </div>
265
- )}
266
-
267
- {/* Latest transcription */}
268
- <div className="controls">
269
- <h3>Latest Transcription:</h3>
270
- <textarea
271
- value={text}
272
- readOnly
273
- className="textarea"
274
- placeholder="Transcribed text will appear here..."
275
- />
276
- </div>
277
-
278
- {/* Latest transcription performace info */}
279
- {latestMetrics && (
280
- <div className="performance">
281
- <strong>RTF:</strong> {latestMetrics.rtf?.toFixed(2)}x &nbsp;|&nbsp; Total: {latestMetrics.total_ms} ms<br/>
282
- Preprocess {latestMetrics.preprocess_ms} ms · Encode {latestMetrics.encode_ms} ms · Decode {latestMetrics.decode_ms} ms · Tokenize {latestMetrics.tokenize_ms} ms
283
- </div>
284
- )}
285
-
286
- {/* Transcription history */}
287
- {transcriptions.length > 0 && (
288
- <div className="history">
289
- <h3>Transcription History ({transcriptions.length} files):</h3>
290
- <div style={{ maxHeight: '400px', overflowY: 'auto', border: '1px solid #ddd', borderRadius: '4px' }}>
291
- {transcriptions.map((trans) => (
292
- <div className="history-item" key={trans.id}>
293
- <div className="history-meta"><strong>{trans.filename}</strong><span>{trans.timestamp}</span></div>
294
- <div className="history-stats">Duration: {trans.duration.toFixed(1)}s | Words: {trans.wordCount}{trans.confidence && ` | Confidence: ${trans.confidence.toFixed(2)}`}{trans.metrics && ` | RTF: ${trans.metrics.rtf?.toFixed(2)}x`}</div>
295
- <div className="history-text">{trans.text}</div>
296
- </div>
297
- ))}
298
- </div>
299
- </div>
300
- )}
301
-
302
- <div style={{ marginTop: '2rem', padding: '1rem', backgroundColor: '#f8f9fa', borderRadius: '4px', fontSize: '0.9em' }}>
303
- <h4>🔗 Links:</h4>
304
- <p>
305
- <a href="https://github.com/ysdede/parakeet.js" target="_blank" rel="noopener noreferrer">
306
- GitHub Repository
307
- </a>
308
- {' | '}
309
- <a href="https://www.npmjs.com/package/parakeet.js" target="_blank" rel="noopener noreferrer">
310
- npm Package
311
- </a>
312
- </p>
313
- </div>
314
- </div>
315
- );
316
- }
1
+ import React, { useState, useRef, useEffect } from 'react';
2
+ import { ParakeetModel, getParakeetModel } from 'parakeet.js';
3
+ import './App.css';
4
+
5
+ export default function App() {
6
+ const repoId = 'istupakov/parakeet-tdt-0.6b-v2-onnx';
7
+ const [backend, setBackend] = useState('webgpu-hybrid');
8
+ const [encoderQuant, setEncoderQuant] = useState('fp32');
9
+ const [decoderQuant, setDecoderQuant] = useState('int8');
10
+ const [preprocessor, setPreprocessor] = useState('nemo128');
11
+ const [status, setStatus] = useState('Idle');
12
+ const [progress, setProgress] = useState('');
13
+ const [progressText, setProgressText] = useState('');
14
+ const [progressPct, setProgressPct] = useState(null);
15
+ const [text, setText] = useState('');
16
+ const [latestMetrics, setLatestMetrics] = useState(null);
17
+ const [transcriptions, setTranscriptions] = useState([]);
18
+ const [isTranscribing, setIsTranscribing] = useState(false);
19
+ const [verboseLog, setVerboseLog] = useState(false);
20
+ const [frameStride, setFrameStride] = useState(1);
21
+ const [dumpDetail, setDumpDetail] = useState(false);
22
+ const maxCores = navigator.hardwareConcurrency || 8;
23
+ const [cpuThreads, setCpuThreads] = useState(Math.max(1, maxCores - 2));
24
+ const modelRef = useRef(null);
25
+ const fileInputRef = useRef(null);
26
+
27
+ // Auto-adjust quant presets when backend changes
28
+ useEffect(() => {
29
+ if (backend.startsWith('webgpu')) {
30
+ setEncoderQuant('fp32');
31
+ setDecoderQuant('int8');
32
+ } else {
33
+ setEncoderQuant('int8');
34
+ setDecoderQuant('int8');
35
+ }
36
+ }, [backend]);
37
+
38
+ async function loadModel() {
39
+ setStatus('Loading model…');
40
+ setProgress('');
41
+ setProgressText('');
42
+ setProgressPct(0);
43
+ console.time('LoadModel');
44
+
45
+ try {
46
+ const progressCallback = (p) => setProgress(`${p.file}: ${Math.round(p.loaded/p.total*100)}%`);
47
+
48
+ // 1. Download all model files from HuggingFace Hub
49
+ const modelUrls = await getParakeetModel(repoId, {
50
+ encoderQuant,
51
+ decoderQuant,
52
+ preprocessor,
53
+ progress: progressCallback
54
+ });
55
+
56
+ // Show compiling sessions stage
57
+ setStatus('Creating sessions…');
58
+ setProgressText('Compiling model (this may take ~10 s)…');
59
+ setProgressPct(null);
60
+
61
+ // 2. Create the model instance with all file URLs
62
+ modelRef.current = await ParakeetModel.fromUrls({
63
+ ...modelUrls.urls,
64
+ backend,
65
+ });
66
+
67
+ // 3. Warm-up and verify
68
+ setStatus('Warming up & verifying…');
69
+ setProgressText('Model ready! Upload an audio file to transcribe.');
70
+ setProgressPct(null);
71
+
72
+ console.timeEnd('LoadModel');
73
+ setStatus('Model ready ✔');
74
+ setProgressText('');
75
+ } catch (e) {
76
+ console.error(e);
77
+ setStatus(`Failed: ${e.message}`);
78
+ setProgress('');
79
+ }
80
+ }
81
+
82
+ async function transcribeFile(e) {
83
+ if (!modelRef.current) return alert('Load model first');
84
+ const file = e.target.files?.[0];
85
+ if (!file) return;
86
+
87
+ setIsTranscribing(true);
88
+ setStatus(`Transcribing "${file.name}"…`);
89
+
90
+ try {
91
+ const buf = await file.arrayBuffer();
92
+ const audioCtx = new AudioContext({ sampleRate: 16000 });
93
+ const decoded = await audioCtx.decodeAudioData(buf);
94
+ const pcm = decoded.getChannelData(0);
95
+
96
+ console.time(`Transcribe-${file.name}`);
97
+ const res = await modelRef.current.transcribe(pcm, 16_000, {
98
+ returnTimestamps: true,
99
+ returnConfidences: true,
100
+ frameStride
101
+ });
102
+ console.timeEnd(`Transcribe-${file.name}`);
103
+
104
+ if (dumpDetail) {
105
+ console.log('[Parakeet] Detailed transcription output', res);
106
+ }
107
+ setLatestMetrics(res.metrics);
108
+ // Add to transcriptions list
109
+ const newTranscription = {
110
+ id: Date.now(),
111
+ filename: file.name,
112
+ text: res.utterance_text,
113
+ timestamp: new Date().toLocaleTimeString(),
114
+ duration: pcm.length / 16000, // duration in seconds
115
+ wordCount: res.words?.length || 0,
116
+ confidence: res.confidence_scores?.token_avg ?? res.confidence_scores?.word_avg ?? null,
117
+ metrics: res.metrics
118
+ };
119
+
120
+ setTranscriptions(prev => [newTranscription, ...prev]);
121
+ setText(res.utterance_text); // Show latest transcription
122
+ setStatus('Model ready ✔'); // Ready for next file
123
+
124
+ } catch (error) {
125
+ console.error('Transcription failed:', error);
126
+ setStatus('Transcription failed');
127
+ alert(`Failed to transcribe "${file.name}": ${error.message}`);
128
+ } finally {
129
+ setIsTranscribing(false);
130
+ // Clear the file input so the same file can be selected again
131
+ if (fileInputRef.current) {
132
+ fileInputRef.current.value = '';
133
+ }
134
+ }
135
+ }
136
+
137
+ function clearTranscriptions() {
138
+ setTranscriptions([]);
139
+ setText('');
140
+ }
141
+
142
+ return (
143
+ <div className="app">
144
+ <h2>🦜 Parakeet.js - HF Spaces Demo</h2>
145
+ <p>NVIDIA Parakeet speech recognition for the browser using WebGPU/WASM</p>
146
+
147
+ <div className="controls">
148
+ <p>
149
+ <strong>Model:</strong> {repoId}
150
+ </p>
151
+ </div>
152
+
153
+ <div className="controls">
154
+ <label>
155
+ Backend:
156
+ <select value={backend} onChange={e=>setBackend(e.target.value)}>
157
+ <option value="webgpu-hybrid">WebGPU</option>
158
+ <option value="wasm">WASM (CPU)</option>
159
+ </select>
160
+ </label>
161
+ {' '}
162
+ <label>
163
+ Encoder Quant:
164
+ <select value={encoderQuant} onChange={e=>setEncoderQuant(e.target.value)}>
165
+ <option value="int8">int8 (faster)</option>
166
+ <option value="fp32">fp32 (higher quality)</option>
167
+ </select>
168
+ </label>
169
+ {' '}
170
+ <label>
171
+ Decoder Quant:
172
+ <select value={decoderQuant} onChange={e=>setDecoderQuant(e.target.value)}>
173
+ <option value="int8">int8 (faster)</option>
174
+ <option value="fp32">fp32 (higher quality)</option>
175
+ </select>
176
+ </label>
177
+ {' '}
178
+ <label>
179
+ Preprocessor:
180
+ <select value={preprocessor} onChange={e=>setPreprocessor(e.target.value)}>
181
+ <option value="nemo128">nemo128 (default)</option>
182
+ </select>
183
+ </label>
184
+ {' '}
185
+ <label>
186
+ Stride:
187
+ <select value={frameStride} onChange={e=>setFrameStride(Number(e.target.value))}>
188
+ <option value={1}>1</option>
189
+ <option value={2}>2</option>
190
+ <option value={4}>4</option>
191
+ </select>
192
+ </label>
193
+ {' '}
194
+ <label>
195
+ <input type="checkbox" checked={verboseLog} onChange={e => setVerboseLog(e.target.checked)} />
196
+ Verbose Log
197
+ </label>
198
+ {' '}
199
+ <label style={{fontSize:'0.9em'}}>
200
+ <input type="checkbox" checked={dumpDetail} onChange={e=>setDumpDetail(e.target.checked)} />
201
+ Dump result to console
202
+ </label>
203
+ {(backend === 'wasm') && (
204
+ <label style={{fontSize:'0.9em'}}>
205
+ Threads:
206
+ <input type="number" min="1" max={maxCores} value={cpuThreads} onChange={e=>setCpuThreads(Number(e.target.value))} style={{width:'4rem'}} />
207
+ </label>
208
+ )}
209
+ <button
210
+ onClick={loadModel}
211
+ disabled={!status.toLowerCase().includes('fail') && status !== 'Idle'}
212
+ className="primary"
213
+ >
214
+ {status === 'Model ready ✔' ? 'Model Loaded' : 'Load Model'}
215
+ </button>
216
+ </div>
217
+
218
+ {typeof SharedArrayBuffer === 'undefined' && backend === 'wasm' && (
219
+ <div style={{
220
+ marginBottom: '1rem',
221
+ padding: '0.5rem',
222
+ backgroundColor: '#fff3cd',
223
+ border: '1px solid #ffeaa7',
224
+ borderRadius: '4px',
225
+ fontSize: '0.9em'
226
+ }}>
227
+ ⚠️ <strong>Performance Note:</strong> SharedArrayBuffer is not available.
228
+ WASM will run single-threaded. For better performance, use WebGPU.
229
+ </div>
230
+ )}
231
+
232
+ <div className="controls">
233
+ <input
234
+ ref={fileInputRef}
235
+ type="file"
236
+ accept="audio/*"
237
+ onChange={transcribeFile}
238
+ disabled={status !== 'Model ready ✔' || isTranscribing}
239
+ />
240
+ {transcriptions.length > 0 && (
241
+ <button
242
+ onClick={clearTranscriptions}
243
+ style={{ marginLeft: '1rem', padding: '0.25rem 0.5rem' }}
244
+ >
245
+ Clear History
246
+ </button>
247
+ )}
248
+ </div>
249
+
250
+ <p>Status: {status}</p>
251
+ {progressPct!==null && (
252
+ <div className="progress-wrapper">
253
+ <div className="progress-bar"><div style={{ width: `${progressPct}%` }} /></div>
254
+ <p className="progress-text">{progressText}</p>
255
+ </div>
256
+ )}
257
+
258
+ {/* Latest transcription */}
259
+ <div className="controls">
260
+ <h3>Latest Transcription:</h3>
261
+ <textarea
262
+ value={text}
263
+ readOnly
264
+ className="textarea"
265
+ placeholder="Transcribed text will appear here..."
266
+ />
267
+ </div>
268
+
269
+ {/* Latest transcription performace info */}
270
+ {latestMetrics && (
271
+ <div className="performance">
272
+ <strong>RTF:</strong> {latestMetrics.rtf?.toFixed(2)}x &nbsp;|&nbsp; Total: {latestMetrics.total_ms} ms<br/>
273
+ Preprocess {latestMetrics.preprocess_ms} ms · Encode {latestMetrics.encode_ms} ms · Decode {latestMetrics.decode_ms} ms · Tokenize {latestMetrics.tokenize_ms} ms
274
+ </div>
275
+ )}
276
+
277
+ {/* Transcription history */}
278
+ {transcriptions.length > 0 && (
279
+ <div className="history">
280
+ <h3>Transcription History ({transcriptions.length} files):</h3>
281
+ <div style={{ maxHeight: '400px', overflowY: 'auto', border: '1px solid #ddd', borderRadius: '4px' }}>
282
+ {transcriptions.map((trans) => (
283
+ <div className="history-item" key={trans.id}>
284
+ <div className="history-meta"><strong>{trans.filename}</strong><span>{trans.timestamp}</span></div>
285
+ <div className="history-stats">Duration: {trans.duration.toFixed(1)}s | Words: {trans.wordCount}{trans.confidence && ` | Confidence: ${trans.confidence.toFixed(2)}`}{trans.metrics && ` | RTF: ${trans.metrics.rtf?.toFixed(2)}x`}</div>
286
+ <div className="history-text">{trans.text}</div>
287
+ </div>
288
+ ))}
289
+ </div>
290
+ </div>
291
+ )}
292
+
293
+ <div style={{ marginTop: '2rem', padding: '1rem', backgroundColor: '#f8f9fa', borderRadius: '4px', fontSize: '0.9em' }}>
294
+ <h4>🔗 Links:</h4>
295
+ <p>
296
+ <a href="https://github.com/ysdede/parakeet.js" target="_blank" rel="noopener noreferrer">
297
+ GitHub Repository
298
+ </a>
299
+ {' | '}
300
+ <a href="https://www.npmjs.com/package/parakeet.js" target="_blank" rel="noopener noreferrer">
301
+ npm Package
302
+ </a>
303
+ </p>
304
+ </div>
305
+ </div>
306
+ );
307
+ }