parakeet.js 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/README.md +240 -239
- package/examples/hf-spaces-demo/README.md +6 -9
- package/examples/hf-spaces-demo/package.json +1 -1
- package/examples/hf-spaces-demo/src/App.js +307 -316
- package/examples/react-demo/package.json +19 -19
- package/examples/react-demo/src/App.jsx +324 -326
- package/examples/react-demo-dev/src/App.jsx +23 -24
- package/package.json +1 -1
- package/publish.ps1 +65 -0
- package/src/hub.js +235 -241
- package/src/parakeet.js +15 -8
- package/src/preprocessor.js +75 -68
- package/docs/parakeet-transformers-js/.gitattributes +0 -2
- package/docs/parakeet-transformers-js/.prettierignore +0 -8
- package/docs/parakeet-transformers-js/.prettierrc +0 -10
- package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
- package/docs/parakeet-transformers-js/LICENSE +0 -202
- package/docs/parakeet-transformers-js/README.md +0 -448
- package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
- package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
- package/docs/parakeet-transformers-js/debug_test.js +0 -84
- package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
- package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
- package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
- package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
- package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
- package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
- package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
- package/docs/parakeet-transformers-js/js_steps.json +0 -821
- package/docs/parakeet-transformers-js/package-lock.json +0 -12251
- package/docs/parakeet-transformers-js/package.json +0 -96
- package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
- package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
- package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
- package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
- package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
- package/docs/parakeet-transformers-js/src/configs.js +0 -455
- package/docs/parakeet-transformers-js/src/env.js +0 -167
- package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
- package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
- package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
- package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
- package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
- package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
- package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
- package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
- package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
- package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
- package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
- package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
- package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
- package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
- package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
- package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
- package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
- package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
- package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
- package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
- package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
- package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
- package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
- package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
- package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
- package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
- package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
- package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
- package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
- package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
- package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
- package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
- package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
- package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
- package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
- package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
- package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
- package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
- package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
- package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
- package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
- package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
- package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
- package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
- package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
- package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
- package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
- package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
- package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
- package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
- package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
- package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
- package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
- package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
- package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
- package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
- package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
- package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
- package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
- package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
- package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
- package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
- package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
- package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
- package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
- package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
- package/docs/parakeet-transformers-js/src/models.js +0 -8644
- package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
- package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
- package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
- package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
- package/docs/parakeet-transformers-js/src/processors.js +0 -16
- package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
- package/docs/parakeet-transformers-js/src/transformers.js +0 -50
- package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
- package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
- package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
- package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
- package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
- package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
- package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
- package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
- package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
- package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
- package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
- package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
- package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
- package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
- package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
- package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
- package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
- package/docs/parakeet-transformers-js/tsconfig.json +0 -21
- package/docs/parakeet-transformers-js/webpack.config.js +0 -223
package/src/preprocessor.js
CHANGED
|
@@ -1,69 +1,76 @@
|
|
|
1
|
-
import { initOrt } from './backend.js';
|
|
2
|
-
|
|
3
|
-
// Runs the Nemo-style preprocessor ONNX model (80- or 128-bin log-mel spectrogram).
|
|
4
|
-
export class OnnxPreprocessor {
|
|
5
|
-
/**
|
|
6
|
-
* @param {string} modelUrl URL to the preprocessor onnx file (e.g. nemo128.onnx)
|
|
7
|
-
* @param {Object} [opts]
|
|
8
|
-
* @param {('webgpu'|'wasm')} [opts.backend]
|
|
9
|
-
*/
|
|
10
|
-
constructor(modelUrl, opts = {}) {
|
|
11
|
-
this.modelUrl = modelUrl;
|
|
12
|
-
this.opts = opts;
|
|
13
|
-
if (this.opts.enableGraphCapture === undefined) {
|
|
14
|
-
this.opts.enableGraphCapture = this.opts.backend === 'wasm';
|
|
15
|
-
}
|
|
16
|
-
this.session = null;
|
|
17
|
-
this.ort = null;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
async _ensureSession() {
|
|
21
|
-
if (!this.session) {
|
|
22
|
-
this.ort = await initOrt(this.opts);
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
const
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
const
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
1
|
+
import { initOrt } from './backend.js';
|
|
2
|
+
|
|
3
|
+
// Runs the Nemo-style preprocessor ONNX model (80- or 128-bin log-mel spectrogram).
|
|
4
|
+
export class OnnxPreprocessor {
  /**
   * Wraps the preprocessor ONNX model and lazily creates its inference session.
   *
   * @param {string} modelUrl URL to the preprocessor onnx file (e.g. nemo128.onnx)
   * @param {Object} [opts] Options; also forwarded to `initOrt`.
   * @param {('webgpu'|'wasm')} [opts.backend]
   * @param {boolean} [opts.enableGraphCapture] When left undefined, defaults to
   *   `true` only if `opts.backend === 'wasm'`.
   * @param {boolean} [opts.enableProfiling] Forwarded to the session options
   *   (falsy values become `false`).
   */
  constructor(modelUrl, opts = {}) {
    this.modelUrl = modelUrl;
    // Work on a shallow copy so that filling in defaults below never mutates
    // the object owned by the caller.
    this.opts = { ...opts };
    if (this.opts.enableGraphCapture === undefined) {
      this.opts.enableGraphCapture = this.opts.backend === 'wasm';
    }
    this.session = null;
    this.ort = null;
  }

  /**
   * Create the ONNX inference session on first use; later calls are no-ops
   * once `this.session` is set.
   *
   * If session creation with graph capture enabled fails with an error whose
   * message contains 'graph capture', creation is retried once without it.
   * Any other error is rethrown unchanged.
   *
   * @returns {Promise<void>}
   */
  async _ensureSession() {
    if (this.session) {
      return;
    }

    this.ort = await initOrt(this.opts);

    // Build session options. Workaround for ORT-web bug where
    // passing `enableGraphCapture:false` still triggers the
    // graph-capture execution path (which then requires external
    // buffers). We therefore only include the flag when it is
    // explicitly **true**.
    const plainOpts = { enableProfiling: this.opts.enableProfiling || false };
    const sessOpts = this.opts.enableGraphCapture
      ? { ...plainOpts, enableGraphCapture: true }
      : plainOpts;

    try {
      this.session = await this.ort.InferenceSession.create(this.modelUrl, sessOpts);
    } catch (err) {
      // Tolerate non-Error rejections (null, strings, ...) when reading the message.
      const msg = String((err && err.message) || '');
      if (!sessOpts.enableGraphCapture || !msg.includes('graph capture')) {
        throw err;
      }
      console.warn('[Preprocessor] Graph capture unsupported, retrying without it');
      // Retry with the flag omitted entirely rather than set to `false`:
      // per the workaround note above, an explicit `false` can still take
      // the graph-capture path.
      this.session = await this.ort.InferenceSession.create(this.modelUrl, plainOpts);
    }
  }

  /**
   * Convert PCM audio Float32Array into log-mel features recognised by Parakeet.
   * @param {Float32Array} audio Normalised mono PCM [-1,1] at 16 kHz.
   * @returns {Promise<{features:Float32Array,length:number}>} `features` is the
   *   data of the model's `features` output; `length` is the first element of
   *   its `features_lens` output converted to a Number.
   */
  async process(audio) {
    await this._ensureSession();

    // The model expects [B, N] float32 waveforms and lengths.
    const buffer = new Float32Array(audio); // copy to ensure contiguous
    const waveforms = new this.ort.Tensor('float32', buffer, [1, buffer.length]);

    // The length input is an int64 tensor, hence the BigInt64Array.
    const lenArr = new BigInt64Array([BigInt(buffer.length)]);
    const waveforms_lens = new this.ort.Tensor('int64', lenArr, [1]);

    const outs = await this.session.run({ waveforms, waveforms_lens });

    return {
      features: outs['features'].data,
      length: Number(outs['features_lens'].data[0]),
    };
  }
}
|