parakeet.js 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/.gitmodules +3 -0
  2. package/README.md +240 -239
  3. package/examples/hf-spaces-demo/README.md +6 -9
  4. package/examples/hf-spaces-demo/package.json +1 -1
  5. package/examples/hf-spaces-demo/src/App.js +307 -316
  6. package/examples/react-demo/package.json +19 -19
  7. package/examples/react-demo/src/App.jsx +324 -326
  8. package/examples/react-demo-dev/src/App.jsx +23 -24
  9. package/package.json +1 -1
  10. package/publish.ps1 +65 -0
  11. package/src/hub.js +235 -241
  12. package/src/parakeet.js +15 -8
  13. package/src/preprocessor.js +75 -68
  14. package/docs/parakeet-transformers-js/.gitattributes +0 -2
  15. package/docs/parakeet-transformers-js/.prettierignore +0 -8
  16. package/docs/parakeet-transformers-js/.prettierrc +0 -10
  17. package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
  18. package/docs/parakeet-transformers-js/LICENSE +0 -202
  19. package/docs/parakeet-transformers-js/README.md +0 -448
  20. package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
  21. package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
  22. package/docs/parakeet-transformers-js/debug_test.js +0 -84
  23. package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
  24. package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
  25. package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
  26. package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
  27. package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
  28. package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
  29. package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
  30. package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
  31. package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
  32. package/docs/parakeet-transformers-js/js_steps.json +0 -821
  33. package/docs/parakeet-transformers-js/package-lock.json +0 -12251
  34. package/docs/parakeet-transformers-js/package.json +0 -96
  35. package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
  36. package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
  37. package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
  38. package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
  39. package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
  40. package/docs/parakeet-transformers-js/src/configs.js +0 -455
  41. package/docs/parakeet-transformers-js/src/env.js +0 -167
  42. package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
  43. package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
  44. package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
  45. package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
  46. package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
  47. package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
  48. package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
  49. package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
  50. package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
  51. package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
  52. package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
  53. package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
  54. package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
  55. package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
  56. package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
  57. package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
  58. package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
  59. package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
  60. package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
  61. package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
  62. package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
  63. package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
  64. package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
  65. package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
  66. package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
  67. package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
  68. package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
  69. package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
  70. package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
  71. package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
  72. package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
  73. package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
  74. package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
  75. package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
  76. package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
  77. package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
  78. package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
  79. package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
  80. package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
  81. package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
  82. package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
  83. package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
  84. package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
  85. package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
  86. package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
  87. package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
  88. package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
  89. package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
  90. package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
  91. package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
  92. package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
  93. package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
  94. package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
  95. package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
  96. package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
  97. package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
  98. package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
  99. package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
  100. package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
  101. package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
  102. package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
  103. package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
  104. package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
  105. package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
  106. package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
  107. package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
  108. package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
  109. package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
  110. package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
  111. package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
  112. package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
  113. package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
  114. package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
  115. package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
  116. package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
  117. package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
  118. package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
  119. package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
  120. package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
  121. package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
  122. package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
  123. package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
  124. package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
  125. package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
  126. package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
  127. package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
  128. package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
  129. package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
  130. package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
  131. package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
  132. package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
  133. package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
  134. package/docs/parakeet-transformers-js/src/models.js +0 -8644
  135. package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
  136. package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
  137. package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
  138. package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
  139. package/docs/parakeet-transformers-js/src/processors.js +0 -16
  140. package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
  141. package/docs/parakeet-transformers-js/src/transformers.js +0 -50
  142. package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
  143. package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
  144. package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
  145. package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
  146. package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
  147. package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
  148. package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
  149. package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
  150. package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
  151. package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
  152. package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
  153. package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
  154. package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
  155. package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
  156. package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
  157. package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
  158. package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
  159. package/docs/parakeet-transformers-js/tsconfig.json +0 -21
  160. package/docs/parakeet-transformers-js/webpack.config.js +0 -223
@@ -1,128 +0,0 @@
1
- import { RawImage } from "./image.js";
2
- import { apis } from "../env.js";
3
-
4
- export class RawVideoFrame {
5
-
6
- /**
7
- * @param {RawImage} image
8
- * @param {number} timestamp
9
- */
10
- constructor(image, timestamp) {
11
- this.image = image;
12
- this.timestamp = timestamp;
13
- }
14
- }
15
-
16
- export class RawVideo {
17
- /**
18
- * @param {RawVideoFrame[]|RawImage[]} frames
19
- * @param {number} duration
20
- */
21
- constructor(frames, duration) {
22
- if (frames.length > 0 && frames[0] instanceof RawImage) {
23
- // Assume uniform timestamps
24
- frames = frames.map((image, i) => new RawVideoFrame(image, (i + 1) / (frames.length + 1) * duration));
25
- }
26
- this.frames = /** @type {RawVideoFrame[]} */ (frames);
27
- this.duration = duration;
28
- }
29
-
30
- get width() {
31
- return this.frames[0].image.width;
32
- }
33
- get height() {
34
- return this.frames[0].image.height;
35
- }
36
-
37
- get fps() {
38
- return this.frames.length / this.duration;
39
- }
40
- }
41
-
42
-
43
- /**
44
- * Loads a video.
45
- *
46
- * @param {string|Blob|HTMLVideoElement} src The video to process.
47
- * @param {Object} [options] Optional parameters.
48
- * @param {number} [options.num_frames=null] The number of frames to sample uniformly.
49
- * @param {number} [options.fps=null] The number of frames to sample per second.
50
- *
51
- * @returns {Promise<RawVideo>} The loaded video.
52
- */
53
- export async function load_video(src, { num_frames = null, fps = null } = {}) {
54
- if (!apis.IS_BROWSER_ENV) {
55
- throw new Error("`load_video` is currently only supported in browser environments.");
56
- }
57
-
58
- // TODO: Support efficiently loading all frames using the WebCodecs API.
59
- // Specifically, https://developer.mozilla.org/en-US/docs/Web/API/VideoDecoder
60
- if (num_frames == null && fps == null) {
61
- throw new Error("Either num_frames or fps must be provided.");
62
- }
63
-
64
- const frames = [];
65
-
66
- const video = document.createElement("video");
67
- video.crossOrigin = "anonymous";
68
- video.muted = true; // mute to allow autoplay and seeking
69
-
70
- if (typeof src === 'string') {
71
- video.src = src;
72
- } else if (src instanceof Blob) {
73
- video.src = URL.createObjectURL(src);
74
- } else if (src instanceof HTMLVideoElement) {
75
- video.src = src.src;
76
- } else {
77
- throw new Error("Invalid URL or video element provided.");
78
- }
79
- // Wait for metadata to load to obtain duration
80
- await new Promise((resolve) => video.onloadedmetadata = resolve);
81
-
82
- if (video.seekable.start(0) === video.seekable.end(0)) {
83
- // Fallback: Download entire video if not seekable
84
- const response = await fetch(video.src);
85
- const blob = await response.blob();
86
- video.src = URL.createObjectURL(blob);
87
- await new Promise((resolve) => video.onloadedmetadata = resolve);
88
- }
89
-
90
- const duration = video.duration;
91
-
92
- let count, step;
93
- if (num_frames != null) {
94
- count = num_frames;
95
- step = num_frames === 1 ? 0 : duration / (num_frames - 1);
96
- } else {
97
- step = 1 / fps;
98
- count = Math.floor(duration / step);
99
- }
100
-
101
- // Build an array of sample times based on num_frames or fps
102
- let sampleTimes = [];
103
- for (let i = 0; i < count; ++i) {
104
- sampleTimes.push(num_frames === 1 ? duration / 2 : i * step);
105
- }
106
-
107
- const canvas = document.createElement("canvas");
108
- canvas.width = video.videoWidth;
109
- canvas.height = video.videoHeight;
110
- const ctx = canvas.getContext("2d", { willReadFrequently: true });
111
- for (const t of sampleTimes) {
112
- video.currentTime = t;
113
- await new Promise((resolve) => {
114
- video.onseeked = resolve;
115
- });
116
- ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
117
- const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
118
- const frameData = new RawImage(imageData.data, canvas.width, canvas.height, 4);
119
-
120
- const frame = new RawVideoFrame(frameData, t);
121
- frames.push(frame);
122
- }
123
-
124
- // Clean up video element.
125
- video.remove();
126
-
127
- return new RawVideo(frames, duration);
128
- }
@@ -1,114 +0,0 @@
1
- import fs from 'fs';
2
- import path from 'path';
3
- import { spawnSync } from 'child_process';
4
- import { ParakeetModel } from '../src/parakeet.js';
5
- import { MEL_BINS } from '../src/audio_features.js';
6
- import { fileURLToPath } from 'url';
7
-
8
- const CHUNK = 128;
9
- const BLANK_ID = 1024;
10
-
11
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
12
-
13
- test('Decoder joint first step logits match python reference (tolerance 1e-3)', async () => {
14
- const projectRoot = path.resolve(__dirname, '..');
15
- const assetsDir = path.join(projectRoot, 'assets');
16
- const preprocPath = path.join(assetsDir, 'nemo128.onnx');
17
- if (!fs.existsSync(preprocPath)) {
18
- console.warn('Preprocessor not found, skipping');
19
- return;
20
- }
21
-
22
- const modelDir = path.resolve(projectRoot, '..', 'parakeet-tdt-0.6b-v2-onnx');
23
- let encoderPath = path.join(modelDir, 'encoder-model.int8.onnx');
24
- if (!fs.existsSync(encoderPath)) encoderPath = path.join(modelDir, 'encoder-model.onnx');
25
- let decJointPath = path.join(modelDir, 'decoder_joint-model.int8.onnx');
26
- if (!fs.existsSync(decJointPath)) decJointPath = path.join(modelDir, 'decoder_joint-model.onnx');
27
- if (!fs.existsSync(encoderPath) || !fs.existsSync(decJointPath)) {
28
- console.warn('Model files not found, skipping');
29
- return;
30
- }
31
-
32
- const wavPath = path.resolve(projectRoot, '..', 'jfk.wav');
33
- if (!fs.existsSync(wavPath)) {
34
- console.warn('WAV not found, skipping');
35
- return;
36
- }
37
-
38
- const tmpJson = path.join(projectRoot, '.tmp_dec.json');
39
- const pyScript = path.join(projectRoot, '..', 'scripts', 'decoder_step.py');
40
- const pyRes = spawnSync('python', [pyScript, encoderPath, decJointPath, preprocPath, wavPath, tmpJson, BLANK_ID.toString(), CHUNK.toString()], { stdio: 'inherit' });
41
- expect(pyRes.status).toBe(0);
42
- const ref = JSON.parse(fs.readFileSync(tmpJson, 'utf8'));
43
- fs.unlinkSync(tmpJson);
44
-
45
- // JS path via ParakeetModel
46
- const model = await ParakeetModel.fromDirectory(modelDir);
47
- const audioData = fs.readFileSync(wavPath);
48
- // quick wav parse using node-wav? Instead reuse test helper for reading audio; replicate minimal parse.
49
- const wavBuf = audioData;
50
- // To avoid dependency, skip JS compute; just validate length of logits using model.joinerSession dims
51
- // Instead replicate encoder+preproc as in previous test but using model
52
- // We'll reuse OnnxPreprocessor from model.preprocessor
53
- const wav = await import('wav');
54
- const { Readable } = await import('stream');
55
- function readWavFloat(file) {
56
- return new Promise((resolve, reject) => {
57
- const reader = new wav.Reader();
58
- const samples = [];
59
- let channels = 0;
60
- reader.on('format', fmt=>{ channels=fmt.channels; });
61
- reader.on('data', chunk=>{ for(let i=0;i<chunk.length;i+=2) samples.push(chunk.readInt16LE(i)); });
62
- reader.on('end', ()=>{
63
- const mono = new Float32Array(Math.floor(samples.length/channels));
64
- for(let i=0;i<mono.length;++i){
65
- let sum=0; for(let c=0;c<channels;++c){ sum+=samples[i*channels+c]; }
66
- mono[i]=sum/channels/32768;
67
- }
68
- resolve(mono);
69
- });
70
- reader.on('error', reject);
71
- Readable.from(file).pipe(reader);
72
- });
73
- }
74
- const audioFloat = await readWavFloat(fs.readFileSync(wavPath));
75
- const featRes = await model.preprocessor.process(audioFloat);
76
- const T = Math.min(featRes.length, CHUNK);
77
- const bufMel = new Float32Array(MEL_BINS * T);
78
- for (let t=0;t<T;++t){
79
- for (let m=0;m<MEL_BINS;++m){
80
- bufMel[m*T+t]=featRes.features[t*MEL_BINS+m];
81
- }
82
- }
83
- const ortMod = await import('onnxruntime-node');
84
- const ort = ortMod.default ?? ortMod;
85
- const encTensor = new ort.Tensor('float32', bufMel, [1, MEL_BINS, T]);
86
- const lenTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(T)]), [1]);
87
- const encOut = await model.encoderSession.run({ 'audio_signal': encTensor, 'length': lenTensor });
88
- const encTensorFrame = (encOut['outputs']||Object.values(encOut)[0]);
89
- const hidden = encTensorFrame.dims[1];
90
- // slice first frame
91
- const frameBuf = new Float32Array(hidden);
92
- for(let i=0;i<hidden;++i) frameBuf[i]=encTensorFrame.data[i];
93
- const encFrame = new ort.Tensor('float32', frameBuf, [1, hidden, 1]);
94
- // zero states
95
- const numLayers = model.predLayers||2;
96
- const state1 = new ort.Tensor('float32', new Float32Array(numLayers*1*model.predHidden), [numLayers,1,model.predHidden]);
97
- const state2 = new ort.Tensor('float32', new Float32Array(numLayers*1*model.predHidden), [numLayers,1,model.predHidden]);
98
- const out = await model.joinerSession.run({
99
- encoder_outputs: encFrame,
100
- targets: new ort.Tensor('int32', Int32Array.from([BLANK_ID]), [1,1]),
101
- target_length: new ort.Tensor('int32', Int32Array.from([1]), [1]),
102
- input_states_1: state1,
103
- input_states_2: state2,
104
- });
105
- const logitsJS = (out['outputs']||out[0]||Object.values(out)[0]).data;
106
-
107
- expect(logitsJS.length).toBe(ref.logits.length);
108
- let maxDiff=0;
109
- for(let i=0;i<logitsJS.length;++i){
110
- const diff=Math.abs(logitsJS[i]-ref.logits[i]);
111
- if(diff>maxDiff) maxDiff=diff;
112
- }
113
- expect(maxDiff).toBeLessThan(1e-3);
114
- });
@@ -1,108 +0,0 @@
1
- import fs from 'fs';
2
- import path from 'path';
3
- import { spawnSync } from 'child_process';
4
- import { OnnxPreprocessor } from '../src/parakeet.js';
5
- import * as ort from 'onnxruntime-node';
6
- import wav from 'wav';
7
- import { MEL_BINS } from '../src/audio_features.js';
8
- import { fileURLToPath } from 'url';
9
-
10
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
11
-
12
- const CHUNK = 128;
13
-
14
- function readWavFloat32(filePath) {
15
- return new Promise((resolve, reject) => {
16
- const file = fs.createReadStream(filePath);
17
- const reader = new wav.Reader();
18
- const samples = [];
19
- let sampleRate = 0;
20
- let channels = 0;
21
-
22
- reader.on('format', (format) => {
23
- sampleRate = format.sampleRate;
24
- channels = format.channels;
25
- });
26
-
27
- reader.on('data', (chunk) => {
28
- for (let i = 0; i < chunk.length; i += 2) {
29
- samples.push(chunk.readInt16LE(i));
30
- }
31
- });
32
-
33
- reader.on('end', () => {
34
- const mono = new Float32Array(Math.floor(samples.length / channels));
35
- for (let i = 0; i < mono.length; ++i) {
36
- let sum = 0;
37
- for (let c = 0; c < channels; ++c) {
38
- sum += samples[i * channels + c];
39
- }
40
- mono[i] = (sum / channels) / 32768;
41
- }
42
- resolve({ audio: mono, sampleRate });
43
- });
44
-
45
- reader.on('error', reject);
46
- file.on('error', reject);
47
-
48
- file.pipe(reader);
49
- });
50
- }
51
-
52
- test('Encoder first vector matches python reference (tolerance 1e-3)', async () => {
53
- const projectRoot = path.resolve(__dirname, '..');
54
- const assetsDir = path.join(projectRoot, 'assets');
55
- const preprocPath = path.join(assetsDir, 'nemo128.onnx');
56
- if (!fs.existsSync(preprocPath)) {
57
- console.warn('Preprocessor not found, skipping');
58
- return;
59
- }
60
-
61
- const modelDir = path.resolve(projectRoot, '..', 'parakeet-tdt-0.6b-v2-onnx');
62
- let encoderPath = path.join(modelDir, 'encoder-model.int8.onnx');
63
- if (!fs.existsSync(encoderPath)) encoderPath = path.join(modelDir, 'encoder-model.onnx');
64
- if (!fs.existsSync(encoderPath)) {
65
- console.warn('Encoder model not found, skipping');
66
- return;
67
- }
68
-
69
- const wavPath = path.resolve(projectRoot, '..', 'jfk.wav');
70
- if (!fs.existsSync(wavPath)) {
71
- console.warn('Test WAV not found, skipping');
72
- return;
73
- }
74
-
75
- // Run python helper
76
- const tmpJson = path.join(projectRoot, '.tmp_enc.json');
77
- const pyScript = path.join(projectRoot, '..', 'scripts', 'encode_chunk.py');
78
- const pyRes = spawnSync('python', [pyScript, encoderPath, preprocPath, wavPath, tmpJson, CHUNK.toString()], { stdio: 'inherit' });
79
- expect(pyRes.status).toBe(0);
80
- const ref = JSON.parse(fs.readFileSync(tmpJson, 'utf8'));
81
- fs.unlinkSync(tmpJson);
82
-
83
- // JS pipeline
84
- const { audio } = await readWavFloat32(wavPath);
85
- const preproc = new OnnxPreprocessor(preprocPath);
86
- const procRes = await preproc.process(audio);
87
- const T = Math.min(procRes.length, CHUNK);
88
- const buf = new Float32Array(MEL_BINS * T);
89
- for (let t = 0; t < T; ++t) {
90
- for (let m = 0; m < MEL_BINS; ++m) {
91
- buf[m * T + t] = procRes.features[t * MEL_BINS + m];
92
- }
93
- }
94
- const inputTensor = new ort.Tensor('float32', buf, [1, MEL_BINS, T]);
95
- const lenTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(T)]), [1]);
96
- const encSession = await ort.InferenceSession.create(encoderPath);
97
- const encOut = await encSession.run({ 'audio_signal': inputTensor, 'length': lenTensor });
98
- const enc = encOut['outputs'] || Object.values(encOut)[0];
99
- const jsVec = enc.data.slice(0, ref.hidden);
100
-
101
- expect(jsVec.length).toBe(ref.vector.length);
102
- let maxDiff = 0;
103
- for (let i = 0; i < jsVec.length; ++i) {
104
- const diff = Math.abs(jsVec[i] - ref.vector[i]);
105
- if (diff > maxDiff) maxDiff = diff;
106
- }
107
- expect(maxDiff).toBeLessThan(1e-3);
108
- });
@@ -1,85 +0,0 @@
1
- import fs from 'fs';
2
- import path from 'path';
3
- import { spawnSync } from 'child_process';
4
- import { OnnxPreprocessor } from '../src/parakeet.js';
5
- import wav from 'wav';
6
- import { fileURLToPath } from 'url';
7
-
8
- const __dirname = path.dirname(fileURLToPath(import.meta.url));
9
-
10
- function readWavFloat32(filePath) {
11
- return new Promise((resolve, reject) => {
12
- const file = fs.createReadStream(filePath);
13
- const reader = new wav.Reader();
14
- const samples = [];
15
- let sampleRate = 0;
16
- let channels = 0;
17
-
18
- reader.on('format', (format) => {
19
- sampleRate = format.sampleRate;
20
- channels = format.channels;
21
- });
22
-
23
- reader.on('data', (chunk) => {
24
- for (let i = 0; i < chunk.length; i += 2) {
25
- samples.push(chunk.readInt16LE(i));
26
- }
27
- });
28
-
29
- reader.on('end', () => {
30
- const mono = new Float32Array(Math.floor(samples.length / channels));
31
- for (let i = 0; i < mono.length; ++i) {
32
- let sum = 0;
33
- for (let c = 0; c < channels; ++c) {
34
- sum += samples[i * channels + c];
35
- }
36
- mono[i] = (sum / channels) / 32768;
37
- }
38
- resolve({ audio: mono, sampleRate });
39
- });
40
-
41
- reader.on('error', reject);
42
- file.on('error', reject);
43
-
44
- file.pipe(reader);
45
- });
46
- }
47
-
48
- test('ONNX preprocessor matches python reference output', async () => {
49
- const projectRoot = path.resolve(__dirname, '..');
50
- const assetsDir = path.join(projectRoot, 'assets');
51
- const preprocPath = path.join(assetsDir, 'nemo128.onnx');
52
- if (!fs.existsSync(preprocPath)) {
53
- console.warn('Preprocessor model not found, skipping test');
54
- return;
55
- }
56
-
57
- const wavPath = path.resolve(projectRoot, '..', 'jfk.wav');
58
- if (!fs.existsSync(wavPath)) {
59
- console.warn('Test WAV not found, skipping test');
60
- return;
61
- }
62
-
63
- const tmpJson = path.join(projectRoot, '.tmp_features.json');
64
- const pythonScript = path.join(projectRoot, '..', 'scripts', 'compute_features.py');
65
- const pyRes = spawnSync('python', [pythonScript, preprocPath, wavPath, tmpJson], { stdio: 'inherit' });
66
- expect(pyRes.status).toBe(0);
67
- const ref = JSON.parse(fs.readFileSync(tmpJson, 'utf8'));
68
- fs.unlinkSync(tmpJson);
69
-
70
- const { audio } = await readWavFloat32(wavPath);
71
- const jsPreproc = new OnnxPreprocessor(preprocPath);
72
- const jsRes = await jsPreproc.process(audio);
73
-
74
- expect(jsRes.length).toBe(ref.length);
75
- const refFeat = Float32Array.from(ref.features);
76
- const jsFeat = jsRes.features;
77
- expect(jsFeat.length).toBe(refFeat.length);
78
-
79
- let maxDiff = 0;
80
- for (let i = 0; i < jsFeat.length; ++i) {
81
- const diff = Math.abs(jsFeat[i] - refFeat[i]);
82
- if (diff > maxDiff) maxDiff = diff;
83
- }
84
- expect(maxDiff).toBeLessThan(1e-4);
85
- });
@@ -1,24 +0,0 @@
1
- import { ParakeetTokenizer } from '../src/parakeet.js';
2
-
3
- describe('ParakeetTokenizer', () => {
4
- test('decode joins tokens with spaces correctly', () => {
5
- const id2token = ['<blk>', '▁hello', '▁world', '!'];
6
- const tok = new ParakeetTokenizer(id2token);
7
- const text = tok.decode([1, 2, 3]);
8
- expect(text).toBe('hello world!');
9
- });
10
-
11
- test('blank tokens are skipped', () => {
12
- const id2token = ['<blk>', '▁test'];
13
- const tok = new ParakeetTokenizer(id2token);
14
- const text = tok.decode([0, 1, 0]);
15
- expect(text).toBe('test');
16
- });
17
-
18
- test('unknown ids are ignored', () => {
19
- const id2token = ['<blk>', '▁foo'];
20
- const tok = new ParakeetTokenizer(id2token);
21
- const text = tok.decode([1, 99, 0]);
22
- expect(text).toBe('foo');
23
- });
24
- });
@@ -1,89 +0,0 @@
1
- // Quick smoke test to transcribe sherpa-onnx-parakeet-model/test_wavs/0.wav
2
- import fs from 'fs';
3
- import path from 'path';
4
- import { ParakeetModel } from '../src/parakeet.js';
5
- import wav from 'wav';
6
- const { Reader } = wav;
7
- import { Writable } from 'stream';
8
-
9
- function readWav(filePath) {
10
- return new Promise((resolve, reject) => {
11
- const file = fs.createReadStream(filePath);
12
- const reader = new Reader();
13
- const samples = [];
14
- let sampleRate = 0;
15
- let channels = 0;
16
-
17
- const writer = new Writable({
18
- write(chunk, encoding, callback) {
19
- for (let i = 0; i < chunk.length; i += 2) {
20
- samples.push(chunk.readInt16LE(i));
21
- }
22
- callback();
23
- }
24
- });
25
-
26
- reader.on('format', (format) => {
27
- sampleRate = format.sampleRate;
28
- channels = format.channels;
29
- console.log(`[Test] WAV format: ${sampleRate} Hz, ${channels} channels, ${format.bitDepth}-bit`);
30
- });
31
-
32
- file.pipe(reader).pipe(writer);
33
-
34
- writer.on('finish', () => {
35
- let monoSamples = new Float32Array(Math.floor(samples.length / channels));
36
- for (let i = 0; i < monoSamples.length; i++) {
37
- let sum = 0;
38
- for (let c = 0; c < channels; c++) {
39
- sum += samples[i * channels + c];
40
- }
41
- monoSamples[i] = (sum / channels) / 32768;
42
- }
43
- resolve({ audio: monoSamples, sampleRate });
44
- });
45
-
46
- writer.on('error', reject);
47
- reader.on('error', reject);
48
- file.on('error', reject);
49
- });
50
- }
51
-
52
- async function main() {
53
- const modelDir = path.resolve('../parakeet-transformers-js-model');
54
- const model = await ParakeetModel.fromDirectory(modelDir);
55
- const wavPath = path.resolve('../jfk.wav');
56
- let { audio, sampleRate } = await readWav(wavPath);
57
-
58
- // Resample if necessary (simple linear interpolation)
59
- if (sampleRate !== 16000) {
60
- console.log(`[Test] Resampling from ${sampleRate} Hz to 16000 Hz...`);
61
- const ratio = sampleRate / 16000;
62
- const newLength = Math.floor(audio.length / ratio);
63
- const resampled = new Float32Array(newLength);
64
- for (let i = 0; i < newLength; i++) {
65
- const index = i * ratio;
66
- const i_low = Math.floor(index);
67
- const i_high = Math.ceil(index);
68
- if (i_high >= audio.length) {
69
- resampled[i] = audio[i_low];
70
- continue;
71
- }
72
- const w = index - i_low;
73
- resampled[i] = audio[i_low] * (1 - w) + audio[i_high] * w;
74
- }
75
- audio = resampled;
76
- sampleRate = 16000;
77
- }
78
-
79
- // Tail padding 2 s
80
- const pad = new Float32Array(sampleRate * 2);
81
- const padded = new Float32Array(audio.length + pad.length);
82
- padded.set(audio);
83
- padded.set(pad, audio.length);
84
-
85
- const result = await model.transcribe(padded, sampleRate, { returnTimestamps: true, returnConfidences: true, temperature: 2.5, debug: true, skipCMVN: true });
86
- console.log(JSON.stringify(result, null, 2));
87
- }
88
-
89
- main();
@@ -1,21 +0,0 @@
1
- {
2
- // Only include files in the src directory
3
- "include": ["src/**/*"],
4
- "compilerOptions": {
5
- // Tells the compiler to check JS files
6
- "checkJs": true,
7
- "target": "esnext",
8
- "module": "nodenext",
9
- "moduleResolution": "nodenext",
10
- "outDir": "types",
11
- "strict": false,
12
- "skipLibCheck": true,
13
- "declaration": true,
14
- "declarationMap": true,
15
- "noEmit": false,
16
- "emitDeclarationOnly": true
17
- },
18
- "typeAcquisition": {
19
- "include": ["jest"]
20
- }
21
- }