parakeet.js 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/README.md +240 -239
- package/examples/hf-spaces-demo/README.md +6 -9
- package/examples/hf-spaces-demo/package.json +1 -1
- package/examples/hf-spaces-demo/src/App.js +307 -316
- package/examples/react-demo/package.json +19 -19
- package/examples/react-demo/src/App.jsx +324 -326
- package/examples/react-demo-dev/src/App.jsx +23 -24
- package/package.json +1 -1
- package/publish.ps1 +65 -0
- package/src/hub.js +235 -241
- package/src/parakeet.js +15 -8
- package/src/preprocessor.js +75 -68
- package/docs/parakeet-transformers-js/.gitattributes +0 -2
- package/docs/parakeet-transformers-js/.prettierignore +0 -8
- package/docs/parakeet-transformers-js/.prettierrc +0 -10
- package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
- package/docs/parakeet-transformers-js/LICENSE +0 -202
- package/docs/parakeet-transformers-js/README.md +0 -448
- package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
- package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
- package/docs/parakeet-transformers-js/debug_test.js +0 -84
- package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
- package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
- package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
- package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
- package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
- package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
- package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
- package/docs/parakeet-transformers-js/js_steps.json +0 -821
- package/docs/parakeet-transformers-js/package-lock.json +0 -12251
- package/docs/parakeet-transformers-js/package.json +0 -96
- package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
- package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
- package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
- package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
- package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
- package/docs/parakeet-transformers-js/src/configs.js +0 -455
- package/docs/parakeet-transformers-js/src/env.js +0 -167
- package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
- package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
- package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
- package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
- package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
- package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
- package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
- package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
- package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
- package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
- package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
- package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
- package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
- package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
- package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
- package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
- package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
- package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
- package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
- package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
- package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
- package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
- package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
- package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
- package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
- package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
- package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
- package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
- package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
- package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
- package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
- package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
- package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
- package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
- package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
- package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
- package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
- package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
- package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
- package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
- package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
- package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
- package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
- package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
- package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
- package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
- package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
- package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
- package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
- package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
- package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
- package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
- package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
- package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
- package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
- package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
- package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
- package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
- package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
- package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
- package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
- package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
- package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
- package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
- package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
- package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
- package/docs/parakeet-transformers-js/src/models.js +0 -8644
- package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
- package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
- package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
- package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
- package/docs/parakeet-transformers-js/src/processors.js +0 -16
- package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
- package/docs/parakeet-transformers-js/src/transformers.js +0 -50
- package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
- package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
- package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
- package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
- package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
- package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
- package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
- package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
- package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
- package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
- package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
- package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
- package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
- package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
- package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
- package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
- package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
- package/docs/parakeet-transformers-js/tsconfig.json +0 -21
- package/docs/parakeet-transformers-js/webpack.config.js +0 -223
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
// Debug test to capture full error
|
|
2
|
-
import fs from 'fs';
|
|
3
|
-
import path from 'path';
|
|
4
|
-
import { ParakeetModel } from './src/parakeet.js';
|
|
5
|
-
import wav from 'wav';
|
|
6
|
-
const { Reader } = wav;
|
|
7
|
-
import { Writable } from 'stream';
|
|
8
|
-
|
|
9
|
-
function readWav(filePath) {
|
|
10
|
-
return new Promise((resolve, reject) => {
|
|
11
|
-
const file = fs.createReadStream(filePath);
|
|
12
|
-
const reader = new Reader();
|
|
13
|
-
const samples = [];
|
|
14
|
-
let sampleRate = 0;
|
|
15
|
-
let channels = 0;
|
|
16
|
-
|
|
17
|
-
const writer = new Writable({
|
|
18
|
-
write(chunk, encoding, callback) {
|
|
19
|
-
for (let i = 0; i < chunk.length; i += 2) {
|
|
20
|
-
samples.push(chunk.readInt16LE(i));
|
|
21
|
-
}
|
|
22
|
-
callback();
|
|
23
|
-
}
|
|
24
|
-
});
|
|
25
|
-
|
|
26
|
-
reader.on('format', (format) => {
|
|
27
|
-
sampleRate = format.sampleRate;
|
|
28
|
-
channels = format.channels;
|
|
29
|
-
});
|
|
30
|
-
|
|
31
|
-
file.pipe(reader).pipe(writer);
|
|
32
|
-
|
|
33
|
-
writer.on('finish', () => {
|
|
34
|
-
let monoSamples = new Float32Array(Math.floor(samples.length / channels));
|
|
35
|
-
for (let i = 0; i < monoSamples.length; i++) {
|
|
36
|
-
let sum = 0;
|
|
37
|
-
for (let c = 0; c < channels; c++) {
|
|
38
|
-
sum += samples[i * channels + c];
|
|
39
|
-
}
|
|
40
|
-
monoSamples[i] = (sum / channels) / 32768;
|
|
41
|
-
}
|
|
42
|
-
resolve({ audio: monoSamples, sampleRate });
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
writer.on('error', reject);
|
|
46
|
-
reader.on('error', reject);
|
|
47
|
-
file.on('error', reject);
|
|
48
|
-
});
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
async function main() {
|
|
52
|
-
try {
|
|
53
|
-
const modelDir = path.resolve('../sherpa-onnx-parakeet-model');
|
|
54
|
-
console.log('Loading model from:', modelDir);
|
|
55
|
-
const model = await ParakeetModel.fromDirectory(modelDir);
|
|
56
|
-
console.log('Model loaded successfully');
|
|
57
|
-
|
|
58
|
-
const wavPath = path.resolve('../jfk.wav');
|
|
59
|
-
console.log('Loading audio from:', wavPath);
|
|
60
|
-
let { audio, sampleRate } = await readWav(wavPath);
|
|
61
|
-
console.log(`Audio loaded: ${audio.length} samples at ${sampleRate} Hz`);
|
|
62
|
-
|
|
63
|
-
// Just test a small portion first
|
|
64
|
-
const testAudio = audio.slice(0, 16000); // 1 second
|
|
65
|
-
|
|
66
|
-
console.log('Starting transcription...');
|
|
67
|
-
const result = await model.transcribe(testAudio, sampleRate, {
|
|
68
|
-
returnTimestamps: true,
|
|
69
|
-
returnConfidences: true,
|
|
70
|
-
temperature: 1.2,
|
|
71
|
-
debug: true
|
|
72
|
-
});
|
|
73
|
-
console.log('Result:', JSON.stringify(result, null, 2));
|
|
74
|
-
} catch (error) {
|
|
75
|
-
console.error('Full error details:');
|
|
76
|
-
console.error('Error message:', error.message);
|
|
77
|
-
console.error('Error stack:', error.stack);
|
|
78
|
-
if (error.cause) {
|
|
79
|
-
console.error('Error cause:', error.cause);
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
main();
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
const ort = require('onnxruntime-node');
|
|
3
|
-
const path = process.argv[2] || 'sherpa-onnx-parakeet-model/decoder.int8.onnx';
|
|
4
|
-
(async () => {
|
|
5
|
-
const session = await ort.InferenceSession.create(path);
|
|
6
|
-
console.log('Model:', path);
|
|
7
|
-
console.log('Inputs:', session.inputNames, 'metaKeys', Object.keys(session.inputMetadata));
|
|
8
|
-
console.log('Outputs:', session.outputNames);
|
|
9
|
-
})();
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
const ort = require('onnxruntime-node');
|
|
3
|
-
const path = process.argv[2] || 'sherpa-onnx-parakeet-model/joiner.int8.onnx';
|
|
4
|
-
(async () => {
|
|
5
|
-
const s = await ort.InferenceSession.create(path);
|
|
6
|
-
console.log('Model', path);
|
|
7
|
-
console.log('Inputs', Object.keys(s.inputMetadata));
|
|
8
|
-
console.log('Outputs', s.outputNames);
|
|
9
|
-
})();
|
|
@@ -1,249 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* Step-by-step intermediate value dumper for JS implementation to compare with onnx-asr.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
import fs from 'fs';
|
|
7
|
-
import path from 'path';
|
|
8
|
-
import { ParakeetModel } from '../src/parakeet.js';
|
|
9
|
-
import wav from 'wav';
|
|
10
|
-
import { Writable } from 'stream';
|
|
11
|
-
|
|
12
|
-
function parseArgs() {
|
|
13
|
-
const args = process.argv.slice(2);
|
|
14
|
-
if (args.length < 2) {
|
|
15
|
-
console.error('Usage: node js_step_by_step.js <model_dir> <audio.wav> [output.json]');
|
|
16
|
-
process.exit(1);
|
|
17
|
-
}
|
|
18
|
-
return {
|
|
19
|
-
modelDir: args[0],
|
|
20
|
-
wavPath: args[1],
|
|
21
|
-
outputFile: args[2] || 'js_step_by_step.json'
|
|
22
|
-
};
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
function readWav(filePath) {
|
|
26
|
-
const buffer = fs.readFileSync(filePath);
|
|
27
|
-
|
|
28
|
-
// Basic WAV header parsing (assumes 16-bit PCM)
|
|
29
|
-
const sampleRate = buffer.readUInt32LE(24);
|
|
30
|
-
const bitsPerSample = buffer.readUInt16LE(34);
|
|
31
|
-
const dataStart = 44; // Standard WAV header size
|
|
32
|
-
|
|
33
|
-
console.log(`WAV: ${sampleRate} Hz, ${bitsPerSample} bits`);
|
|
34
|
-
|
|
35
|
-
// Convert 16-bit PCM to Float32Array
|
|
36
|
-
const samples = (buffer.length - dataStart) / 2;
|
|
37
|
-
const audio = new Float32Array(samples);
|
|
38
|
-
|
|
39
|
-
for (let i = 0; i < samples; i++) {
|
|
40
|
-
const sample = buffer.readInt16LE(dataStart + i * 2);
|
|
41
|
-
audio[i] = sample / 32768.0; // Normalize to [-1, 1]
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
return { audio, sampleRate };
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
async function main() {
|
|
48
|
-
const { modelDir, wavPath, outputFile } = parseArgs();
|
|
49
|
-
|
|
50
|
-
console.log(`Loading model from: ${modelDir}`);
|
|
51
|
-
console.log(`Processing audio: ${wavPath}`);
|
|
52
|
-
|
|
53
|
-
const model = await ParakeetModel.fromDirectory(path.resolve(modelDir), { preferFloat32: true });
|
|
54
|
-
const { audio, sampleRate } = readWav(path.resolve(wavPath));
|
|
55
|
-
|
|
56
|
-
console.log(`Audio: ${audio.length} samples @ ${sampleRate} Hz, duration: ${(audio.length / sampleRate).toFixed(2)}s`);
|
|
57
|
-
|
|
58
|
-
const outputData = {
|
|
59
|
-
audio_length: audio.length,
|
|
60
|
-
sample_rate: sampleRate,
|
|
61
|
-
model_config: {
|
|
62
|
-
vocab_size: model.tokenizer.id2token.length,
|
|
63
|
-
blank_idx: model.blankId,
|
|
64
|
-
max_tokens_per_step: model.maxTokensPerStep,
|
|
65
|
-
subsampling_factor: model.subsampling,
|
|
66
|
-
pred_layers: model.predLayers,
|
|
67
|
-
pred_hidden: model.predHidden
|
|
68
|
-
},
|
|
69
|
-
steps: []
|
|
70
|
-
};
|
|
71
|
-
|
|
72
|
-
// Step 1: Preprocessing
|
|
73
|
-
console.log("Step 1: Preprocessing...");
|
|
74
|
-
const features = await model.computeFeatures(audio, sampleRate);
|
|
75
|
-
const MEL_BINS = 128;
|
|
76
|
-
const T = features.length / MEL_BINS;
|
|
77
|
-
|
|
78
|
-
outputData.preprocessing = {
|
|
79
|
-
features_shape: [1, MEL_BINS, T],
|
|
80
|
-
features_lens: [T],
|
|
81
|
-
features_sample: Array.from(features.slice(0, 50)).map(v => parseFloat(v.toFixed(6))),
|
|
82
|
-
features_dtype: "float32"
|
|
83
|
-
};
|
|
84
|
-
|
|
85
|
-
// Step 2: Manual decoding with step-by-step logging
|
|
86
|
-
console.log("Step 2: Manual decoding...");
|
|
87
|
-
|
|
88
|
-
const ort = await import('onnxruntime-node');
|
|
89
|
-
|
|
90
|
-
// Correctly initialize variables for decoding loop
|
|
91
|
-
const tokens = [];
|
|
92
|
-
const timestamps = [];
|
|
93
|
-
let decoderState = null;
|
|
94
|
-
let stepIdx = 0;
|
|
95
|
-
|
|
96
|
-
// --- Start of new non-chunking logic ---
|
|
97
|
-
|
|
98
|
-
// 1. Encode the entire feature set at once
|
|
99
|
-
const input = new ort.Tensor('float32', features, [1, MEL_BINS, T]);
|
|
100
|
-
const lenTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(T)]), [1]);
|
|
101
|
-
const encOut = await model.encoderSession.run({ 'audio_signal': input, 'length': lenTensor });
|
|
102
|
-
const enc = encOut['outputs'] ?? Object.values(encOut)[0];
|
|
103
|
-
|
|
104
|
-
// 2. Transpose encoder output to match onnx-asr: [B, D, T] -> [B, T, D]
|
|
105
|
-
const [B, D, T_enc] = enc.dims;
|
|
106
|
-
const transposedEncData = new Float32Array(B * T_enc * D);
|
|
107
|
-
for (let b = 0; b < B; ++b) {
|
|
108
|
-
for (let t_idx = 0; t_idx < T_enc; ++t_idx) {
|
|
109
|
-
for (let d = 0; d < D; ++d) {
|
|
110
|
-
const srcIdx = b * (D * T_enc) + d * T_enc + t_idx;
|
|
111
|
-
const destIdx = b * (T_enc * D) + t_idx * D + d;
|
|
112
|
-
transposedEncData[destIdx] = enc.data[srcIdx];
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
const encTransposed = {
|
|
117
|
-
data: transposedEncData,
|
|
118
|
-
dims: [B, T_enc, D],
|
|
119
|
-
};
|
|
120
|
-
|
|
121
|
-
outputData.encoding = {
|
|
122
|
-
encoder_out_shape: encTransposed.dims,
|
|
123
|
-
encoder_out_lens: [encTransposed.dims[1]],
|
|
124
|
-
encoder_out_sample: Array.from(encTransposed.data.slice(0, 50)).map(v => parseFloat(v.toFixed(6))),
|
|
125
|
-
encoder_out_dtype: "float32"
|
|
126
|
-
};
|
|
127
|
-
|
|
128
|
-
// 3. Detailed step-by-step decoding loop
|
|
129
|
-
console.log(`Processing ${T_enc} encoder frames...`);
|
|
130
|
-
const hiddenSize = encTransposed.dims[2];
|
|
131
|
-
const seqLen = encTransposed.dims[1];
|
|
132
|
-
|
|
133
|
-
let t = 0;
|
|
134
|
-
let emittedTokens = 0;
|
|
135
|
-
|
|
136
|
-
while (t < seqLen) {
|
|
137
|
-
const currFrameIdx = t;
|
|
138
|
-
|
|
139
|
-
// Slice encoder output for timestep t
|
|
140
|
-
const frameBuf = new Float32Array(hiddenSize);
|
|
141
|
-
const frameOffset = t * hiddenSize;
|
|
142
|
-
for (let i = 0; i < hiddenSize; ++i) {
|
|
143
|
-
frameBuf[i] = encTransposed.data[frameOffset + i];
|
|
144
|
-
}
|
|
145
|
-
const encTensor = new ort.Tensor('float32', frameBuf, [1, hiddenSize, 1]);
|
|
146
|
-
|
|
147
|
-
const lastToken = tokens.length > 0 ? tokens[tokens.length - 1] : model.blankId;
|
|
148
|
-
const res = await model._runCombinedStep(encTensor, lastToken, decoderState);
|
|
149
|
-
const { tokenLogits, step, newState } = res;
|
|
150
|
-
|
|
151
|
-
let max = -Infinity, maxId = 0;
|
|
152
|
-
for (let i = 0; i < tokenLogits.length; ++i) {
|
|
153
|
-
if (tokenLogits[i] > max) {
|
|
154
|
-
max = tokenLogits[i];
|
|
155
|
-
maxId = i;
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
const topK = Array.from({length: tokenLogits.length}, (_, i) => ({id: i, logit: tokenLogits[i]}))
|
|
160
|
-
.sort((a, b) => b.logit - a.logit).slice(0, 5);
|
|
161
|
-
|
|
162
|
-
const stepData = {
|
|
163
|
-
step: stepIdx,
|
|
164
|
-
t: t,
|
|
165
|
-
emitted_tokens: emittedTokens,
|
|
166
|
-
probs_shape: [tokenLogits.length],
|
|
167
|
-
token: maxId,
|
|
168
|
-
token_text: model.tokenizer.id2token[maxId] || `UNK_${maxId}`,
|
|
169
|
-
is_blank: maxId === model.blankId,
|
|
170
|
-
step_prediction: step,
|
|
171
|
-
top_5_tokens: topK.map(({id, logit}) => ({
|
|
172
|
-
token: id,
|
|
173
|
-
text: model.tokenizer.id2token[id] || `UNK_${id}`,
|
|
174
|
-
prob: parseFloat(logit.toFixed(6))
|
|
175
|
-
})),
|
|
176
|
-
action: '',
|
|
177
|
-
advance: 0,
|
|
178
|
-
advance_reason: ''
|
|
179
|
-
};
|
|
180
|
-
|
|
181
|
-
if (maxId !== model.blankId) {
|
|
182
|
-
tokens.push(maxId);
|
|
183
|
-
timestamps.push(currFrameIdx);
|
|
184
|
-
decoderState = newState;
|
|
185
|
-
emittedTokens += 1;
|
|
186
|
-
stepData.action = "emit_token";
|
|
187
|
-
} else {
|
|
188
|
-
stepData.action = "blank";
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
if (step > 0) {
|
|
192
|
-
t += step;
|
|
193
|
-
emittedTokens = 0;
|
|
194
|
-
stepData.advance = step;
|
|
195
|
-
stepData.advance_reason = "duration_step";
|
|
196
|
-
} else if (maxId === model.blankId || emittedTokens >= model.maxTokensPerStep) {
|
|
197
|
-
t += 1;
|
|
198
|
-
emittedTokens = 0;
|
|
199
|
-
stepData.advance = 1;
|
|
200
|
-
stepData.advance_reason = "blank_or_max_tokens";
|
|
201
|
-
} else {
|
|
202
|
-
stepData.advance = 0;
|
|
203
|
-
stepData.advance_reason = "continue_emitting";
|
|
204
|
-
}
|
|
205
|
-
|
|
206
|
-
outputData.steps.push(stepData);
|
|
207
|
-
stepIdx++;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
// --- End of new non-chunking logic ---
|
|
211
|
-
|
|
212
|
-
// Step 3: Final results
|
|
213
|
-
console.log("Step 3: Final results...");
|
|
214
|
-
|
|
215
|
-
// Final results from the step-by-step loop
|
|
216
|
-
outputData.final_results = {
|
|
217
|
-
tokens: tokens,
|
|
218
|
-
timestamps: timestamps,
|
|
219
|
-
decoded_text: model.tokenizer.decode(tokens),
|
|
220
|
-
total_steps: stepIdx,
|
|
221
|
-
final_t: t,
|
|
222
|
-
encodings_len: T_enc
|
|
223
|
-
};
|
|
224
|
-
|
|
225
|
-
// Full transcription for comparison, now with timestamps and confidences
|
|
226
|
-
console.log("Running full transcription for comparison...");
|
|
227
|
-
const fullResult = await model.transcribe(audio, sampleRate, {
|
|
228
|
-
returnTimestamps: true,
|
|
229
|
-
returnConfidences: true,
|
|
230
|
-
debug: true
|
|
231
|
-
});
|
|
232
|
-
outputData.full_transcription = fullResult;
|
|
233
|
-
|
|
234
|
-
// Save results
|
|
235
|
-
fs.writeFileSync(outputFile, JSON.stringify(outputData, null, 2));
|
|
236
|
-
|
|
237
|
-
console.log(`Step-by-step data saved to: ${outputFile}`);
|
|
238
|
-
console.log(`Final decoded text from steps: ${outputData.final_results.decoded_text}`);
|
|
239
|
-
if (typeof fullResult === 'string') {
|
|
240
|
-
console.log(`Full transcription result: ${fullResult}`);
|
|
241
|
-
} else {
|
|
242
|
-
console.log(`Full transcription result: ${fullResult.text}`);
|
|
243
|
-
}
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
main().catch(e => {
|
|
247
|
-
console.error(e);
|
|
248
|
-
process.exit(1);
|
|
249
|
-
});
|
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
// Simple CLI to transcribe a WAV file with ParakeetModel.
|
|
3
|
-
// Usage: parakeet_cli.js <model_dir> <audio.wav> [--json]
|
|
4
|
-
import fs from 'fs';
|
|
5
|
-
import path from 'path';
|
|
6
|
-
import { ParakeetModel } from '../src/parakeet.js';
|
|
7
|
-
import wav from 'wav';
|
|
8
|
-
import { Writable } from 'stream';
|
|
9
|
-
|
|
10
|
-
function parseArgs() {
|
|
11
|
-
const args = process.argv.slice(2);
|
|
12
|
-
if (args.length < 2) {
|
|
13
|
-
console.error('Usage: parakeet_cli.js <model_dir> <audio.wav> [--json] [--debug]');
|
|
14
|
-
process.exit(1);
|
|
15
|
-
}
|
|
16
|
-
const opts = {
|
|
17
|
-
modelDir: args[0],
|
|
18
|
-
wavPath: args[1],
|
|
19
|
-
json: args.includes('--json'),
|
|
20
|
-
debug: args.includes('--debug'),
|
|
21
|
-
float32: args.includes('--float32') || args.includes('--fp32')
|
|
22
|
-
};
|
|
23
|
-
return opts;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
function readWav(filePath) {
|
|
27
|
-
return new Promise((resolve, reject) => {
|
|
28
|
-
const file = fs.createReadStream(filePath);
|
|
29
|
-
const reader = new wav.Reader();
|
|
30
|
-
const samples = [];
|
|
31
|
-
let sampleRate = 0;
|
|
32
|
-
let channels = 0;
|
|
33
|
-
|
|
34
|
-
const writer = new Writable({
|
|
35
|
-
write(chunk, _enc, cb) {
|
|
36
|
-
for (let i = 0; i < chunk.length; i += 2) {
|
|
37
|
-
samples.push(chunk.readInt16LE(i));
|
|
38
|
-
}
|
|
39
|
-
cb();
|
|
40
|
-
}
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
reader.on('format', (fmt) => { sampleRate=fmt.sampleRate; channels=fmt.channels; });
|
|
44
|
-
file.pipe(reader).pipe(writer);
|
|
45
|
-
|
|
46
|
-
writer.on('finish', () => {
|
|
47
|
-
const mono = new Float32Array(Math.floor(samples.length / channels));
|
|
48
|
-
for (let i=0;i<mono.length;++i){
|
|
49
|
-
let sum=0; for(let c=0;c<channels;++c){ sum+=samples[i*channels+c]; }
|
|
50
|
-
mono[i]=sum/channels/32768;
|
|
51
|
-
}
|
|
52
|
-
resolve({ audio: mono, sampleRate });
|
|
53
|
-
});
|
|
54
|
-
writer.on('error', reject);
|
|
55
|
-
reader.on('error', reject);
|
|
56
|
-
file.on('error', reject);
|
|
57
|
-
});
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
async function main(){
|
|
61
|
-
const { modelDir, wavPath, json, debug, float32 } = parseArgs();
|
|
62
|
-
const model = await ParakeetModel.fromDirectory(path.resolve(modelDir), { preferFloat32: float32 });
|
|
63
|
-
const { audio, sampleRate } = await readWav(path.resolve(wavPath));
|
|
64
|
-
const padded = new Float32Array(audio.length + sampleRate*2);
|
|
65
|
-
padded.set(audio);
|
|
66
|
-
const result = await model.transcribe(padded, sampleRate, {
|
|
67
|
-
returnTimestamps: true,
|
|
68
|
-
returnConfidences: true,
|
|
69
|
-
temperature: 1.2,
|
|
70
|
-
debug: debug
|
|
71
|
-
});
|
|
72
|
-
const wrapped = { utterance: 1, ...result };
|
|
73
|
-
|
|
74
|
-
if (json) {
|
|
75
|
-
console.log(JSON.stringify(wrapped, null, 2));
|
|
76
|
-
} else {
|
|
77
|
-
console.log(result.text);
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// Always write detailed JSON output to a file for diffing / inspection
|
|
81
|
-
const quantTag = float32 ? 'fp32' : 'int8';
|
|
82
|
-
const outPath = path.resolve(`transcription_output_js_${quantTag}.json`);
|
|
83
|
-
try {
|
|
84
|
-
await fs.promises.writeFile(outPath, JSON.stringify(wrapped, null, 2));
|
|
85
|
-
if (debug) console.log(`[ParakeetCLI] Wrote JSON output to ${outPath}`);
|
|
86
|
-
} catch (e) {
|
|
87
|
-
console.error(`[ParakeetCLI] Failed to write JSON output:`, e);
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
main().catch(e=>{ console.error(e); process.exit(1); });
|
|
@@ -1,194 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* For a detailed explanation regarding each configuration property, visit:
|
|
3
|
-
* https://jestjs.io/docs/configuration
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
export default {
|
|
7
|
-
// All imported modules in your tests should be mocked automatically
|
|
8
|
-
// automock: false,
|
|
9
|
-
|
|
10
|
-
// Stop running tests after `n` failures
|
|
11
|
-
// bail: 0,
|
|
12
|
-
|
|
13
|
-
// Automatically clear mock calls, instances, contexts and results before every test
|
|
14
|
-
clearMocks: true,
|
|
15
|
-
|
|
16
|
-
// Set timeout for long-running ONNX tests
|
|
17
|
-
testTimeout: 30000,
|
|
18
|
-
|
|
19
|
-
// Indicates whether the coverage information should be collected while executing the test
|
|
20
|
-
collectCoverage: true,
|
|
21
|
-
|
|
22
|
-
// An array of glob patterns indicating a set of files for which coverage information should be collected
|
|
23
|
-
// collectCoverageFrom: undefined,
|
|
24
|
-
|
|
25
|
-
// The directory where Jest should output its coverage files
|
|
26
|
-
coverageDirectory: "coverage",
|
|
27
|
-
|
|
28
|
-
// An array of regexp pattern strings used to skip coverage collection
|
|
29
|
-
coveragePathIgnorePatterns: [
|
|
30
|
-
"node_modules",
|
|
31
|
-
"tests",
|
|
32
|
-
],
|
|
33
|
-
|
|
34
|
-
// Indicates which provider should be used to instrument code for coverage
|
|
35
|
-
coverageProvider: "v8",
|
|
36
|
-
|
|
37
|
-
// A list of reporter names that Jest uses when writing coverage reports
|
|
38
|
-
// coverageReporters: [
|
|
39
|
-
// "json",
|
|
40
|
-
// "text",
|
|
41
|
-
// "lcov",
|
|
42
|
-
// "clover"
|
|
43
|
-
// ],
|
|
44
|
-
|
|
45
|
-
// An object that configures minimum threshold enforcement for coverage results
|
|
46
|
-
// coverageThreshold: undefined,
|
|
47
|
-
|
|
48
|
-
// A path to a custom dependency extractor
|
|
49
|
-
// dependencyExtractor: undefined,
|
|
50
|
-
|
|
51
|
-
// Make calling deprecated APIs throw helpful error messages
|
|
52
|
-
// errorOnDeprecated: false,
|
|
53
|
-
|
|
54
|
-
// The default configuration for fake timers
|
|
55
|
-
// fakeTimers: {
|
|
56
|
-
// "enableGlobally": false
|
|
57
|
-
// },
|
|
58
|
-
|
|
59
|
-
// Force coverage collection from ignored files using an array of glob patterns
|
|
60
|
-
// forceCoverageMatch: [],
|
|
61
|
-
|
|
62
|
-
// A path to a module which exports an async function that is triggered once before all test suites
|
|
63
|
-
// globalSetup: undefined,
|
|
64
|
-
|
|
65
|
-
// A path to a module which exports an async function that is triggered once after all test suites
|
|
66
|
-
// globalTeardown: undefined,
|
|
67
|
-
|
|
68
|
-
// A set of global variables that need to be available in all test environments
|
|
69
|
-
// globals: {},
|
|
70
|
-
|
|
71
|
-
// The maximum number of workers used to run your tests. Can be specified as a percentage or a number. E.g. maxWorkers: 10% will use 10% of your CPU count + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
|
|
72
|
-
// maxWorkers: "50%",
|
|
73
|
-
|
|
74
|
-
// An array of directory names to be searched recursively up from the requiring module's location
|
|
75
|
-
// moduleDirectories: [
|
|
76
|
-
// "node_modules"
|
|
77
|
-
// ],
|
|
78
|
-
|
|
79
|
-
// An array of file extensions your modules use
|
|
80
|
-
// moduleFileExtensions: [
|
|
81
|
-
// "js",
|
|
82
|
-
// "mjs",
|
|
83
|
-
// "cjs",
|
|
84
|
-
// "jsx",
|
|
85
|
-
// "ts",
|
|
86
|
-
// "tsx",
|
|
87
|
-
// "json",
|
|
88
|
-
// "node"
|
|
89
|
-
// ],
|
|
90
|
-
|
|
91
|
-
// A map from regular expressions to module names (or to arrays of module names) that allows stubbing out resources with a single module
|
|
92
|
-
// moduleNameMapper: {},
|
|
93
|
-
|
|
94
|
-
// An array of regexp pattern strings, matched against all module paths before they are considered 'visible' to the module loader
|
|
95
|
-
// modulePathIgnorePatterns: [],
|
|
96
|
-
|
|
97
|
-
// Activates notifications for test results
|
|
98
|
-
// notify: false,
|
|
99
|
-
|
|
100
|
-
// An enum that specifies notification mode. Requires { notify: true }
|
|
101
|
-
// notifyMode: "failure-change",
|
|
102
|
-
|
|
103
|
-
// A preset that is used as a base for Jest's configuration
|
|
104
|
-
// preset: undefined,
|
|
105
|
-
|
|
106
|
-
// Run tests from one or more projects
|
|
107
|
-
// projects: undefined,
|
|
108
|
-
|
|
109
|
-
// Use this configuration option to add custom reporters to Jest
|
|
110
|
-
// reporters: undefined,
|
|
111
|
-
|
|
112
|
-
// Automatically reset mock state before every test
|
|
113
|
-
// resetMocks: false,
|
|
114
|
-
|
|
115
|
-
// Reset the module registry before running each individual test
|
|
116
|
-
// resetModules: false,
|
|
117
|
-
|
|
118
|
-
// A path to a custom resolver
|
|
119
|
-
// resolver: undefined,
|
|
120
|
-
|
|
121
|
-
// Automatically restore mock state and implementation before every test
|
|
122
|
-
// restoreMocks: false,
|
|
123
|
-
|
|
124
|
-
// The root directory that Jest should scan for tests and modules within
|
|
125
|
-
// rootDir: undefined,
|
|
126
|
-
|
|
127
|
-
// A list of paths to directories that Jest should use to search for files in
|
|
128
|
-
roots: ["./test/"],
|
|
129
|
-
|
|
130
|
-
// Allows you to use a custom runner instead of Jest's default test runner
|
|
131
|
-
// runner: "jest-runner",
|
|
132
|
-
|
|
133
|
-
// The paths to modules that run some code to configure or set up the testing environment before each test
|
|
134
|
-
// setupFiles: [],
|
|
135
|
-
|
|
136
|
-
// A list of paths to modules that run some code to configure or set up the testing framework before each test
|
|
137
|
-
// setupFilesAfterEnv: [],
|
|
138
|
-
|
|
139
|
-
// The number of seconds after which a test is considered as slow and reported as such in the results.
|
|
140
|
-
// slowTestThreshold: 5,
|
|
141
|
-
|
|
142
|
-
// A list of paths to snapshot serializer modules Jest should use for snapshot testing
|
|
143
|
-
// snapshotSerializers: [],
|
|
144
|
-
|
|
145
|
-
// The test environment that will be used for testing
|
|
146
|
-
// testEnvironment: "jest-environment-node",
|
|
147
|
-
|
|
148
|
-
// Options that will be passed to the testEnvironment
|
|
149
|
-
// testEnvironmentOptions: {},
|
|
150
|
-
|
|
151
|
-
// Adds a location field to test results
|
|
152
|
-
// testLocationInResults: false,
|
|
153
|
-
|
|
154
|
-
// The glob patterns Jest uses to detect test files
|
|
155
|
-
// testMatch: [
|
|
156
|
-
// "**/__tests__/**/*.[jt]s?(x)",
|
|
157
|
-
// "**/?(*.)+(spec|test).[tj]s?(x)"
|
|
158
|
-
// ],
|
|
159
|
-
|
|
160
|
-
// An array of regexp pattern strings that are matched against all test paths; matched tests are skipped
|
|
161
|
-
// testPathIgnorePatterns: [
|
|
162
|
-
// "\\\\node_modules\\\\"
|
|
163
|
-
// ],
|
|
164
|
-
|
|
165
|
-
// The regexp pattern or array of patterns that Jest uses to detect test files
|
|
166
|
-
// testRegex: [],
|
|
167
|
-
|
|
168
|
-
// This option allows the use of a custom results processor
|
|
169
|
-
// testResultsProcessor: undefined,
|
|
170
|
-
|
|
171
|
-
// This option allows use of a custom test runner
|
|
172
|
-
// testRunner: "jest-circus/runner",
|
|
173
|
-
|
|
174
|
-
// A map from regular expressions to paths to transformers
|
|
175
|
-
transform: {},
|
|
176
|
-
|
|
177
|
-
// An array of regexp pattern strings that are matched against all source file paths; matched files will skip transformation
|
|
178
|
-
// transformIgnorePatterns: [
|
|
179
|
-
// "\\\\node_modules\\\\",
|
|
180
|
-
// "\\.pnp\\.[^\\\\]+$"
|
|
181
|
-
// ],
|
|
182
|
-
|
|
183
|
-
// An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
|
|
184
|
-
// unmockedModulePathPatterns: undefined,
|
|
185
|
-
|
|
186
|
-
// Indicates whether each individual test should be reported during the run
|
|
187
|
-
// verbose: undefined,
|
|
188
|
-
|
|
189
|
-
// An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
|
|
190
|
-
// watchPathIgnorePatterns: [],
|
|
191
|
-
|
|
192
|
-
// Whether to use watchman for file crawling
|
|
193
|
-
// watchman: true,
|
|
194
|
-
};
|