parakeet.js 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/.gitmodules +3 -0
  2. package/README.md +240 -239
  3. package/examples/hf-spaces-demo/README.md +6 -9
  4. package/examples/hf-spaces-demo/package.json +1 -1
  5. package/examples/hf-spaces-demo/src/App.js +307 -316
  6. package/examples/react-demo/package.json +19 -19
  7. package/examples/react-demo/src/App.jsx +324 -326
  8. package/examples/react-demo-dev/src/App.jsx +23 -24
  9. package/package.json +1 -1
  10. package/publish.ps1 +65 -0
  11. package/src/hub.js +235 -241
  12. package/src/parakeet.js +15 -8
  13. package/src/preprocessor.js +75 -68
  14. package/docs/parakeet-transformers-js/.gitattributes +0 -2
  15. package/docs/parakeet-transformers-js/.prettierignore +0 -8
  16. package/docs/parakeet-transformers-js/.prettierrc +0 -10
  17. package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
  18. package/docs/parakeet-transformers-js/LICENSE +0 -202
  19. package/docs/parakeet-transformers-js/README.md +0 -448
  20. package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
  21. package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
  22. package/docs/parakeet-transformers-js/debug_test.js +0 -84
  23. package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
  24. package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
  25. package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
  26. package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
  27. package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
  28. package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
  29. package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
  30. package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
  31. package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
  32. package/docs/parakeet-transformers-js/js_steps.json +0 -821
  33. package/docs/parakeet-transformers-js/package-lock.json +0 -12251
  34. package/docs/parakeet-transformers-js/package.json +0 -96
  35. package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
  36. package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
  37. package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
  38. package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
  39. package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
  40. package/docs/parakeet-transformers-js/src/configs.js +0 -455
  41. package/docs/parakeet-transformers-js/src/env.js +0 -167
  42. package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
  43. package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
  44. package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
  45. package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
  46. package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
  47. package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
  48. package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
  49. package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
  50. package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
  51. package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
  52. package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
  53. package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
  54. package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
  55. package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
  56. package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
  57. package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
  58. package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
  59. package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
  60. package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
  61. package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
  62. package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
  63. package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
  64. package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
  65. package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
  66. package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
  67. package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
  68. package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
  69. package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
  70. package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
  71. package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
  72. package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
  73. package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
  74. package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
  75. package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
  76. package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
  77. package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
  78. package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
  79. package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
  80. package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
  81. package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
  82. package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
  83. package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
  84. package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
  85. package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
  86. package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
  87. package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
  88. package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
  89. package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
  90. package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
  91. package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
  92. package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
  93. package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
  94. package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
  95. package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
  96. package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
  97. package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
  98. package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
  99. package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
  100. package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
  101. package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
  102. package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
  103. package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
  104. package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
  105. package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
  106. package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
  107. package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
  108. package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
  109. package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
  110. package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
  111. package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
  112. package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
  113. package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
  114. package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
  115. package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
  116. package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
  117. package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
  118. package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
  119. package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
  120. package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
  121. package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
  122. package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
  123. package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
  124. package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
  125. package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
  126. package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
  127. package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
  128. package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
  129. package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
  130. package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
  131. package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
  132. package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
  133. package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
  134. package/docs/parakeet-transformers-js/src/models.js +0 -8644
  135. package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
  136. package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
  137. package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
  138. package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
  139. package/docs/parakeet-transformers-js/src/processors.js +0 -16
  140. package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
  141. package/docs/parakeet-transformers-js/src/transformers.js +0 -50
  142. package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
  143. package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
  144. package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
  145. package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
  146. package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
  147. package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
  148. package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
  149. package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
  150. package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
  151. package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
  152. package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
  153. package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
  154. package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
  155. package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
  156. package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
  157. package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
  158. package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
  159. package/docs/parakeet-transformers-js/tsconfig.json +0 -21
  160. package/docs/parakeet-transformers-js/webpack.config.js +0 -223
@@ -1,792 +0,0 @@
1
- // @ts-nocheck
2
-
3
- import * as fs from 'fs';
4
- import * as path from 'path';
5
- let ort;
6
- if (isNode()) {
7
- // eslint-disable-next-line global-require, import/no-extraneous-dependencies
8
- ort = await import('onnxruntime-node');
9
- ort = ort.default ?? ort;
10
- } else {
11
- ort = await import('onnxruntime-web').then(m=>m.default ?? m);
12
- }
13
- import { logMelSpectrogram, MEL_BINS } from './audio_features.js';
14
-
15
- /**
16
- * Small helper to detect if we are in Node (vs browser).
17
- */
18
- function isNode() {
19
- return typeof process !== 'undefined' && process.versions?.node;
20
- }
21
-
22
- /**
23
- * ONNX-based preprocessor for audio feature extraction.
24
- */
25
- export class OnnxPreprocessor {
26
- constructor(modelPath) {
27
- this.modelPath = modelPath;
28
- this.session = null;
29
- }
30
-
31
- async init() {
32
- if (!this.session) {
33
- this.session = await ort.InferenceSession.create(this.modelPath);
34
- }
35
- }
36
-
37
- async process(audio) {
38
- if (!this.session) await this.init();
39
-
40
- // Create a proper Float32Array with explicit ArrayBuffer
41
- const buffer = new ArrayBuffer(audio.length * 4); // 4 bytes per float32
42
- const audioFloat32 = new Float32Array(buffer);
43
- for (let i = 0; i < audio.length; i++) {
44
- audioFloat32[i] = audio[i];
45
- }
46
-
47
- // Reshape audio to [1, audio.length] for batch processing
48
- const waveforms = new ort.Tensor('float32', audioFloat32, [1, audioFloat32.length]);
49
-
50
- // Create proper BigInt64Array for lengths
51
- const lenBuffer = new ArrayBuffer(8); // 8 bytes per int64
52
- const lenArray = new BigInt64Array(lenBuffer);
53
- lenArray[0] = BigInt(audioFloat32.length);
54
- const waveforms_lens = new ort.Tensor('int64', lenArray, [1]);
55
-
56
- const feeds = {
57
- waveforms: waveforms,
58
- waveforms_lens: waveforms_lens
59
- };
60
-
61
- const outputs = await this.session.run(feeds);
62
- const featuresTensor = outputs['features'];
63
- const features_lens = outputs['features_lens'];
64
-
65
- // The preprocessor ONNX model outputs features in [batch, features, time]
66
- // format. The python equivalent (onnx-asr) does not transpose here, so we will
67
- // match that behavior and return the raw features. The consuming `transcribe`
68
- // function will be updated to handle this layout.
69
- return {
70
- features: featuresTensor.data,
71
- length: Number(features_lens.data[0]),
72
- };
73
- }
74
- }
75
-
76
- /**
77
- * Load a text file returning its content as string.
78
- * Works in both Node and browser (using fetch).
79
- */
80
- async function loadText(filePath) {
81
- if (isNode()) {
82
- return fs.promises.readFile(filePath, 'utf8');
83
- }
84
- const resp = await fetch(filePath);
85
- if (!resp.ok) throw new Error(`Failed to fetch ${filePath}: ${resp.status}`);
86
- return resp.text();
87
- }
88
-
89
- /**
90
- * Basic tokenizer that only supports decoding (id -> text).
91
- * `tokens.txt` format: one token per line: "token id".
92
- */
93
- export class ParakeetTokenizer {
94
- /**
95
- * @param {string[]} id2token Array where index=id, value=token string
96
- */
97
- constructor(id2token) {
98
- this.id2token = id2token;
99
- this.blankToken = '<blk>';
100
- }
101
-
102
- static async fromTokensFile(tokensPath) {
103
- const content = await loadText(tokensPath);
104
- const lines = content.split(/\r?\n/).filter(Boolean);
105
- const id2token = [];
106
- for (const line of lines) {
107
- const [tok, idStr] = line.split(/\s+/);
108
- const id = parseInt(idStr, 10);
109
- id2token[id] = tok;
110
- }
111
- return new ParakeetTokenizer(id2token);
112
- }
113
-
114
- /**
115
- * Decode an array of token ids to string.
116
- * Implements simple SentencePiece-style rule: leading "▁" marks space.
117
- */
118
- decode(ids) {
119
- let text = '';
120
- for (const id of ids) {
121
- const token = this.id2token[id];
122
- if (token === undefined) continue;
123
- if (token === this.blankToken) continue;
124
- if (token.startsWith('▁')) {
125
- // word start
126
- text += ' ' + token.slice(1);
127
- } else {
128
- text += token;
129
- }
130
- }
131
- return text.trim();
132
- }
133
- }
134
-
135
- /**
136
- * Skeleton for Parakeet RNNT model inference.
137
- */
138
- export class ParakeetModel {
139
- constructor(opts) {
140
- this.tokenizer = opts.tokenizer;
141
- this.encoderSession = opts.encoderSession;
142
- this.decoderSession = opts.decoderSession;
143
- this.joinerSession = opts.joinerSession;
144
- this.blankId = opts.blankId ?? 1024; // default
145
- this.predLayers = opts.predLayers;
146
- this.predHidden = opts.predHidden;
147
- this._normalizer = opts.normalizer ?? (s=>s);
148
- this.modelFormat = opts.modelFormat;
149
- this.isCombined = this.modelFormat === 'transformers.js';
150
- this.maxTokensPerStep = opts.maxTokensPerStep ?? 10;
151
- this.preprocessor = opts.preprocessor; // ONNX preprocessor
152
- this.subsampling = opts.subsampling ?? 8;
153
- this.windowStride = opts.windowStride ?? 0.01; // Store windowStride
154
-
155
- if (this.isCombined) {
156
- // Pre-create zero state tensors expected by combined model
157
- const numLayers = this.predLayers ?? 2;
158
- const hidden = this.predHidden ?? 640;
159
- const size = numLayers * 1 * hidden;
160
- const z = new Float32Array(size);
161
- this.zeroState1 = new ort.Tensor('float32', z, [numLayers, 1, hidden]);
162
- this.zeroState2 = new ort.Tensor('float32', z.slice(), [numLayers, 1, hidden]);
163
- this._combState1 = this.zeroState1;
164
- this._combState2 = this.zeroState2;
165
- }
166
-
167
- // cache for constant inputs that quantised decoder expects
168
- this.constInputs = {};
169
-
170
- // Determine joiner input names (encoder & decoder)
171
- this.joinerInputs = Object.keys(this.joinerSession.inputMetadata ?? {});
172
- if (isNode()) {
173
- console.log('[Parakeet] Joiner input names:', this.joinerInputs);
174
- console.log('[Parakeet] Encoder inputs:', Object.keys(this.encoderSession.inputMetadata ?? {}));
175
- console.log('[Parakeet] Encoder outputs:', Object.keys(this.encoderSession.outputMetadata ?? {}));
176
- }
177
- }
178
-
179
- static async fromDirectory(baseDir, { ortOptions, preferFloat32 = false } = {}) {
180
- const modelFormat = 'transformers.js';
181
-
182
- // Helper to select model path
183
- const selectModelPath = (name) => {
184
- const floatPath = path.join(baseDir, `${name}.onnx`);
185
- const int8Path = path.join(baseDir, `${name}.int8.onnx`);
186
-
187
- if (preferFloat32) {
188
- if (fs.existsSync(floatPath)) return floatPath;
189
- if (fs.existsSync(int8Path)) {
190
- console.warn(`[Parakeet] preferFloat32=true but only found ${int8Path}`);
191
- return int8Path;
192
- }
193
- } else {
194
- // Default behavior: prefer int8
195
- if (fs.existsSync(int8Path)) return int8Path;
196
- if (fs.existsSync(floatPath)) return floatPath;
197
- }
198
- return null; // Not found
199
- };
200
-
201
- const encoderPath = selectModelPath('encoder-model');
202
- const decoderJointPath = selectModelPath('decoder_joint-model');
203
-
204
- if (isNode()) {
205
- console.log(`[Parakeet] Selected encoder model: ${path.basename(encoderPath)}`);
206
- console.log(`[Parakeet] Selected decoder/joiner model: ${path.basename(decoderJointPath)}`);
207
- }
208
-
209
- if (!encoderPath || !decoderJointPath) {
210
- throw new Error('Model directory must contain encoder/decoder_joint ONNX files.');
211
- }
212
-
213
- // Try vocab.txt first, then tokens.txt
214
- let tokensPath = path.join(baseDir, 'vocab.txt');
215
- if (!fs.existsSync(tokensPath)) {
216
- tokensPath = path.join(baseDir, 'tokens.txt');
217
- }
218
-
219
- console.log(`[Parakeet] Loading ${modelFormat} format models from ${baseDir}`);
220
-
221
- const [tokenizer, encoderSession, joinerSession] = await Promise.all([
222
- ParakeetTokenizer.fromTokensFile(tokensPath),
223
- ort.InferenceSession.create(encoderPath, ortOptions),
224
- ort.InferenceSession.create(decoderJointPath, ortOptions),
225
- ]);
226
-
227
- // Load text normalizer rules if present
228
- let normalizer = (s)=>s;
229
-
230
- const normPath = path.join(baseDir, 'normalizer.json');
231
- if (fs.existsSync(normPath)) {
232
- try {
233
- const rules = JSON.parse(await fs.promises.readFile(normPath, 'utf8'));
234
- normalizer = (text)=>{
235
- let out = text;
236
- for (const [pattern, repl] of rules) {
237
- out = out.replace(new RegExp(pattern, 'g'), repl);
238
- }
239
- return out;
240
- };
241
- } catch(e) { console.warn('[Parakeet] Failed to load normalizer.json', e); }
242
- }
243
-
244
- // Extract prediction network metadata (layer & hidden size) if provided by Sherpa-ONNX.
245
- let predLayers = 2, predHidden = 640;
246
- try {
247
- const meta = encoderSession.metadata?.custom_metadata_map ?? {};
248
- if (meta.pred_rnn_layers) predLayers = parseInt(meta.pred_rnn_layers, 10);
249
- if (meta.pred_hidden) predHidden = parseInt(meta.pred_hidden, 10);
250
- } catch (_) { /* ignore */ }
251
-
252
- const configPath = path.join(baseDir, 'config.json');
253
- let cfg = {};
254
- if (fs.existsSync(configPath)) {
255
- try { cfg = JSON.parse(await fs.promises.readFile(configPath,'utf8')); }
256
- catch { cfg = {}; }
257
- }
258
- const maxTokensPerStep = cfg.max_tokens_per_step ?? 10;
259
-
260
- const featureSize = cfg.features_size ?? 128;
261
- const subsamplingFactor = cfg.subsampling_factor ?? 8;
262
- const windowStride = cfg.window_stride ?? 0.01; // Read from config, default 0.01
263
-
264
- // Create ONNX preprocessor (try nemo128 first, fallback to nemo80)
265
- const __filename = new URL(import.meta.url).pathname;
266
- const __dirname = path.dirname(__filename);
267
-
268
- // Get the directory of the current script and find assets
269
- let assetsDir = path.join(__dirname, '..', 'assets');
270
- let preprocessorPath = path.join(assetsDir, featureSize === 80 ? 'nemo80.onnx' : 'nemo128.onnx');
271
-
272
- // Fix Windows path issues - remove leading slash on Windows
273
- if (process.platform === 'win32' && __dirname.startsWith('/')) {
274
- assetsDir = path.join(__dirname.substring(1), '..', 'assets');
275
- preprocessorPath = path.join(assetsDir, featureSize === 80 ? 'nemo80.onnx' : 'nemo128.onnx');
276
- }
277
-
278
- if (!fs.existsSync(preprocessorPath)) {
279
- // fallback try the other size asset
280
- preprocessorPath = path.join(assetsDir, featureSize === 80 ? 'nemo128.onnx' : 'nemo80.onnx');
281
- }
282
-
283
- // Final fallback - try relative to current working directory
284
- if (!fs.existsSync(preprocessorPath)) {
285
- console.warn('[Parakeet] Preprocessor not found at:', preprocessorPath);
286
- preprocessorPath = path.resolve('./assets/nemo128.onnx');
287
- if (!fs.existsSync(preprocessorPath)) {
288
- preprocessorPath = path.resolve('./assets/nemo80.onnx');
289
- }
290
- }
291
-
292
- const preprocessor = new OnnxPreprocessor(preprocessorPath);
293
-
294
- // store subsampling factor for timestamp stride
295
- const subsampling = subsamplingFactor;
296
-
297
- return new ParakeetModel({
298
- tokenizer,
299
- encoderSession,
300
- decoderSession: null,
301
- joinerSession,
302
- predLayers,
303
- predHidden,
304
- normalizer,
305
- modelFormat,
306
- maxTokensPerStep,
307
- preprocessor,
308
- subsampling,
309
- windowStride
310
- });
311
- }
312
-
313
- /**
314
- * Compute 80-dim log-mel spectrogram using ONNX preprocessor.
315
- * @param {Float32Array} audio PCM normalized -1..1 @ sampleRate Hz
316
- * @param {number} sampleRate
317
- * @param {{skipCMVN?:boolean, debug?:boolean}} opts
318
- * @returns {Promise<Float32Array>} [T, 80]
319
- */
320
- async computeFeatures(audio, sampleRate, {skipCMVN=false, debug=false}={}) {
321
- if (!this.preprocessor) {
322
- throw new Error('ONNX preprocessor not initialized');
323
- }
324
-
325
- const result = await this.preprocessor.process(audio);
326
- const features = result.features;
327
- const T = result.length;
328
- const MEL = MEL_BINS;
329
-
330
- if (debug && isNode()) {
331
- console.log(`[Debug] ONNX preprocessor: ${T} frames x ${MEL} mel bins`);
332
- console.log('[Debug] first-frame mel (20 bins):', Array.from(features.slice(0,20)).map(v=>v.toFixed(3)).join(' '));
333
- }
334
-
335
- return features;
336
- }
337
-
338
- async _runDecoderStep(token, state) {
339
- const prev = new ort.Tensor('int32', new Int32Array([token]), [1, 1]);
340
- const decInputs = { targets: prev, target_length: new ort.Tensor('int32', new Int32Array([1]), [1]) };
341
- Object.assign(decInputs, state, this.constInputs);
342
-
343
- while (true) {
344
- try {
345
- const decOut = await this.decoderSession.run(decInputs);
346
- // Reshape decoder output to [1, 640, 1] for the joiner
347
- const decTensor = decOut['outputs'] ?? Object.values(decOut)[0];
348
- const reshapedData = new Float32Array(decTensor.data);
349
- const reshapedTensor = new ort.Tensor('float32', reshapedData, [1, this.predHidden ?? 640, 1]);
350
-
351
- const newState = Object.fromEntries(Object.entries(decOut).filter(([k]) => k.startsWith('states')));
352
-
353
- return { dec: reshapedTensor, state: newState };
354
- } catch (e) {
355
- let handled = false;
356
- // Missing input
357
- let m = /input '([^']+)' is missing/.exec(e.message);
358
- if (m) {
359
- const missing = m[1];
360
- const meta = this.decoderSession.inputMetadata?.[missing];
361
- let dims = meta?.dimensions ?? [1];
362
- dims = dims.map(d => (typeof d === 'number' && d > 0) ? d : 1);
363
- const size = dims.reduce((a, b) => a * b, 1);
364
- const type = meta?.type ?? 'float32';
365
- let tensor;
366
- switch (type) {
367
- case 'int32':
368
- tensor = new ort.Tensor('int32', new Int32Array(size), dims);
369
- break;
370
- case 'int64':
371
- tensor = new ort.Tensor('int64', new BigInt64Array(size), dims);
372
- break;
373
- default:
374
- tensor = new ort.Tensor('float32', new Float32Array(size), dims);
375
- }
376
- this.constInputs[missing] = tensor;
377
- decInputs[missing] = tensor;
378
- handled = true;
379
- }
380
-
381
- // Dimension mismatch (rank ok but specific axes wrong)
382
- const dimLines = [...e.message.matchAll(/index:\s*(\d+)\s*Got:\s*\d+\s*Expected:\s*(\d+)/g)];
383
- if (dimLines.length > 0) {
384
- const nameMatch = /input: ([^ ]+)/.exec(e.message);
385
- const tensorName = nameMatch ? nameMatch[1] : null;
386
- if (tensorName) {
387
- const dimsMap = {};
388
- let maxIdx = 0;
389
- for (const [, idxStr, expStr] of dimLines) {
390
- const idx = parseInt(idxStr, 10);
391
- const exp = parseInt(expStr, 10);
392
- dimsMap[idx] = exp;
393
- if (idx > maxIdx) maxIdx = idx;
394
- }
395
- const newDims = Array(maxIdx + 1).fill(1);
396
- for (const k in dimsMap) newDims[k] = dimsMap[k];
397
- const size = newDims.reduce((a, b) => a * b, 1);
398
- this.constInputs[tensorName] = new ort.Tensor('float32', new Float32Array(size), newDims);
399
- decInputs[tensorName] = this.constInputs[tensorName];
400
- handled = true;
401
- }
402
- }
403
-
404
- // Invalid rank mismatch
405
- const rankMatch = /Invalid rank for input:?\s*([^ ]+)\s*Got:\s*(\d+)\s*Expected:\s*(\d+)/.exec(e.message);
406
- if (rankMatch) {
407
- const tensorName = rankMatch[1];
408
- const expectedRank = parseInt(rankMatch[3], 10);
409
- const meta = this.decoderSession.inputMetadata?.[tensorName];
410
- let dims = meta?.dimensions ?? Array(expectedRank).fill(1);
411
- if (dims.length !== expectedRank) {
412
- dims = Array.from({ length: expectedRank }, (_, i) => (dims[i] && typeof dims[i] === 'number' && dims[i] > 0) ? dims[i] : 1);
413
- } else {
414
- dims = dims.map(d => (typeof d === 'number' && d > 0) ? d : 1);
415
- }
416
- const size = dims.reduce((a, b) => a * b, 1);
417
- this.constInputs[tensorName] = new ort.Tensor('float32', new Float32Array(size), dims);
418
- decInputs[tensorName] = this.constInputs[tensorName];
419
- handled = true;
420
- }
421
-
422
- if (!handled) throw e;
423
- }
424
- }
425
- }
426
-
427
- async _runCombinedStep(encTensor, token, currentState = null) {
428
- // Always pass a single token, matching Python onnx-asr behavior
429
- const singleToken = typeof token === 'number' ? token : this.blankId;
430
-
431
- const targetTensor = new ort.Tensor('int32', new Int32Array([singleToken]), [1, 1]);
432
- const lenTensor = new ort.Tensor('int32', new Int32Array([1]), [1]);
433
-
434
- // Use provided state or default internal state
435
- const state1 = currentState?.state1 || this._combState1;
436
- const state2 = currentState?.state2 || this._combState2;
437
-
438
- const feeds = {
439
- encoder_outputs: encTensor,
440
- targets: targetTensor,
441
- target_length: lenTensor,
442
- input_states_1: state1,
443
- input_states_2: state2,
444
- };
445
-
446
- const out = await this.joinerSession.run(feeds);
447
- const logits = out['outputs'];
448
-
449
- // The output tensor shape is [B, 1, 1, D] since we're passing a single token.
450
- // We can directly use the logits without slicing.
451
- const [_B, _, N, D] = logits.dims;
452
- const lastLogits = logits.data.subarray(0, D);
453
-
454
- // Split token logits / duration logits
455
- const vocab = this.tokenizer.id2token.length;
456
- const tokenLogits = lastLogits.slice(0, vocab);
457
- const durLogits = lastLogits.slice(vocab);
458
- let step = 0;
459
- if (durLogits.length > 0) {
460
- let max = -Infinity;
461
- for (let i = 0; i < durLogits.length; ++i) if (durLogits[i] > max) { max = durLogits[i]; step = i; }
462
-
463
- // Debug: Log duration logits to understand the pattern
464
- if (isNode() && Math.random() < 0.1) { // Log 10% of the time to avoid spam
465
- console.log('[DEBUG] Duration logits:', Array.from(durLogits).map((v, i) => `${i}:${v.toFixed(2)}`).join(' '));
466
- console.log('[DEBUG] Duration argmax:', step, 'max_value:', max.toFixed(2));
467
- }
468
- }
469
-
470
- // Return new state for external management
471
- const newState = {
472
- state1: out['output_states_1'] || state1,
473
- state2: out['output_states_2'] || state2
474
- };
475
-
476
- return { tokenLogits, step, newState };
477
- }
478
-
479
/**
 * Transcribe audio with a single full-utterance pass: extract mel features,
 * run the encoder once over the whole clip, then greedily decode the encoder
 * frames with the TDT (token-and-duration) loop — emitting tokens at each
 * frame until blank (or the per-step token cap) advances time, optionally
 * skipping ahead by the predicted duration.
 *
 * @param {Float32Array} audio 16 kHz mono PCM [-1,1]
 * @param {number} sampleRate input sample rate (must be 16k for now)
 * @param {{returnTimestamps?:boolean, returnConfidences?:boolean, temperature?:number, skipCMVN?:boolean, debug?:boolean}} opts
 * @returns {Promise<string|{text:string,tokens:number[],timestamps?:[number,number][],confidences?:number[]}>}
 *   NOTE(review): the actual resolved value is an object of shape
 *   { utterance_text, words, tokens?, confidence_scores?, is_final }, not the
 *   string|{text,...} union documented above — the two fast-path/full-path
 *   returns below also differ in shape (the fast path has `metrics` but no
 *   `tokens`/`confidence_scores`). Consider reconciling the JSDoc.
 */
async transcribe(audio, sampleRate = 16000, opts = {}) {
  const {
    returnTimestamps = false,
    returnConfidences = false,
    // NOTE(review): temperature > 1 flattens the softmax; it rescales the
    // confidence values below but does not change the argmax token choice.
    temperature = 1.2,
    debug = false,
    skipCMVN = false
  } = opts;
  // 1. feature extraction
  // NOTE(review): unconditional console.log (not gated on `debug`) — noisy in
  // production; confirm this is intentional.
  console.log(`[Parakeet] Got ${audio.length} audio samples @ ${sampleRate} Hz`);
  const features = await this.computeFeatures(audio, sampleRate, {skipCMVN,debug});
  const MEL = MEL_BINS;
  // `features` is a flat buffer fed below as a [1, MEL, T] tensor, so its
  // length must be an exact multiple of MEL_BINS.
  const T = features.length / MEL;

  // Debug audio preprocessing
  if (debug && isNode()) {
    console.log(`[Parakeet] Audio duration: ${(audio.length / sampleRate).toFixed(2)}s`);
    console.log(`[Parakeet] Feature dims: ${T} frames x ${MEL} mel bins`);

    // Find min/max without spread operator to avoid stack overflow
    // (Math.min(...audio) would blow the argument limit on long clips).
    let audioMin = audio[0], audioMax = audio[0];
    for (let i = 1; i < audio.length; i++) {
      if (audio[i] < audioMin) audioMin = audio[i];
      if (audio[i] > audioMax) audioMax = audio[i];
    }
    let featMin = features[0], featMax = features[0];
    for (let i = 1; i < features.length; i++) {
      if (features[i] < featMin) featMin = features[i];
      if (features[i] > featMax) featMax = features[i];
    }

    console.log(`[Parakeet] Audio energy: min=${audioMin.toFixed(3)} max=${audioMax.toFixed(3)}`);
    console.log(`[Parakeet] Feature range: min=${featMin.toFixed(3)} max=${featMax.toFixed(3)}`);
  }

  // Decode accumulators: emitted token ids, per-token [start,end] times,
  // per-token confidences, and per-frame confidences.
  const ids = [];
  const tokenTimes = [];
  const tokenConfs = [];
  const frameConfs = []; // per-frame confidences
  let overallLogProb = 0;

  // Track decoder state separately for proper state management
  // (threaded through _runCombinedStep on the combined-model path).
  let decoderState = null;

  // The entire logic is now simplified to process the full feature set at once,
  // eliminating the need for chunking.

  // 1. Encode the entire feature set
  const input = new ort.Tensor('float32', features, [1, MEL, T]);
  const lenTensor = new ort.Tensor('int64', BigInt64Array.from([BigInt(T)]), [1]);
  const encOut = await this.encoderSession.run({ 'audio_signal': input, 'length': lenTensor });
  // Prefer the named 'outputs' tensor; fall back to the first output if the
  // exported model uses a different name.
  const enc = encOut['outputs'] ?? Object.values(encOut)[0];
  if (debug && isNode()) console.log(`[Parakeet] Encoder output dims: ${enc.dims}`);

  // 2. Transpose encoder output to match onnx-asr: [B, D, T] -> [B, T, D]
  const [B, D, T_enc] = enc.dims;
  const transposedEncData = new Float32Array(B * T_enc * D);
  for (let b = 0; b < B; ++b) {
    for (let t = 0; t < T_enc; ++t) {
      for (let d = 0; d < D; ++d) {
        const srcIdx = b * (D * T_enc) + d * T_enc + t;
        const destIdx = b * (T_enc * D) + t * D + d;
        transposedEncData[destIdx] = enc.data[srcIdx];
      }
    }
  }
  const encTransposed = {
    data: transposedEncData,
    dims: [B, T_enc, D],
  };

  // 3. Decode the transposed encoder output frame by frame
  const hiddenSize = encTransposed.dims[2]; // D
  const seqLen = encTransposed.dims[1]; // T_enc (downsampled time-steps)
  let t = 0;                // current encoder-frame index
  let emittedTokens = 0;    // tokens emitted at the current frame (TDT cap)

  while (t < seqLen) {
    const currFrameIdx = t; // Absolute index in encoder frames

    // Slice encoder output for timestep t.
    const frameBuf = new Float32Array(hiddenSize);
    const frameOffset = t * hiddenSize; // Since B=1, offset is t * D
    for (let i = 0; i < hiddenSize; i++) {
      frameBuf[i] = encTransposed.data[frameOffset + i];
    }
    // Single-frame tensor shaped [1, D, 1] as the joiner expects.
    const encTensor = new ort.Tensor('float32', frameBuf, [1, hiddenSize, 1]);

    let tokenLogitsData, step; let durLogitsArr=null;
    let newDecoderState = null;
    if (this.isCombined) {
      // Combined decoder+joiner model: feed the previously emitted token
      // (or blank at the start) plus the carried decoder state.
      const lastToken = ids.length > 0 ? ids[ids.length - 1] : this.blankId;
      const res = await this._runCombinedStep(encTensor, lastToken, decoderState);
      tokenLogitsData = res.tokenLogits;
      step = res.step;
      newDecoderState = res.newState;
    } else {
      // This block for separate decoder/joiner is preserved but may need review
      // if still in use.
      // FIXME(review): `dec` is referenced here but never declared or
      // initialized anywhere in this method (a decoder step should have
      // produced it before the first joiner call). As written, this branch
      // throws a ReferenceError the first time it runs — it appears to be
      // dead code kept from an earlier revision.
      const joinFeeds = {};
      if (this.joinerInputs.length > 0) {
        // Map session input names heuristically: anything matching /enc/ gets
        // the encoder frame, /dec/ gets the decoder output, else by position.
        this.joinerInputs.forEach((name, idx) => {
          if (/enc/i.test(name)) joinFeeds[name] = encTensor;
          else if (/dec/i.test(name)) joinFeeds[name] = dec;
          else joinFeeds[name] = idx === 0 ? encTensor : dec;
        });
      }
      joinFeeds['encoder_outputs'] = encTensor;
      joinFeeds['decoder_outputs'] = dec;
      const joinOut = await this.joinerSession.run(joinFeeds);
      const joinTensor = Object.values(joinOut)[0];
      const vocabSize = this.tokenizer.id2token.length;
      // Joiner output packs [token logits | duration logits]; split at vocab.
      tokenLogitsData = joinTensor.data.slice(0, vocabSize);
      durLogitsArr = joinTensor.data.slice(vocabSize);
      step = 0;
      if (durLogitsArr.length > 0) {
        // Argmax over duration logits gives the frame-skip ("step") amount.
        let max=-Infinity; for (let i=0;i<durLogitsArr.length;++i) if (durLogitsArr[i]>max){max=durLogitsArr[i]; step=i;}
      }
    }

    const tokenLogits = tokenLogitsData;
    // Apply temperature scaling
    const scaled = new Float32Array(tokenLogits.length);
    for (let i = 0; i < tokenLogits.length; ++i) scaled[i] = tokenLogits[i] / temperature;

    // Argmax token & confidence
    let max = -Infinity, maxId = 0;
    for (let i = 0; i < scaled.length; ++i) if (scaled[i] > max) { max = scaled[i]; maxId = i; }

    let confVal = 0;
    if (returnConfidences) {
      // Softmax probability of the argmax token, computed in a numerically
      // stable way: p(max) = 1 / sum(exp(logit - maxLogit)).
      let sum = 0;
      const maxLogit = max;
      for (let i = 0; i < scaled.length; ++i) sum += Math.exp(scaled[i] - maxLogit);
      confVal = 1 / sum;
      frameConfs.push(confVal);
      overallLogProb += Math.log(confVal);
    }

    // Use onnx-asr algorithm: first process token, then decide advancement
    if (maxId !== this.blankId) {
      ids.push(maxId);
      if (returnTimestamps) {
        // Seconds per encoder frame = subsampling factor * feature hop.
        const TIME_STRIDE = this.subsampling * this.windowStride;
        const durationFrames = step > 0 ? step : 1; // at least 1 frame
        const start = currFrameIdx * TIME_STRIDE;
        const end = (currFrameIdx + durationFrames) * TIME_STRIDE;
        tokenTimes.push([start,end]);
      }
      if (returnConfidences) tokenConfs.push(confVal);

      if (!this.isCombined) {
        // FIXME(review): `state` and `newState` are also undeclared in this
        // scope (same dead-branch issue as `dec` above) — this destructuring
        // assignment would throw if the separate-decoder path were exercised.
        ({dec, state: newState} = await this._runDecoderStep(maxId,state));
        state = newState;
      } else {
        // Only commit the new decoder state when a token was emitted; blank
        // steps keep the previous state.
        decoderState = newDecoderState;
      }
      emittedTokens += 1;
    }

    // Now decide advancement based on onnx-asr algorithm:
    // advance on blank, on hitting the per-frame emission cap, or by the
    // predicted duration `step` when the duration head says to skip ahead.
    const shouldAdvance = maxId === this.blankId || emittedTokens >= this.maxTokensPerStep;

    if (step > 0) {
      t += step;
      emittedTokens = 0;
    } else if (shouldAdvance) {
      t += 1;
      emittedTokens = 0;
    }
  }

  if (debug && isNode()) {
    // NOTE(review): `t` counts encoder (downsampled) frames while `T` counts
    // mel frames, so "(${T} total)" mixes units — presumably T_enc was meant.
    console.log(`[Parakeet] Processed ${t} feature frames (${T} total)`);
  }

  const text = this._normalizer(this.tokenizer.decode(ids));

  // Fast path: neither timestamps nor confidences requested — return the
  // minimal result shape.
  if (!returnTimestamps && !returnConfidences) {
    return { utterance_text: text, words: [], metrics: {}, is_final: true };
  }

  const words = [];
  let avgWordConfidence = 0;
  // Prepare arrays for per-token information regardless of timestamp/confidence flags so that
  // they are in scope for the final return object.
  const tokensDetailed = [];
  let avgTokenConfidence = 0;

  // NOTE(review): words/tokensDetailed are only populated when
  // returnTimestamps is true — with returnConfidences alone, `words` and
  // `tokens` come back empty while confidence_scores is still filled.
  if (returnTimestamps) {
    let currentWord = '';
    let wordStartTime = 0;
    let wordEndTime = 0;
    let wordConfidences = [];

    ids.forEach((tokenId, i) => {
      const tokenText = this.tokenizer.id2token[tokenId];
      if (tokenText === this.tokenizer.blankToken) return;

      const tokenTime = tokenTimes[i];
      const tokenConf = returnConfidences ? tokenConfs[i] : 0;

      // SentencePiece uses a special character '\u2581' (visible as "▁") to mark the
      // beginning of a new word. Detect this character instead of a normal space.
      const isWordStart = tokenText.startsWith('▁');

      if (isWordStart) {
        // A new word is starting. First, push the old one if it exists.
        if (currentWord) {
          const avgConfidence = wordConfidences.length > 0 ? wordConfidences.reduce((a, b) => a + b, 0) / wordConfidences.length : 0;
          words.push({
            text: currentWord,
            start_time: Number(wordStartTime.toFixed(3)),
            end_time: Number(wordEndTime.toFixed(3)),
            confidence: Number(avgConfidence.toFixed(4))
          });
        }

        // Now, start the new word (dropping the leading "▁" marker).
        currentWord = tokenText.slice(1);
        wordStartTime = tokenTime[0];
        wordEndTime = tokenTime[1];
        wordConfidences = [tokenConf];

      } else {
        // Not a word start, so it's a continuation of the current word.
        // This also handles the very first token if it has no leading space.
        if (!currentWord) {
          wordStartTime = tokenTime[0];
        }
        currentWord += tokenText;
        wordEndTime = tokenTime[1]; // just update the end time
        if (returnConfidences) {
          wordConfidences.push(tokenConf);
        }
      }
    });

    // Add the very last word after the loop finishes
    if (currentWord) {
      const avgConfidence = wordConfidences.length > 0 ? wordConfidences.reduce((a, b) => a + b, 0) / wordConfidences.length : 0;
      words.push({
        text: currentWord,
        start_time: Number(wordStartTime.toFixed(3)),
        end_time: Number(wordEndTime.toFixed(3)),
        confidence: Number(avgConfidence.toFixed(4))
      });
    }

    if (words.length > 0) {
      avgWordConfidence = words.map(w => w.confidence).reduce((a,b) => a + b, 0) / words.length;
    }

    // ------------------------------------------------------------------
    // Build per-token information array similar to NeMo output structure
    // ------------------------------------------------------------------
    ids.forEach((tokenId, i) => {
      const rawToken = this.tokenizer.id2token[tokenId];
      if (rawToken === this.tokenizer.blankToken) return; // skip blanks

      const cleanToken = rawToken.startsWith('▁') ? rawToken.slice(1) : rawToken;

      const tokEntry = { token: [cleanToken] };

      if (returnTimestamps && tokenTimes[i]) {
        const [s, e] = tokenTimes[i];
        tokEntry.start_time = Number(s.toFixed(3));
        tokEntry.end_time = Number(e.toFixed(3));
      }

      if (returnConfidences && tokenConfs[i] !== undefined) {
        const conf = tokenConfs[i];
        tokEntry.confidence = Number(conf.toFixed(4));
      }

      tokensDetailed.push(tokEntry);
    });

    if (tokensDetailed.length > 0 && returnConfidences) {
      avgTokenConfidence = tokensDetailed.map(t => t.confidence || 0).reduce((a, b) => a + b, 0) / tokensDetailed.length;
    }
  }

  // Full result: text plus word/token detail and (optionally) confidence
  // aggregates; raw_* fields are attached only in debug mode.
  return {
    utterance_text: text,
    words,
    tokens: tokensDetailed,
    confidence_scores: {
      ...(returnConfidences ? {
        token: tokenConfs.map(c=>Number(c.toFixed(4))),
        token_avg: Number(avgTokenConfidence.toFixed(4)),
        word: words.map(w=>w.confidence),
        word_avg: Number(avgWordConfidence.toFixed(4)),
        frame: frameConfs.map(f=>Number(f.toFixed(4))),
        frame_avg: frameConfs.length ? Number((frameConfs.reduce((a,b)=>a+b,0)/frameConfs.length).toFixed(4)) : null,
        overall_log_prob: Number(overallLogProb.toFixed(6))
      } : {
        overall_log_prob: null,
        frame_avg: null,
        frame: null,
      }),
    },
    is_final: true,
    ...(debug ? { raw_tokens: ids, raw_token_timestamps: tokenTimes, raw_token_confidences: tokenConfs } : {})
  };
}
792
- }