parakeet.js 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.gitmodules +3 -0
  2. package/README.md +240 -239
  3. package/examples/hf-spaces-demo/README.md +6 -9
  4. package/examples/hf-spaces-demo/package.json +1 -1
  5. package/examples/hf-spaces-demo/src/App.js +307 -316
  6. package/examples/react-demo/package.json +19 -19
  7. package/examples/react-demo/src/App.jsx +324 -326
  8. package/examples/react-demo-dev/src/App.jsx +23 -24
  9. package/package.json +1 -1
  10. package/publish.ps1 +65 -0
  11. package/src/hub.js +235 -241
  12. package/src/parakeet.js +15 -8
  13. package/src/preprocessor.js +75 -68
  14. package/docs/parakeet-transformers-js/.gitattributes +0 -2
  15. package/docs/parakeet-transformers-js/.prettierignore +0 -8
  16. package/docs/parakeet-transformers-js/.prettierrc +0 -10
  17. package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
  18. package/docs/parakeet-transformers-js/LICENSE +0 -202
  19. package/docs/parakeet-transformers-js/README.md +0 -448
  20. package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
  21. package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
  22. package/docs/parakeet-transformers-js/debug_test.js +0 -84
  23. package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
  24. package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
  25. package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
  26. package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
  27. package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
  28. package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
  29. package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
  30. package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
  31. package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
  32. package/docs/parakeet-transformers-js/js_steps.json +0 -821
  33. package/docs/parakeet-transformers-js/package-lock.json +0 -12251
  34. package/docs/parakeet-transformers-js/package.json +0 -96
  35. package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
  36. package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
  37. package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
  38. package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
  39. package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
  40. package/docs/parakeet-transformers-js/src/configs.js +0 -455
  41. package/docs/parakeet-transformers-js/src/env.js +0 -167
  42. package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
  43. package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
  44. package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
  45. package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
  46. package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
  47. package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
  48. package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
  49. package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
  50. package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
  51. package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
  52. package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
  53. package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
  54. package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
  55. package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
  56. package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
  57. package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
  58. package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
  59. package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
  60. package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
  61. package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
  62. package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
  63. package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
  64. package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
  65. package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
  66. package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
  67. package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
  68. package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
  69. package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
  70. package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
  71. package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
  72. package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
  73. package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
  74. package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
  75. package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
  76. package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
  77. package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
  78. package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
  79. package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
  80. package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
  81. package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
  82. package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
  83. package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
  84. package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
  85. package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
  86. package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
  87. package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
  88. package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
  89. package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
  90. package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
  91. package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
  92. package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
  93. package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
  94. package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
  95. package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
  96. package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
  97. package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
  98. package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
  99. package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
  100. package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
  101. package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
  102. package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
  103. package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
  104. package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
  105. package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
  106. package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
  107. package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
  108. package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
  109. package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
  110. package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
  111. package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
  112. package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
  113. package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
  114. package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
  115. package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
  116. package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
  117. package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
  118. package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
  119. package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
  120. package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
  121. package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
  122. package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
  123. package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
  124. package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
  125. package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
  126. package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
  127. package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
  128. package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
  129. package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
  130. package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
  131. package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
  132. package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
  133. package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
  134. package/docs/parakeet-transformers-js/src/models.js +0 -8644
  135. package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
  136. package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
  137. package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
  138. package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
  139. package/docs/parakeet-transformers-js/src/processors.js +0 -16
  140. package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
  141. package/docs/parakeet-transformers-js/src/transformers.js +0 -50
  142. package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
  143. package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
  144. package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
  145. package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
  146. package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
  147. package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
  148. package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
  149. package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
  150. package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
  151. package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
  152. package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
  153. package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
  154. package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
  155. package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
  156. package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
  157. package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
  158. package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
  159. package/docs/parakeet-transformers-js/tsconfig.json +0 -21
  160. package/docs/parakeet-transformers-js/webpack.config.js +0 -223
@@ -1,821 +0,0 @@
1
- {
2
- "audio_length": 176000,
3
- "sample_rate": 16000,
4
- "model_config": {
5
- "vocab_size": 1025,
6
- "blank_idx": 1024,
7
- "max_tokens_per_step": 10,
8
- "subsampling_factor": 8,
9
- "pred_layers": 2,
10
- "pred_hidden": 640
11
- },
12
- "steps": [
13
- {
14
- "step": 0,
15
- "t": 0,
16
- "emitted_tokens": 0,
17
- "probs_shape": [
18
- 1025
19
- ],
20
- "token": 1024,
21
- "token_text": "<blk>",
22
- "is_blank": true,
23
- "step_prediction": 1,
24
- "top_5_tokens": [
25
- {
26
- "token": 1024,
27
- "text": "<blk>",
28
- "prob": 43.882271
29
- },
30
- {
31
- "token": 34,
32
- "text": "▁I",
33
- "prob": 36.354946
34
- },
35
- {
36
- "token": 248,
37
- "text": "▁We",
38
- "prob": 35.99194
39
- },
40
- {
41
- "token": 139,
42
- "text": "▁So",
43
- "prob": 35.869053
44
- },
45
- {
46
- "token": 108,
47
- "text": "▁W",
48
- "prob": 35.202621
49
- }
50
- ],
51
- "action": "blank",
52
- "advance": 1,
53
- "advance_reason": "duration_step"
54
- },
55
- {
56
- "step": 1,
57
- "t": 1,
58
- "emitted_tokens": 0,
59
- "probs_shape": [
60
- 1025
61
- ],
62
- "token": 1024,
63
- "token_text": "<blk>",
64
- "is_blank": true,
65
- "step_prediction": 1,
66
- "top_5_tokens": [
67
- {
68
- "token": 1024,
69
- "text": "<blk>",
70
- "prob": 30.915867
71
- },
72
- {
73
- "token": 31,
74
- "text": "▁and",
75
- "prob": 25.953573
76
- },
77
- {
78
- "token": 123,
79
- "text": "▁And",
80
- "prob": 25.671907
81
- },
82
- {
83
- "token": 34,
84
- "text": "▁I",
85
- "prob": 25.004002
86
- },
87
- {
88
- "token": 20,
89
- "text": "▁to",
90
- "prob": 24.753914
91
- }
92
- ],
93
- "action": "blank",
94
- "advance": 1,
95
- "advance_reason": "duration_step"
96
- },
97
- {
98
- "step": 2,
99
- "t": 2,
100
- "emitted_tokens": 0,
101
- "probs_shape": [
102
- 1025
103
- ],
104
- "token": 1024,
105
- "token_text": "<blk>",
106
- "is_blank": true,
107
- "step_prediction": 1,
108
- "top_5_tokens": [
109
- {
110
- "token": 1024,
111
- "text": "<blk>",
112
- "prob": 40.861206
113
- },
114
- {
115
- "token": 5,
116
- "text": "▁the",
117
- "prob": 35.668388
118
- },
119
- {
120
- "token": 3,
121
- "text": "▁a",
122
- "prob": 35.413628
123
- },
124
- {
125
- "token": 20,
126
- "text": "▁to",
127
- "prob": 35.111492
128
- },
129
- {
130
- "token": 51,
131
- "text": "▁it",
132
- "prob": 34.783482
133
- }
134
- ],
135
- "action": "blank",
136
- "advance": 1,
137
- "advance_reason": "duration_step"
138
- },
139
- {
140
- "step": 3,
141
- "t": 3,
142
- "emitted_tokens": 0,
143
- "probs_shape": [
144
- 1025
145
- ],
146
- "token": 1024,
147
- "token_text": "<blk>",
148
- "is_blank": true,
149
- "step_prediction": 1,
150
- "top_5_tokens": [
151
- {
152
- "token": 1024,
153
- "text": "<blk>",
154
- "prob": 49.709625
155
- },
156
- {
157
- "token": 3,
158
- "text": "▁a",
159
- "prob": 44.681561
160
- },
161
- {
162
- "token": 20,
163
- "text": "▁to",
164
- "prob": 44.337646
165
- },
166
- {
167
- "token": 84,
168
- "text": "▁go",
169
- "prob": 44.20607
170
- },
171
- {
172
- "token": 51,
173
- "text": "▁it",
174
- "prob": 44.175995
175
- }
176
- ],
177
- "action": "blank",
178
- "advance": 1,
179
- "advance_reason": "duration_step"
180
- },
181
- {
182
- "step": 4,
183
- "t": 4,
184
- "emitted_tokens": 0,
185
- "probs_shape": [
186
- 1025
187
- ],
188
- "token": 1024,
189
- "token_text": "<blk>",
190
- "is_blank": true,
191
- "step_prediction": 1,
192
- "top_5_tokens": [
193
- {
194
- "token": 1024,
195
- "text": "<blk>",
196
- "prob": 58.481571
197
- },
198
- {
199
- "token": 34,
200
- "text": "▁I",
201
- "prob": 56.156162
202
- },
203
- {
204
- "token": 157,
205
- "text": "▁H",
206
- "prob": 55.002003
207
- },
208
- {
209
- "token": 66,
210
- "text": "▁A",
211
- "prob": 54.671837
212
- },
213
- {
214
- "token": 121,
215
- "text": "▁B",
216
- "prob": 54.349628
217
- }
218
- ],
219
- "action": "blank",
220
- "advance": 1,
221
- "advance_reason": "duration_step"
222
- },
223
- {
224
- "step": 5,
225
- "t": 5,
226
- "emitted_tokens": 0,
227
- "probs_shape": [
228
- 1025
229
- ],
230
- "token": 1024,
231
- "token_text": "<blk>",
232
- "is_blank": true,
233
- "step_prediction": 1,
234
- "top_5_tokens": [
235
- {
236
- "token": 1024,
237
- "text": "<blk>",
238
- "prob": 73.044655
239
- },
240
- {
241
- "token": 34,
242
- "text": "▁I",
243
- "prob": 69.69529
244
- },
245
- {
246
- "token": 157,
247
- "text": "▁H",
248
- "prob": 67.772011
249
- },
250
- {
251
- "token": 5,
252
- "text": "▁the",
253
- "prob": 66.878075
254
- },
255
- {
256
- "token": 3,
257
- "text": "▁a",
258
- "prob": 66.411865
259
- }
260
- ],
261
- "action": "blank",
262
- "advance": 1,
263
- "advance_reason": "duration_step"
264
- },
265
- {
266
- "step": 6,
267
- "t": 6,
268
- "emitted_tokens": 0,
269
- "probs_shape": [
270
- 1025
271
- ],
272
- "token": 1024,
273
- "token_text": "<blk>",
274
- "is_blank": true,
275
- "step_prediction": 1,
276
- "top_5_tokens": [
277
- {
278
- "token": 1024,
279
- "text": "<blk>",
280
- "prob": 82.074425
281
- },
282
- {
283
- "token": 121,
284
- "text": "▁B",
285
- "prob": 75.904678
286
- },
287
- {
288
- "token": 157,
289
- "text": "▁H",
290
- "prob": 75.194649
291
- },
292
- {
293
- "token": 260,
294
- "text": "▁E",
295
- "prob": 75.060715
296
- },
297
- {
298
- "token": 34,
299
- "text": "▁I",
300
- "prob": 74.639343
301
- }
302
- ],
303
- "action": "blank",
304
- "advance": 1,
305
- "advance_reason": "duration_step"
306
- },
307
- {
308
- "step": 7,
309
- "t": 7,
310
- "emitted_tokens": 0,
311
- "probs_shape": [
312
- 1025
313
- ],
314
- "token": 1024,
315
- "token_text": "<blk>",
316
- "is_blank": true,
317
- "step_prediction": 1,
318
- "top_5_tokens": [
319
- {
320
- "token": 1024,
321
- "text": "<blk>",
322
- "prob": 88.262825
323
- },
324
- {
325
- "token": 212,
326
- "text": "▁N",
327
- "prob": 81.152191
328
- },
329
- {
330
- "token": 164,
331
- "text": "▁M",
332
- "prob": 80.99865
333
- },
334
- {
335
- "token": 34,
336
- "text": "▁I",
337
- "prob": 80.974327
338
- },
339
- {
340
- "token": 121,
341
- "text": "▁B",
342
- "prob": 80.753807
343
- }
344
- ],
345
- "action": "blank",
346
- "advance": 1,
347
- "advance_reason": "duration_step"
348
- },
349
- {
350
- "step": 8,
351
- "t": 8,
352
- "emitted_tokens": 0,
353
- "probs_shape": [
354
- 1025
355
- ],
356
- "token": 1024,
357
- "token_text": "<blk>",
358
- "is_blank": true,
359
- "step_prediction": 1,
360
- "top_5_tokens": [
361
- {
362
- "token": 1024,
363
- "text": "<blk>",
364
- "prob": 91.791618
365
- },
366
- {
367
- "token": 34,
368
- "text": "▁I",
369
- "prob": 86.509773
370
- },
371
- {
372
- "token": 121,
373
- "text": "▁B",
374
- "prob": 85.500786
375
- },
376
- {
377
- "token": 157,
378
- "text": "▁H",
379
- "prob": 84.580933
380
- },
381
- {
382
- "token": 164,
383
- "text": "▁M",
384
- "prob": 83.718918
385
- }
386
- ],
387
- "action": "blank",
388
- "advance": 1,
389
- "advance_reason": "duration_step"
390
- },
391
- {
392
- "step": 9,
393
- "t": 9,
394
- "emitted_tokens": 0,
395
- "probs_shape": [
396
- 1025
397
- ],
398
- "token": 1024,
399
- "token_text": "<blk>",
400
- "is_blank": true,
401
- "step_prediction": 1,
402
- "top_5_tokens": [
403
- {
404
- "token": 1024,
405
- "text": "<blk>",
406
- "prob": 95.938683
407
- },
408
- {
409
- "token": 34,
410
- "text": "▁I",
411
- "prob": 90.072189
412
- },
413
- {
414
- "token": 157,
415
- "text": "▁H",
416
- "prob": 89.927513
417
- },
418
- {
419
- "token": 121,
420
- "text": "▁B",
421
- "prob": 87.387215
422
- },
423
- {
424
- "token": 212,
425
- "text": "▁N",
426
- "prob": 86.561859
427
- }
428
- ],
429
- "action": "blank",
430
- "advance": 1,
431
- "advance_reason": "duration_step"
432
- },
433
- {
434
- "step": 10,
435
- "t": 10,
436
- "emitted_tokens": 0,
437
- "probs_shape": [
438
- 1025
439
- ],
440
- "token": 1024,
441
- "token_text": "<blk>",
442
- "is_blank": true,
443
- "step_prediction": 1,
444
- "top_5_tokens": [
445
- {
446
- "token": 1024,
447
- "text": "<blk>",
448
- "prob": 101.980629
449
- },
450
- {
451
- "token": 121,
452
- "text": "▁B",
453
- "prob": 97.974594
454
- },
455
- {
456
- "token": 157,
457
- "text": "▁H",
458
- "prob": 96.391281
459
- },
460
- {
461
- "token": 34,
462
- "text": "▁I",
463
- "prob": 96.158134
464
- },
465
- {
466
- "token": 237,
467
- "text": "▁F",
468
- "prob": 94.231049
469
- }
470
- ],
471
- "action": "blank",
472
- "advance": 1,
473
- "advance_reason": "duration_step"
474
- },
475
- {
476
- "step": 11,
477
- "t": 11,
478
- "emitted_tokens": 0,
479
- "probs_shape": [
480
- 1025
481
- ],
482
- "token": 1024,
483
- "token_text": "<blk>",
484
- "is_blank": true,
485
- "step_prediction": 1,
486
- "top_5_tokens": [
487
- {
488
- "token": 1024,
489
- "text": "<blk>",
490
- "prob": 114.559204
491
- },
492
- {
493
- "token": 121,
494
- "text": "▁B",
495
- "prob": 103.858131
496
- },
497
- {
498
- "token": 34,
499
- "text": "▁I",
500
- "prob": 103.499763
501
- },
502
- {
503
- "token": 157,
504
- "text": "▁H",
505
- "prob": 103.040092
506
- },
507
- {
508
- "token": 209,
509
- "text": "▁L",
510
- "prob": 102.949928
511
- }
512
- ],
513
- "action": "blank",
514
- "advance": 1,
515
- "advance_reason": "duration_step"
516
- },
517
- {
518
- "step": 12,
519
- "t": 12,
520
- "emitted_tokens": 0,
521
- "probs_shape": [
522
- 1025
523
- ],
524
- "token": 1024,
525
- "token_text": "<blk>",
526
- "is_blank": true,
527
- "step_prediction": 1,
528
- "top_5_tokens": [
529
- {
530
- "token": 1024,
531
- "text": "<blk>",
532
- "prob": 121.496132
533
- },
534
- {
535
- "token": 121,
536
- "text": "▁B",
537
- "prob": 114.315231
538
- },
539
- {
540
- "token": 34,
541
- "text": "▁I",
542
- "prob": 114.09465
543
- },
544
- {
545
- "token": 157,
546
- "text": "▁H",
547
- "prob": 113.557442
548
- },
549
- {
550
- "token": 209,
551
- "text": "▁L",
552
- "prob": 113.035225
553
- }
554
- ],
555
- "action": "blank",
556
- "advance": 1,
557
- "advance_reason": "duration_step"
558
- },
559
- {
560
- "step": 13,
561
- "t": 13,
562
- "emitted_tokens": 0,
563
- "probs_shape": [
564
- 1025
565
- ],
566
- "token": 1024,
567
- "token_text": "<blk>",
568
- "is_blank": true,
569
- "step_prediction": 1,
570
- "top_5_tokens": [
571
- {
572
- "token": 1024,
573
- "text": "<blk>",
574
- "prob": 130.765839
575
- },
576
- {
577
- "token": 209,
578
- "text": "▁L",
579
- "prob": 120.391922
580
- },
581
- {
582
- "token": 34,
583
- "text": "▁I",
584
- "prob": 119.965118
585
- },
586
- {
587
- "token": 121,
588
- "text": "▁B",
589
- "prob": 119.680946
590
- },
591
- {
592
- "token": 164,
593
- "text": "▁M",
594
- "prob": 118.794739
595
- }
596
- ],
597
- "action": "blank",
598
- "advance": 1,
599
- "advance_reason": "duration_step"
600
- },
601
- {
602
- "step": 14,
603
- "t": 14,
604
- "emitted_tokens": 0,
605
- "probs_shape": [
606
- 1025
607
- ],
608
- "token": 1024,
609
- "token_text": "<blk>",
610
- "is_blank": true,
611
- "step_prediction": 1,
612
- "top_5_tokens": [
613
- {
614
- "token": 1024,
615
- "text": "<blk>",
616
- "prob": 134.31105
617
- },
618
- {
619
- "token": 209,
620
- "text": "▁L",
621
- "prob": 122.642548
622
- },
623
- {
624
- "token": 121,
625
- "text": "▁B",
626
- "prob": 122.414894
627
- },
628
- {
629
- "token": 34,
630
- "text": "▁I",
631
- "prob": 121.111061
632
- },
633
- {
634
- "token": 157,
635
- "text": "▁H",
636
- "prob": 121.026833
637
- }
638
- ],
639
- "action": "blank",
640
- "advance": 1,
641
- "advance_reason": "duration_step"
642
- },
643
- {
644
- "step": 15,
645
- "t": 15,
646
- "emitted_tokens": 0,
647
- "probs_shape": [
648
- 1025
649
- ],
650
- "token": 1024,
651
- "token_text": "<blk>",
652
- "is_blank": true,
653
- "step_prediction": 1,
654
- "top_5_tokens": [
655
- {
656
- "token": 1024,
657
- "text": "<blk>",
658
- "prob": 149.716187
659
- },
660
- {
661
- "token": 34,
662
- "text": "▁I",
663
- "prob": 140.060654
664
- },
665
- {
666
- "token": 209,
667
- "text": "▁L",
668
- "prob": 139.599869
669
- },
670
- {
671
- "token": 121,
672
- "text": "▁B",
673
- "prob": 139.148178
674
- },
675
- {
676
- "token": 157,
677
- "text": "▁H",
678
- "prob": 136.720764
679
- }
680
- ],
681
- "action": "blank",
682
- "advance": 1,
683
- "advance_reason": "duration_step"
684
- }
685
- ],
686
- "preprocessing": {
687
- "features_shape": [
688
- 1,
689
- 1101,
690
- 128
691
- ],
692
- "features_lens": [
693
- 1101
694
- ],
695
- "features_sample": [
696
- -2.090094,
697
- -2.090094,
698
- -2.090094,
699
- -2.090094,
700
- -2.090092,
701
- -2.089793,
702
- -0.958127,
703
- -0.772503,
704
- -1.40938,
705
- -1.363333,
706
- 0.001163,
707
- 0.518873,
708
- -0.04418,
709
- -1.181798,
710
- 0.639789,
711
- -0.904283,
712
- -1.034808,
713
- -0.534969,
714
- -1.772774,
715
- -0.669429,
716
- 0.650727,
717
- 0.169987,
718
- -1.771345,
719
- -0.385424,
720
- 0.238204,
721
- 0.593137,
722
- 0.008182,
723
- -1.227177,
724
- -2.077962,
725
- -1.190059,
726
- -0.708222,
727
- -0.845768,
728
- -1.260647,
729
- 0.186319,
730
- 0.620574,
731
- -0.145378,
732
- 0.113896,
733
- -1.859648,
734
- -1.930027,
735
- -1.231057,
736
- -2.012543,
737
- -1.819176,
738
- -1.905104,
739
- -1.074125,
740
- -1.571918,
741
- -0.086888,
742
- 0.23079,
743
- 0.135472,
744
- -0.544634,
745
- -2.049327
746
- ],
747
- "features_dtype": "float32"
748
- },
749
- "encoding": {
750
- "encoder_out_shape": [
751
- 1,
752
- 1024,
753
- 16
754
- ],
755
- "encoder_out_lens": [
756
- 16
757
- ],
758
- "encoder_out_sample": [
759
- -0.00962,
760
- -0.017541,
761
- -0.063593,
762
- -0.051033,
763
- -0.037465,
764
- -0.021321,
765
- -0.022699,
766
- -0.017574,
767
- -0.023857,
768
- -0.019216,
769
- -0.016927,
770
- -0.021104,
771
- -0.07553,
772
- -0.101816,
773
- -0.061699,
774
- -0.189528,
775
- -0.038174,
776
- -0.060278,
777
- -0.070712,
778
- -0.075584,
779
- -0.047741,
780
- -0.028036,
781
- -0.033324,
782
- -0.038365,
783
- -0.033162,
784
- -0.045478,
785
- -0.057772,
786
- -0.07853,
787
- -0.056808,
788
- -0.091472,
789
- -0.106492,
790
- -0.044249,
791
- 0.072112,
792
- 0.027999,
793
- 0.020361,
794
- -0.00243,
795
- -0.01057,
796
- -0.005678,
797
- 0.007209,
798
- 0.012739,
799
- 0.002819,
800
- 0.018608,
801
- 0.025901,
802
- 0.040724,
803
- 0.037501,
804
- 0.037166,
805
- 0.044925,
806
- 0.094555,
807
- 0.003903,
808
- 0.00944
809
- ],
810
- "encoder_out_dtype": "float32"
811
- },
812
- "final_results": {
813
- "tokens": [],
814
- "timestamps": [],
815
- "decoded_text": "",
816
- "total_steps": 16,
817
- "final_t": 16,
818
- "encodings_len": 16
819
- },
820
- "full_transcription": "I I I I I I I I I"
821
- }