parakeet.js 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/README.md +240 -239
- package/examples/hf-spaces-demo/README.md +6 -9
- package/examples/hf-spaces-demo/package.json +1 -1
- package/examples/hf-spaces-demo/src/App.js +307 -316
- package/examples/react-demo/package.json +19 -19
- package/examples/react-demo/src/App.jsx +324 -326
- package/examples/react-demo-dev/src/App.jsx +23 -24
- package/package.json +1 -1
- package/publish.ps1 +65 -0
- package/src/hub.js +235 -241
- package/src/parakeet.js +15 -8
- package/src/preprocessor.js +75 -68
- package/docs/parakeet-transformers-js/.gitattributes +0 -2
- package/docs/parakeet-transformers-js/.prettierignore +0 -8
- package/docs/parakeet-transformers-js/.prettierrc +0 -10
- package/docs/parakeet-transformers-js/.tmp_features.json +0 -1
- package/docs/parakeet-transformers-js/LICENSE +0 -202
- package/docs/parakeet-transformers-js/README.md +0 -448
- package/docs/parakeet-transformers-js/assets/nemo128.onnx +0 -0
- package/docs/parakeet-transformers-js/assets/nemo80.onnx +0 -0
- package/docs/parakeet-transformers-js/debug_test.js +0 -84
- package/docs/parakeet-transformers-js/dev/inspect_decoder.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/inspect_joiner.cjs +0 -9
- package/docs/parakeet-transformers-js/dev/js_step_by_step.js +0 -249
- package/docs/parakeet-transformers-js/dev/parakeet_cli.js +0 -91
- package/docs/parakeet-transformers-js/jest.config.mjs +0 -194
- package/docs/parakeet-transformers-js/js_preprocessing.json +0 -225
- package/docs/parakeet-transformers-js/js_step_by_step.json +0 -837
- package/docs/parakeet-transformers-js/js_step_by_step_v2.json +0 -450
- package/docs/parakeet-transformers-js/js_step_by_step_v3.json +0 -450
- package/docs/parakeet-transformers-js/js_steps.json +0 -821
- package/docs/parakeet-transformers-js/package-lock.json +0 -12251
- package/docs/parakeet-transformers-js/package.json +0 -96
- package/docs/parakeet-transformers-js/src/audio_features.js +0 -178
- package/docs/parakeet-transformers-js/src/backends/onnx.js +0 -210
- package/docs/parakeet-transformers-js/src/base/feature_extraction_utils.js +0 -54
- package/docs/parakeet-transformers-js/src/base/image_processors_utils.js +0 -1105
- package/docs/parakeet-transformers-js/src/base/processing_utils.js +0 -173
- package/docs/parakeet-transformers-js/src/configs.js +0 -455
- package/docs/parakeet-transformers-js/src/env.js +0 -167
- package/docs/parakeet-transformers-js/src/generation/configuration_utils.js +0 -388
- package/docs/parakeet-transformers-js/src/generation/logits_process.js +0 -727
- package/docs/parakeet-transformers-js/src/generation/logits_sampler.js +0 -204
- package/docs/parakeet-transformers-js/src/generation/parameters.js +0 -35
- package/docs/parakeet-transformers-js/src/generation/stopping_criteria.js +0 -156
- package/docs/parakeet-transformers-js/src/generation/streamers.js +0 -225
- package/docs/parakeet-transformers-js/src/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.js +0 -85
- package/docs/parakeet-transformers-js/src/models/auto/feature_extraction_auto.js +0 -25
- package/docs/parakeet-transformers-js/src/models/auto/image_processing_auto.js +0 -29
- package/docs/parakeet-transformers-js/src/models/auto/processing_auto.js +0 -85
- package/docs/parakeet-transformers-js/src/models/beit/image_processing_beit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/bit/image_processing_bit.js +0 -5
- package/docs/parakeet-transformers-js/src/models/chinese_clip/image_processing_chinese_clip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/clap/feature_extraction_clap.js +0 -159
- package/docs/parakeet-transformers-js/src/models/clip/image_processing_clip.js +0 -6
- package/docs/parakeet-transformers-js/src/models/convnext/image_processing_convnext.js +0 -46
- package/docs/parakeet-transformers-js/src/models/dac/feature_extraction_dac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/deit/image_processing_deit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/detr/image_processing_detr.js +0 -52
- package/docs/parakeet-transformers-js/src/models/donut/image_processing_donut.js +0 -31
- package/docs/parakeet-transformers-js/src/models/dpt/image_processing_dpt.js +0 -6
- package/docs/parakeet-transformers-js/src/models/efficientnet/image_processing_efficientnet.js +0 -14
- package/docs/parakeet-transformers-js/src/models/encodec/feature_extraction_encodec.js +0 -32
- package/docs/parakeet-transformers-js/src/models/feature_extractors.js +0 -17
- package/docs/parakeet-transformers-js/src/models/florence2/processing_florence2.js +0 -131
- package/docs/parakeet-transformers-js/src/models/gemma3n/feature_extraction_gemma3n.js +0 -97
- package/docs/parakeet-transformers-js/src/models/gemma3n/processing_gemma3n.js +0 -74
- package/docs/parakeet-transformers-js/src/models/glpn/image_processing_glpn.js +0 -5
- package/docs/parakeet-transformers-js/src/models/grounding_dino/image_processing_grounding_dino.js +0 -29
- package/docs/parakeet-transformers-js/src/models/grounding_dino/processing_grounding_dino.js +0 -101
- package/docs/parakeet-transformers-js/src/models/idefics3/image_processing_idefics3.js +0 -232
- package/docs/parakeet-transformers-js/src/models/idefics3/processing_idefics3.js +0 -136
- package/docs/parakeet-transformers-js/src/models/image_processors.js +0 -40
- package/docs/parakeet-transformers-js/src/models/janus/image_processing_janus.js +0 -27
- package/docs/parakeet-transformers-js/src/models/janus/processing_janus.js +0 -123
- package/docs/parakeet-transformers-js/src/models/jina_clip/image_processing_jina_clip.js +0 -26
- package/docs/parakeet-transformers-js/src/models/jina_clip/processing_jina_clip.js +0 -24
- package/docs/parakeet-transformers-js/src/models/llava/processing_llava.js +0 -44
- package/docs/parakeet-transformers-js/src/models/llava_onevision/image_processing_llava_onevision.js +0 -5
- package/docs/parakeet-transformers-js/src/models/mask2former/image_processing_mask2former.js +0 -5
- package/docs/parakeet-transformers-js/src/models/maskformer/image_processing_maskformer.js +0 -18
- package/docs/parakeet-transformers-js/src/models/mgp_str/processing_mgp_str.js +0 -172
- package/docs/parakeet-transformers-js/src/models/mobilenet_v1/image_processing_mobilenet_v1.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v2/image_processing_mobilenet_v2.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v3/image_processing_mobilenet_v3.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilenet_v4/image_processing_mobilenet_v4.js +0 -7
- package/docs/parakeet-transformers-js/src/models/mobilevit/image_processing_mobilevit.js +0 -6
- package/docs/parakeet-transformers-js/src/models/moonshine/feature_extraction_moonshine.js +0 -26
- package/docs/parakeet-transformers-js/src/models/moonshine/processing_moonshine.js +0 -20
- package/docs/parakeet-transformers-js/src/models/nougat/image_processing_nougat.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlv2/image_processing_owlv2.js +0 -5
- package/docs/parakeet-transformers-js/src/models/owlvit/image_processing_owlvit.js +0 -12
- package/docs/parakeet-transformers-js/src/models/owlvit/processing_owlvit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/paligemma/processing_paligemma.js +0 -83
- package/docs/parakeet-transformers-js/src/models/parakeet/feature_extraction_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/modeling_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/processing_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/parakeet/tokenization_parakeet.js +0 -3
- package/docs/parakeet-transformers-js/src/models/phi3_v/image_processing_phi3_v.js +0 -163
- package/docs/parakeet-transformers-js/src/models/phi3_v/processing_phi3_v.js +0 -53
- package/docs/parakeet-transformers-js/src/models/processors.js +0 -22
- package/docs/parakeet-transformers-js/src/models/pvt/image_processing_pvt.js +0 -5
- package/docs/parakeet-transformers-js/src/models/pyannote/feature_extraction_pyannote.js +0 -85
- package/docs/parakeet-transformers-js/src/models/pyannote/processing_pyannote.js +0 -24
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/image_processing_qwen2_vl.js +0 -52
- package/docs/parakeet-transformers-js/src/models/qwen2_vl/processing_qwen2_vl.js +0 -53
- package/docs/parakeet-transformers-js/src/models/rt_detr/image_processing_rt_detr.js +0 -12
- package/docs/parakeet-transformers-js/src/models/sam/image_processing_sam.js +0 -242
- package/docs/parakeet-transformers-js/src/models/sam/processing_sam.js +0 -20
- package/docs/parakeet-transformers-js/src/models/sapiens/image_processing_sapiens.js +0 -13
- package/docs/parakeet-transformers-js/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +0 -175
- package/docs/parakeet-transformers-js/src/models/segformer/image_processing_segformer.js +0 -13
- package/docs/parakeet-transformers-js/src/models/siglip/image_processing_siglip.js +0 -5
- package/docs/parakeet-transformers-js/src/models/smolvlm/image_processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/smolvlm/processing_smolvlm.js +0 -2
- package/docs/parakeet-transformers-js/src/models/snac/feature_extraction_snac.js +0 -3
- package/docs/parakeet-transformers-js/src/models/speecht5/feature_extraction_speecht5.js +0 -4
- package/docs/parakeet-transformers-js/src/models/speecht5/processing_speecht5.js +0 -17
- package/docs/parakeet-transformers-js/src/models/swin2sr/image_processing_swin2sr.js +0 -24
- package/docs/parakeet-transformers-js/src/models/ultravox/processing_ultravox.js +0 -54
- package/docs/parakeet-transformers-js/src/models/vit/image_processing_vit.js +0 -7
- package/docs/parakeet-transformers-js/src/models/vitmatte/image_processing_vitmatte.js +0 -50
- package/docs/parakeet-transformers-js/src/models/vitpose/image_processing_vitpose.js +0 -89
- package/docs/parakeet-transformers-js/src/models/wav2vec2/feature_extraction_wav2vec2.js +0 -44
- package/docs/parakeet-transformers-js/src/models/wav2vec2/processing_wav2vec2.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.js +0 -17
- package/docs/parakeet-transformers-js/src/models/wespeaker/feature_extraction_wespeaker.js +0 -95
- package/docs/parakeet-transformers-js/src/models/whisper/common_whisper.js +0 -157
- package/docs/parakeet-transformers-js/src/models/whisper/feature_extraction_whisper.js +0 -92
- package/docs/parakeet-transformers-js/src/models/whisper/generation_whisper.js +0 -89
- package/docs/parakeet-transformers-js/src/models/whisper/processing_whisper.js +0 -21
- package/docs/parakeet-transformers-js/src/models/yolos/image_processing_yolos.js +0 -12
- package/docs/parakeet-transformers-js/src/models.js +0 -8644
- package/docs/parakeet-transformers-js/src/ops/registry.js +0 -133
- package/docs/parakeet-transformers-js/src/ort_env.js +0 -8
- package/docs/parakeet-transformers-js/src/parakeet.js +0 -792
- package/docs/parakeet-transformers-js/src/pipelines.js +0 -3540
- package/docs/parakeet-transformers-js/src/processors.js +0 -16
- package/docs/parakeet-transformers-js/src/tokenizers.js +0 -4432
- package/docs/parakeet-transformers-js/src/transformers.js +0 -50
- package/docs/parakeet-transformers-js/src/utils/audio.js +0 -893
- package/docs/parakeet-transformers-js/src/utils/constants.js +0 -9
- package/docs/parakeet-transformers-js/src/utils/core.js +0 -259
- package/docs/parakeet-transformers-js/src/utils/data-structures.js +0 -574
- package/docs/parakeet-transformers-js/src/utils/devices.js +0 -22
- package/docs/parakeet-transformers-js/src/utils/dtypes.js +0 -63
- package/docs/parakeet-transformers-js/src/utils/generic.js +0 -35
- package/docs/parakeet-transformers-js/src/utils/hub.js +0 -780
- package/docs/parakeet-transformers-js/src/utils/image.js +0 -834
- package/docs/parakeet-transformers-js/src/utils/maths.js +0 -1061
- package/docs/parakeet-transformers-js/src/utils/tensor.js +0 -1539
- package/docs/parakeet-transformers-js/src/utils/video.js +0 -128
- package/docs/parakeet-transformers-js/test/decoder.test.js +0 -114
- package/docs/parakeet-transformers-js/test/encoder.test.js +0 -108
- package/docs/parakeet-transformers-js/test/preprocessor.test.js +0 -85
- package/docs/parakeet-transformers-js/test/tokenizer.test.js +0 -24
- package/docs/parakeet-transformers-js/test/transcribe.js +0 -89
- package/docs/parakeet-transformers-js/tsconfig.json +0 -21
- package/docs/parakeet-transformers-js/webpack.config.js +0 -223
|
@@ -1,450 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"audio_length": 176017,
|
|
3
|
-
"sample_rate": 16000,
|
|
4
|
-
"model_config": {
|
|
5
|
-
"vocab_size": 1025,
|
|
6
|
-
"blank_idx": 1024,
|
|
7
|
-
"max_tokens_per_step": 10,
|
|
8
|
-
"subsampling_factor": 8,
|
|
9
|
-
"pred_layers": 2,
|
|
10
|
-
"pred_hidden": 640
|
|
11
|
-
},
|
|
12
|
-
"steps": [
|
|
13
|
-
{
|
|
14
|
-
"step": 0,
|
|
15
|
-
"t": 0,
|
|
16
|
-
"emitted_tokens": 0,
|
|
17
|
-
"probs_shape": [
|
|
18
|
-
1025
|
|
19
|
-
],
|
|
20
|
-
"token": 1024,
|
|
21
|
-
"token_text": "<blk>",
|
|
22
|
-
"is_blank": true,
|
|
23
|
-
"step_prediction": 1,
|
|
24
|
-
"top_5_tokens": [
|
|
25
|
-
{
|
|
26
|
-
"token": 1024,
|
|
27
|
-
"text": "<blk>",
|
|
28
|
-
"prob": 41.226837
|
|
29
|
-
},
|
|
30
|
-
{
|
|
31
|
-
"token": 34,
|
|
32
|
-
"text": "▁I",
|
|
33
|
-
"prob": 33.77882
|
|
34
|
-
},
|
|
35
|
-
{
|
|
36
|
-
"token": 248,
|
|
37
|
-
"text": "▁We",
|
|
38
|
-
"prob": 33.430401
|
|
39
|
-
},
|
|
40
|
-
{
|
|
41
|
-
"token": 139,
|
|
42
|
-
"text": "▁So",
|
|
43
|
-
"prob": 33.349823
|
|
44
|
-
},
|
|
45
|
-
{
|
|
46
|
-
"token": 108,
|
|
47
|
-
"text": "▁W",
|
|
48
|
-
"prob": 32.639244
|
|
49
|
-
}
|
|
50
|
-
],
|
|
51
|
-
"top_5_probs": [],
|
|
52
|
-
"action": "blank",
|
|
53
|
-
"advance": 1,
|
|
54
|
-
"advance_reason": "duration_step"
|
|
55
|
-
},
|
|
56
|
-
{
|
|
57
|
-
"step": 1,
|
|
58
|
-
"t": 1,
|
|
59
|
-
"emitted_tokens": 0,
|
|
60
|
-
"probs_shape": [
|
|
61
|
-
1025
|
|
62
|
-
],
|
|
63
|
-
"token": 1024,
|
|
64
|
-
"token_text": "<blk>",
|
|
65
|
-
"is_blank": true,
|
|
66
|
-
"step_prediction": 4,
|
|
67
|
-
"top_5_tokens": [
|
|
68
|
-
{
|
|
69
|
-
"token": 1024,
|
|
70
|
-
"text": "<blk>",
|
|
71
|
-
"prob": 42.59375
|
|
72
|
-
},
|
|
73
|
-
{
|
|
74
|
-
"token": 123,
|
|
75
|
-
"text": "▁And",
|
|
76
|
-
"prob": 35.228672
|
|
77
|
-
},
|
|
78
|
-
{
|
|
79
|
-
"token": 34,
|
|
80
|
-
"text": "▁I",
|
|
81
|
-
"prob": 34.526535
|
|
82
|
-
},
|
|
83
|
-
{
|
|
84
|
-
"token": 155,
|
|
85
|
-
"text": "▁The",
|
|
86
|
-
"prob": 33.929031
|
|
87
|
-
},
|
|
88
|
-
{
|
|
89
|
-
"token": 248,
|
|
90
|
-
"text": "▁We",
|
|
91
|
-
"prob": 33.60059
|
|
92
|
-
}
|
|
93
|
-
],
|
|
94
|
-
"top_5_probs": [],
|
|
95
|
-
"action": "blank",
|
|
96
|
-
"advance": 4,
|
|
97
|
-
"advance_reason": "duration_step"
|
|
98
|
-
},
|
|
99
|
-
{
|
|
100
|
-
"step": 2,
|
|
101
|
-
"t": 5,
|
|
102
|
-
"emitted_tokens": 0,
|
|
103
|
-
"probs_shape": [
|
|
104
|
-
1025
|
|
105
|
-
],
|
|
106
|
-
"token": 1024,
|
|
107
|
-
"token_text": "<blk>",
|
|
108
|
-
"is_blank": true,
|
|
109
|
-
"step_prediction": 4,
|
|
110
|
-
"top_5_tokens": [
|
|
111
|
-
{
|
|
112
|
-
"token": 1024,
|
|
113
|
-
"text": "<blk>",
|
|
114
|
-
"prob": 41.437325
|
|
115
|
-
},
|
|
116
|
-
{
|
|
117
|
-
"token": 34,
|
|
118
|
-
"text": "▁I",
|
|
119
|
-
"prob": 35.343742
|
|
120
|
-
},
|
|
121
|
-
{
|
|
122
|
-
"token": 123,
|
|
123
|
-
"text": "▁And",
|
|
124
|
-
"prob": 34.755867
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
"token": 155,
|
|
128
|
-
"text": "▁The",
|
|
129
|
-
"prob": 33.781651
|
|
130
|
-
},
|
|
131
|
-
{
|
|
132
|
-
"token": 157,
|
|
133
|
-
"text": "▁H",
|
|
134
|
-
"prob": 33.757645
|
|
135
|
-
}
|
|
136
|
-
],
|
|
137
|
-
"top_5_probs": [],
|
|
138
|
-
"action": "blank",
|
|
139
|
-
"advance": 4,
|
|
140
|
-
"advance_reason": "duration_step"
|
|
141
|
-
},
|
|
142
|
-
{
|
|
143
|
-
"step": 3,
|
|
144
|
-
"t": 9,
|
|
145
|
-
"emitted_tokens": 0,
|
|
146
|
-
"probs_shape": [
|
|
147
|
-
1025
|
|
148
|
-
],
|
|
149
|
-
"token": 1024,
|
|
150
|
-
"token_text": "<blk>",
|
|
151
|
-
"is_blank": true,
|
|
152
|
-
"step_prediction": 1,
|
|
153
|
-
"top_5_tokens": [
|
|
154
|
-
{
|
|
155
|
-
"token": 1024,
|
|
156
|
-
"text": "<blk>",
|
|
157
|
-
"prob": 40.797409
|
|
158
|
-
},
|
|
159
|
-
{
|
|
160
|
-
"token": 34,
|
|
161
|
-
"text": "▁I",
|
|
162
|
-
"prob": 35.102684
|
|
163
|
-
},
|
|
164
|
-
{
|
|
165
|
-
"token": 123,
|
|
166
|
-
"text": "▁And",
|
|
167
|
-
"prob": 34.858852
|
|
168
|
-
},
|
|
169
|
-
{
|
|
170
|
-
"token": 574,
|
|
171
|
-
"text": "▁Oh",
|
|
172
|
-
"prob": 34.581413
|
|
173
|
-
},
|
|
174
|
-
{
|
|
175
|
-
"token": 261,
|
|
176
|
-
"text": "▁You",
|
|
177
|
-
"prob": 34.249733
|
|
178
|
-
}
|
|
179
|
-
],
|
|
180
|
-
"top_5_probs": [],
|
|
181
|
-
"action": "blank",
|
|
182
|
-
"advance": 1,
|
|
183
|
-
"advance_reason": "duration_step"
|
|
184
|
-
},
|
|
185
|
-
{
|
|
186
|
-
"step": 4,
|
|
187
|
-
"t": 10,
|
|
188
|
-
"emitted_tokens": 0,
|
|
189
|
-
"probs_shape": [
|
|
190
|
-
1025
|
|
191
|
-
],
|
|
192
|
-
"token": 1024,
|
|
193
|
-
"token_text": "<blk>",
|
|
194
|
-
"is_blank": true,
|
|
195
|
-
"step_prediction": 1,
|
|
196
|
-
"top_5_tokens": [
|
|
197
|
-
{
|
|
198
|
-
"token": 1024,
|
|
199
|
-
"text": "<blk>",
|
|
200
|
-
"prob": 39.746513
|
|
201
|
-
},
|
|
202
|
-
{
|
|
203
|
-
"token": 34,
|
|
204
|
-
"text": "▁I",
|
|
205
|
-
"prob": 35.508808
|
|
206
|
-
},
|
|
207
|
-
{
|
|
208
|
-
"token": 155,
|
|
209
|
-
"text": "▁The",
|
|
210
|
-
"prob": 34.634434
|
|
211
|
-
},
|
|
212
|
-
{
|
|
213
|
-
"token": 163,
|
|
214
|
-
"text": "▁O",
|
|
215
|
-
"prob": 34.146984
|
|
216
|
-
},
|
|
217
|
-
{
|
|
218
|
-
"token": 819,
|
|
219
|
-
"text": "▁",
|
|
220
|
-
"prob": 34.010185
|
|
221
|
-
}
|
|
222
|
-
],
|
|
223
|
-
"top_5_probs": [],
|
|
224
|
-
"action": "blank",
|
|
225
|
-
"advance": 1,
|
|
226
|
-
"advance_reason": "duration_step"
|
|
227
|
-
},
|
|
228
|
-
{
|
|
229
|
-
"step": 5,
|
|
230
|
-
"t": 11,
|
|
231
|
-
"emitted_tokens": 0,
|
|
232
|
-
"probs_shape": [
|
|
233
|
-
1025
|
|
234
|
-
],
|
|
235
|
-
"token": 1024,
|
|
236
|
-
"token_text": "<blk>",
|
|
237
|
-
"is_blank": true,
|
|
238
|
-
"step_prediction": 4,
|
|
239
|
-
"top_5_tokens": [
|
|
240
|
-
{
|
|
241
|
-
"token": 1024,
|
|
242
|
-
"text": "<blk>",
|
|
243
|
-
"prob": 41.378902
|
|
244
|
-
},
|
|
245
|
-
{
|
|
246
|
-
"token": 34,
|
|
247
|
-
"text": "▁I",
|
|
248
|
-
"prob": 32.861183
|
|
249
|
-
},
|
|
250
|
-
{
|
|
251
|
-
"token": 123,
|
|
252
|
-
"text": "▁And",
|
|
253
|
-
"prob": 32.594582
|
|
254
|
-
},
|
|
255
|
-
{
|
|
256
|
-
"token": 155,
|
|
257
|
-
"text": "▁The",
|
|
258
|
-
"prob": 32.279743
|
|
259
|
-
},
|
|
260
|
-
{
|
|
261
|
-
"token": 819,
|
|
262
|
-
"text": "▁",
|
|
263
|
-
"prob": 32.114971
|
|
264
|
-
}
|
|
265
|
-
],
|
|
266
|
-
"top_5_probs": [],
|
|
267
|
-
"action": "blank",
|
|
268
|
-
"advance": 4,
|
|
269
|
-
"advance_reason": "duration_step"
|
|
270
|
-
},
|
|
271
|
-
{
|
|
272
|
-
"step": 6,
|
|
273
|
-
"t": 15,
|
|
274
|
-
"emitted_tokens": 0,
|
|
275
|
-
"probs_shape": [
|
|
276
|
-
1025
|
|
277
|
-
],
|
|
278
|
-
"token": 1024,
|
|
279
|
-
"token_text": "<blk>",
|
|
280
|
-
"is_blank": true,
|
|
281
|
-
"step_prediction": 1,
|
|
282
|
-
"top_5_tokens": [
|
|
283
|
-
{
|
|
284
|
-
"token": 1024,
|
|
285
|
-
"text": "<blk>",
|
|
286
|
-
"prob": 48.968941
|
|
287
|
-
},
|
|
288
|
-
{
|
|
289
|
-
"token": 34,
|
|
290
|
-
"text": "▁I",
|
|
291
|
-
"prob": 43.725601
|
|
292
|
-
},
|
|
293
|
-
{
|
|
294
|
-
"token": 74,
|
|
295
|
-
"text": "▁Th",
|
|
296
|
-
"prob": 41.064816
|
|
297
|
-
},
|
|
298
|
-
{
|
|
299
|
-
"token": 219,
|
|
300
|
-
"text": "▁G",
|
|
301
|
-
"prob": 41.000469
|
|
302
|
-
},
|
|
303
|
-
{
|
|
304
|
-
"token": 260,
|
|
305
|
-
"text": "▁E",
|
|
306
|
-
"prob": 40.559464
|
|
307
|
-
}
|
|
308
|
-
],
|
|
309
|
-
"top_5_probs": [],
|
|
310
|
-
"action": "blank",
|
|
311
|
-
"advance": 1,
|
|
312
|
-
"advance_reason": "duration_step"
|
|
313
|
-
}
|
|
314
|
-
],
|
|
315
|
-
"preprocessing": {
|
|
316
|
-
"features_shape": [
|
|
317
|
-
1,
|
|
318
|
-
1101,
|
|
319
|
-
128
|
|
320
|
-
],
|
|
321
|
-
"features_lens": [
|
|
322
|
-
1101
|
|
323
|
-
],
|
|
324
|
-
"features_sample": [
|
|
325
|
-
6.473908,
|
|
326
|
-
3.58362,
|
|
327
|
-
-2.028411,
|
|
328
|
-
-2.028411,
|
|
329
|
-
-2.028409,
|
|
330
|
-
-2.028214,
|
|
331
|
-
-1.430135,
|
|
332
|
-
-0.635826,
|
|
333
|
-
-1.805648,
|
|
334
|
-
-1.180914,
|
|
335
|
-
0.139405,
|
|
336
|
-
0.423509,
|
|
337
|
-
-0.254901,
|
|
338
|
-
-1.161156,
|
|
339
|
-
0.343323,
|
|
340
|
-
-0.877018,
|
|
341
|
-
-1.558379,
|
|
342
|
-
-0.60808,
|
|
343
|
-
-1.13381,
|
|
344
|
-
-1.102631,
|
|
345
|
-
0.779868,
|
|
346
|
-
0.108186,
|
|
347
|
-
-1.602304,
|
|
348
|
-
-0.413008,
|
|
349
|
-
0.014313,
|
|
350
|
-
0.368889,
|
|
351
|
-
-0.184027,
|
|
352
|
-
-1.061025,
|
|
353
|
-
-1.77546,
|
|
354
|
-
-0.943931,
|
|
355
|
-
-0.283598,
|
|
356
|
-
-1.202594,
|
|
357
|
-
-1.094526,
|
|
358
|
-
0.152471,
|
|
359
|
-
0.48408,
|
|
360
|
-
-0.438624,
|
|
361
|
-
-0.192583,
|
|
362
|
-
-1.456899,
|
|
363
|
-
-1.78464,
|
|
364
|
-
-1.120869,
|
|
365
|
-
-1.801295,
|
|
366
|
-
-1.852787,
|
|
367
|
-
-1.885587,
|
|
368
|
-
-1.48906,
|
|
369
|
-
-1.306157,
|
|
370
|
-
0.219165,
|
|
371
|
-
0.023559,
|
|
372
|
-
-0.32646,
|
|
373
|
-
-0.66996,
|
|
374
|
-
-1.473523
|
|
375
|
-
],
|
|
376
|
-
"features_dtype": "float32"
|
|
377
|
-
},
|
|
378
|
-
"encoding": {
|
|
379
|
-
"encoder_out_shape": [
|
|
380
|
-
1,
|
|
381
|
-
1024,
|
|
382
|
-
16
|
|
383
|
-
],
|
|
384
|
-
"encoder_out_lens": [
|
|
385
|
-
16
|
|
386
|
-
],
|
|
387
|
-
"encoder_out_sample": [
|
|
388
|
-
0.007837,
|
|
389
|
-
0.003748,
|
|
390
|
-
-0.05033,
|
|
391
|
-
-0.034014,
|
|
392
|
-
-0.022349,
|
|
393
|
-
-0.017078,
|
|
394
|
-
-0.019876,
|
|
395
|
-
-0.016203,
|
|
396
|
-
-0.025393,
|
|
397
|
-
-0.017256,
|
|
398
|
-
-0.012161,
|
|
399
|
-
-0.008812,
|
|
400
|
-
-0.066451,
|
|
401
|
-
-0.089987,
|
|
402
|
-
-0.047875,
|
|
403
|
-
-0.176737,
|
|
404
|
-
-0.027524,
|
|
405
|
-
-0.042996,
|
|
406
|
-
-0.059277,
|
|
407
|
-
-0.056243,
|
|
408
|
-
-0.033654,
|
|
409
|
-
-0.024435,
|
|
410
|
-
-0.029944,
|
|
411
|
-
-0.036262,
|
|
412
|
-
-0.026254,
|
|
413
|
-
-0.038526,
|
|
414
|
-
-0.050476,
|
|
415
|
-
-0.067926,
|
|
416
|
-
-0.054781,
|
|
417
|
-
-0.094117,
|
|
418
|
-
-0.106241,
|
|
419
|
-
-0.050649,
|
|
420
|
-
0.057139,
|
|
421
|
-
0.009718,
|
|
422
|
-
0.009076,
|
|
423
|
-
-0.003998,
|
|
424
|
-
-0.007633,
|
|
425
|
-
-0.003716,
|
|
426
|
-
0.005142,
|
|
427
|
-
0.010054,
|
|
428
|
-
-0.001808,
|
|
429
|
-
0.015909,
|
|
430
|
-
0.026671,
|
|
431
|
-
0.036184,
|
|
432
|
-
0.033018,
|
|
433
|
-
0.03269,
|
|
434
|
-
0.044188,
|
|
435
|
-
0.083504,
|
|
436
|
-
0.009676,
|
|
437
|
-
0.009832
|
|
438
|
-
],
|
|
439
|
-
"encoder_out_dtype": "float32"
|
|
440
|
-
},
|
|
441
|
-
"final_results": {
|
|
442
|
-
"tokens": [],
|
|
443
|
-
"timestamps": [],
|
|
444
|
-
"decoded_text": "",
|
|
445
|
-
"total_steps": 7,
|
|
446
|
-
"final_t": 16,
|
|
447
|
-
"encodings_len": 16
|
|
448
|
-
},
|
|
449
|
-
"full_transcription": "<unk>"
|
|
450
|
-
}
|