react-native-executorch 0.5.1-rc.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -0
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +4 -10
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +1 -1
- package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +3 -2
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +16 -4
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +2 -2
- package/lib/Error.d.ts +30 -0
- package/lib/Error.js +50 -0
- package/lib/constants/directories.d.ts +1 -0
- package/lib/constants/directories.js +2 -0
- package/lib/constants/llmDefaults.d.ts +6 -0
- package/lib/constants/llmDefaults.js +16 -0
- package/lib/constants/modelUrls.d.ts +217 -83
- package/lib/constants/modelUrls.js +304 -98
- package/lib/constants/ocr/models.d.ts +882 -0
- package/lib/constants/ocr/models.js +182 -0
- package/lib/constants/ocr/symbols.d.ts +75 -0
- package/lib/constants/ocr/symbols.js +139 -0
- package/lib/{typescript/constants → constants}/sttDefaults.d.ts +0 -1
- package/lib/constants/sttDefaults.js +12 -10
- package/lib/controllers/LLMController.d.ts +47 -0
- package/lib/controllers/LLMController.js +14 -11
- package/lib/controllers/OCRController.d.ts +23 -0
- package/lib/controllers/OCRController.js +12 -5
- package/lib/controllers/SpeechToTextController.d.ts +8 -4
- package/lib/controllers/SpeechToTextController.js +15 -9
- package/lib/controllers/VerticalOCRController.d.ts +25 -0
- package/lib/controllers/VerticalOCRController.js +75 -0
- package/lib/hooks/computer_vision/useClassification.d.ts +15 -0
- package/lib/hooks/computer_vision/useClassification.js +7 -0
- package/lib/hooks/computer_vision/useImageEmbeddings.d.ts +15 -0
- package/lib/hooks/computer_vision/useImageEmbeddings.js +7 -0
- package/lib/hooks/computer_vision/useImageSegmentation.d.ts +38 -0
- package/lib/hooks/computer_vision/useImageSegmentation.js +7 -0
- package/lib/hooks/computer_vision/useOCR.d.ts +20 -0
- package/lib/hooks/computer_vision/useOCR.js +42 -0
- package/lib/hooks/computer_vision/useObjectDetection.d.ts +15 -0
- package/lib/hooks/computer_vision/useObjectDetection.js +7 -0
- package/lib/hooks/computer_vision/useStyleTransfer.d.ts +15 -0
- package/lib/hooks/computer_vision/useStyleTransfer.js +7 -0
- package/lib/hooks/computer_vision/useVerticalOCR.d.ts +21 -0
- package/lib/hooks/computer_vision/useVerticalOCR.js +45 -0
- package/lib/hooks/general/useExecutorchModule.d.ts +13 -0
- package/lib/hooks/general/useExecutorchModule.js +7 -0
- package/lib/hooks/natural_language_processing/useLLM.d.ts +10 -0
- package/lib/hooks/natural_language_processing/useLLM.js +78 -0
- package/lib/hooks/natural_language_processing/useSpeechToText.d.ts +27 -0
- package/lib/hooks/natural_language_processing/useSpeechToText.js +19 -14
- package/lib/hooks/natural_language_processing/useTextEmbeddings.d.ts +16 -0
- package/lib/hooks/natural_language_processing/useTextEmbeddings.js +7 -0
- package/lib/hooks/natural_language_processing/useTokenizer.d.ts +17 -0
- package/lib/hooks/natural_language_processing/useTokenizer.js +52 -0
- package/lib/hooks/useModule.d.ts +17 -0
- package/lib/hooks/useModule.js +45 -0
- package/lib/hooks/useNonStaticModule.d.ts +20 -0
- package/lib/hooks/useNonStaticModule.js +49 -0
- package/lib/index.d.ts +1 -1
- package/lib/index.js +3 -2
- package/lib/module/constants/modelUrls.js +61 -36
- package/lib/module/constants/modelUrls.js.map +1 -1
- package/lib/module/constants/ocr/models.js +1 -1
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js +71 -34
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
- package/lib/module/index.js +2 -3
- package/lib/module/index.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +72 -31
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/module/types/stt.js +1 -85
- package/lib/module/types/stt.js.map +1 -1
- package/lib/module/utils/ResourceFetcher.js +6 -8
- package/lib/module/utils/ResourceFetcher.js.map +1 -1
- package/lib/module/utils/ResourceFetcherUtils.js +20 -20
- package/lib/module/utils/ResourceFetcherUtils.js.map +1 -1
- package/lib/module/utils/SpeechToTextModule/ASR.js +191 -0
- package/lib/module/utils/SpeechToTextModule/ASR.js.map +1 -0
- package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +73 -0
- package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +1 -0
- package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +56 -0
- package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +1 -0
- package/lib/modules/BaseModule.d.ts +8 -0
- package/lib/modules/BaseModule.js +25 -0
- package/lib/modules/BaseNonStaticModule.d.ts +9 -0
- package/lib/modules/BaseNonStaticModule.js +14 -0
- package/lib/modules/computer_vision/ClassificationModule.d.ts +8 -0
- package/lib/modules/computer_vision/ClassificationModule.js +17 -0
- package/lib/modules/computer_vision/ImageEmbeddingsModule.d.ts +8 -0
- package/lib/modules/computer_vision/ImageEmbeddingsModule.js +17 -0
- package/lib/modules/computer_vision/ImageSegmentationModule.d.ts +11 -0
- package/lib/modules/computer_vision/ImageSegmentationModule.js +27 -0
- package/lib/modules/computer_vision/OCRModule.d.ts +15 -0
- package/lib/modules/computer_vision/OCRModule.js +20 -0
- package/lib/modules/computer_vision/ObjectDetectionModule.d.ts +9 -0
- package/lib/modules/computer_vision/ObjectDetectionModule.js +17 -0
- package/lib/modules/computer_vision/StyleTransferModule.d.ts +8 -0
- package/lib/modules/computer_vision/StyleTransferModule.js +17 -0
- package/lib/modules/computer_vision/VerticalOCRModule.d.ts +15 -0
- package/lib/modules/computer_vision/VerticalOCRModule.js +22 -0
- package/lib/modules/general/ExecutorchModule.d.ts +7 -0
- package/lib/modules/general/ExecutorchModule.js +14 -0
- package/lib/modules/natural_language_processing/LLMModule.d.ts +28 -0
- package/lib/modules/natural_language_processing/LLMModule.js +45 -0
- package/lib/modules/natural_language_processing/SpeechToTextModule.d.ts +18 -8
- package/lib/modules/natural_language_processing/SpeechToTextModule.js +21 -15
- package/lib/modules/natural_language_processing/TextEmbeddingsModule.d.ts +9 -0
- package/lib/modules/natural_language_processing/TextEmbeddingsModule.js +21 -0
- package/lib/modules/natural_language_processing/TokenizerModule.d.ts +12 -0
- package/lib/modules/natural_language_processing/TokenizerModule.js +5 -4
- package/lib/native/NativeETInstaller.d.ts +6 -0
- package/lib/native/NativeETInstaller.js +2 -0
- package/lib/native/NativeOCR.d.ts +8 -0
- package/lib/native/NativeOCR.js +2 -0
- package/lib/native/NativeVerticalOCR.d.ts +8 -0
- package/lib/native/NativeVerticalOCR.js +2 -0
- package/lib/types/common.d.ts +31 -0
- package/lib/types/common.js +25 -0
- package/lib/types/imageSegmentation.d.ts +24 -0
- package/lib/types/imageSegmentation.js +26 -0
- package/lib/types/llm.d.ts +46 -0
- package/lib/types/llm.js +9 -0
- package/lib/types/objectDetection.d.ts +104 -0
- package/lib/types/objectDetection.js +94 -0
- package/lib/types/ocr.d.ts +11 -0
- package/lib/types/ocr.js +1 -0
- package/lib/types/stt.d.ts +94 -0
- package/lib/types/stt.js +85 -0
- package/lib/typescript/constants/modelUrls.d.ts +24 -7
- package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/models.d.ts +126 -126
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +15 -24
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +2 -3
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +19 -22
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/lib/typescript/types/stt.d.ts +17 -91
- package/lib/typescript/types/stt.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -1
- package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +27 -0
- package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +1 -0
- package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +23 -0
- package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +1 -0
- package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +13 -0
- package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +1 -0
- package/lib/utils/ResourceFetcher.d.ts +24 -0
- package/lib/utils/ResourceFetcher.js +305 -0
- package/lib/utils/ResourceFetcherUtils.d.ts +54 -0
- package/lib/utils/ResourceFetcherUtils.js +9 -0
- package/lib/utils/llm.d.ts +6 -0
- package/lib/utils/llm.js +1 -0
- package/lib/utils/stt.d.ts +1 -0
- package/lib/utils/stt.js +21 -0
- package/package.json +5 -3
- package/src/constants/modelUrls.ts +70 -37
- package/src/constants/ocr/models.ts +1 -1
- package/src/hooks/natural_language_processing/useSpeechToText.ts +87 -92
- package/src/index.ts +6 -8
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +81 -69
- package/src/types/stt.ts +97 -92
- package/src/utils/ResourceFetcher.ts +9 -7
- package/src/utils/ResourceFetcherUtils.ts +15 -17
- package/src/utils/SpeechToTextModule/ASR.ts +303 -0
- package/src/utils/SpeechToTextModule/OnlineProcessor.ts +87 -0
- package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +79 -0
- package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.cpp +0 -31
- package/common/rnexecutorch/models/speech_to_text/MoonshineStrategy.h +0 -21
- package/lib/module/constants/sttDefaults.js +0 -74
- package/lib/module/constants/sttDefaults.js.map +0 -1
- package/lib/module/controllers/SpeechToTextController.js +0 -320
- package/lib/module/controllers/SpeechToTextController.js.map +0 -1
- package/lib/typescript/constants/sttDefaults.d.ts.map +0 -1
- package/lib/typescript/controllers/SpeechToTextController.d.ts +0 -57
- package/lib/typescript/controllers/SpeechToTextController.d.ts.map +0 -1
- package/src/constants/sttDefaults.ts +0 -82
- package/src/controllers/SpeechToTextController.ts +0 -471
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/contents.xcworkspacedata +0 -7
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/project.xcworkspace/xcuserdata/norbertklockiewicz.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- package/third-party/ios/ExecutorchLib/ExecutorchLib.xcodeproj/xcuserdata/norbertklockiewicz.xcuserdatad/xcschemes/xcschememanagement.plist +0 -14
|
@@ -1,320 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
import { HAMMING_DIST_THRESHOLD, MODEL_CONFIGS, SECOND, MODES, NUM_TOKENS_TO_TRIM, STREAMING_ACTION } from '../constants/sttDefaults';
|
|
4
|
-
import { AvailableModels } from '../types/stt';
|
|
5
|
-
import { TokenizerModule } from '../modules/natural_language_processing/TokenizerModule';
|
|
6
|
-
import { ResourceFetcher } from '../utils/ResourceFetcher';
|
|
7
|
-
import { longCommonInfPref } from '../utils/stt';
|
|
8
|
-
import { ETError, getError } from '../Error';
|
|
9
|
-
import { Logger } from '../common/Logger';
|
|
10
|
-
/**
 * Drives chunked speech-to-text transcription on top of a native
 * encoder/decoder module and a tokenizer.
 *
 * A waveform is cut into overlapping windows, every window is encoded and
 * then decoded token by token until the configured EOS token is produced,
 * and the per-window token sequences are stitched together using
 * `longCommonInfPref`. Progress, readiness and errors are reported through
 * the callbacks passed to the constructor.
 *
 * NOTE(review): `windowSize` and `overlapSeconds` are stored multiplied by
 * `SECOND` and are then used directly as array offsets into the waveform, so
 * `SECOND` is presumably the number of samples per second - confirm in
 * `constants/sttDefaults`.
 */
export class SpeechToTextController {
  /** Token ids of the transcript assembled so far. */
  sequence = [];
  /** True once `load()` has installed the native module. */
  isReady = false;
  /** True while a transcription (one-shot or streaming) is in progress. */
  isGenerating = false;
  /** Waveform windows waiting to be / last being decoded. */
  chunks = [];
  /** Token id sequences decoded so far, one per processed window. */
  seqs = [];
  /** Snapshot of `sequence` used to emit intermediate results while decoding. */
  prevSeq = [];
  /** Audio buffer that has not been consumed yet. */
  waveform = [];
  numOfChunks = 0;
  /** True between a streaming START and the matching STOP. */
  streaming = false;

  // Assigned in the constructor / load() / configureStreaming().
  tokenizerModule;
  speechToTextNativeModule;
  config;
  windowSize;
  overlapSeconds;

  // User callbacks
  decodedTranscribeCallback;
  isReadyCallback;
  isGeneratingCallback;
  onErrorCallback;

  /**
   * @param options
   * @param options.transcribeCallback Receives the decoded text every time the
   *   transcript changes.
   * @param options.isReadyCallback Optional; notified when readiness changes.
   * @param options.isGeneratingCallback Optional; notified when generation
   *   starts or stops.
   * @param options.onErrorCallback Optional; receives an `Error`, or
   *   `undefined` when the error state is cleared. When it is omitted, errors
   *   are thrown instead.
   * @param options.overlapSeconds Optional override of the preset overlap.
   * @param options.windowSize Optional override of the preset window size.
   * @param options.streamingConfig Key into `MODES`; defaults to 'balanced'.
   */
  constructor({
    transcribeCallback,
    isReadyCallback,
    isGeneratingCallback,
    onErrorCallback,
    overlapSeconds,
    windowSize,
    streamingConfig,
  }) {
    this.tokenizerModule = new TokenizerModule();

    this.decodedTranscribeCallback = async (seq) =>
      transcribeCallback(await this.tokenIdsToText(seq));

    this.isReadyCallback = (isReady) => {
      this.isReady = isReady;
      isReadyCallback?.(isReady);
    };

    this.isGeneratingCallback = (isGenerating) => {
      this.isGenerating = isGenerating;
      isGeneratingCallback?.(isGenerating);
    };

    this.onErrorCallback = (error) => {
      if (onErrorCallback) {
        onErrorCallback(error ? new Error(getError(error)) : undefined);
        return;
      }
      // Without a user callback a real error is surfaced by throwing.
      // `undefined` only means "clear the error state" (load() and
      // resetState() call it that way) and must not throw, otherwise the
      // controller is unusable when no error callback is supplied.
      if (error) {
        throw new Error(getError(error));
      }
    };

    this.configureStreaming(
      overlapSeconds,
      windowSize,
      streamingConfig || 'balanced'
    );
  }

  /**
   * Loads the tokenizer, fetches the encoder/decoder resources and creates the
   * native speech-to-text module. Explicit sources take precedence over the
   * ones in `MODEL_CONFIGS[modelName]`.
   *
   * Failures are reported through `onErrorCallback`; `isReady` stays false in
   * that case.
   */
  async load({
    modelName,
    encoderSource,
    decoderSource,
    tokenizerSource,
    onDownloadProgressCallback,
  }) {
    this.onErrorCallback(undefined);
    this.isReadyCallback(false);
    this.config = MODEL_CONFIGS[modelName];

    let encoderPath;
    let decoderPath;
    try {
      const tokenizerLoadPromise = this.tokenizerModule.load({
        tokenizerSource: tokenizerSource || this.config.tokenizer.source,
      });
      const pathsPromise = ResourceFetcher.fetch(
        onDownloadProgressCallback,
        encoderSource || this.config.sources.encoder,
        decoderSource || this.config.sources.decoder
      );
      const [, encoderDecoderResults] = await Promise.all([
        tokenizerLoadPromise,
        pathsPromise,
      ]);
      encoderPath = encoderDecoderResults?.[0];
      decoderPath = encoderDecoderResults?.[1];
      if (!encoderPath || !decoderPath) {
        throw new Error('Download interrupted.');
      }
    } catch (e) {
      this.onErrorCallback(e);
      return;
    }

    // The underlying native class is instantiated based on the name of the
    // model. There is no need to create a separate class for the multilingual
    // version of Whisper, since it is the same. We just need the distinction
    // here, in TS, for start tokens and such. If we introduce more versions of
    // Whisper, such as the small one, this should be refactored.
    const nativeModelName =
      modelName === 'whisperMultilingual' ? AvailableModels.WHISPER : modelName;

    try {
      this.speechToTextNativeModule = await global.loadSpeechToText(
        encoderPath,
        decoderPath,
        nativeModelName
      );
      this.isReadyCallback(true);
    } catch (e) {
      this.onErrorCallback(e);
    }
  }

  /**
   * Sets `windowSize` and `overlapSeconds` (both stored multiplied by
   * `SECOND`). Values come from the `MODES[streamingConfig]` preset; truthy
   * explicit arguments override the preset. When
   * `windowSize + 2 * overlapSeconds` exceeds 30 seconds the window is shrunk
   * so that the sum is exactly 30 seconds.
   */
  configureStreaming(overlapSeconds, windowSize, streamingConfig) {
    if (streamingConfig) {
      this.windowSize = MODES[streamingConfig].windowSize * SECOND;
      this.overlapSeconds = MODES[streamingConfig].overlapSeconds * SECOND;
    }
    if (streamingConfig && (windowSize || overlapSeconds)) {
      Logger.warn(
        `windowSize and overlapSeconds overrides values from streamingConfig ${streamingConfig}.`
      );
    }
    this.windowSize = (windowSize || 0) * SECOND || this.windowSize;
    this.overlapSeconds = (overlapSeconds || 0) * SECOND || this.overlapSeconds;

    const maxChunkLength = 30 * SECOND;
    const requestedLength = this.windowSize + 2 * this.overlapSeconds;
    // Exactly 30 seconds is valid (the message itself says "<= 30"), so only
    // warn and clamp when the limit is actually exceeded. Values are divided
    // by SECOND so they are printed in the same unit as the literal "30".
    if (requestedLength > maxChunkLength) {
      const clampedWindowSize = maxChunkLength - 2 * this.overlapSeconds;
      Logger.warn(
        `Invalid values for overlapSeconds and/or windowSize provided. Expected windowSize + 2 * overlapSeconds (== ${requestedLength / SECOND}) <= 30. Setting windowSize to ${clampedWindowSize / SECOND}.`
      );
      this.windowSize = clampedWindowSize;
    }
  }

  /**
   * Appends to `this.chunks` one slice of `this.waveform` per window. Each
   * slice is extended by `overlapSeconds` on both sides, clipped to the
   * waveform bounds.
   */
  chunkWaveform() {
    this.numOfChunks = Math.ceil(this.waveform.length / this.windowSize);
    for (let i = 0; i < this.numOfChunks; i++) {
      const left = Math.max(this.windowSize * i - this.overlapSeconds, 0);
      const right = Math.min(
        this.windowSize * (i + 1) + this.overlapSeconds,
        this.waveform.length
      );
      this.chunks.push(this.waveform.slice(left, right));
    }
  }

  /**
   * Clears all transcription buffers, publishes an empty transcript and
   * clears the error state.
   */
  resetState() {
    this.sequence = [];
    this.seqs = [];
    this.waveform = [];
    this.prevSeq = [];
    this.chunks = [];
    this.decodedTranscribeCallback([]);
    this.onErrorCallback(undefined);
  }

  /**
   * Length of the next streaming chunk.
   *
   * Only the first chunk can be shorter: while no sequence has been decoded
   * yet there is no overlap on the left side.
   */
  expectedChunkLength() {
    return this.seqs.length
      ? this.windowSize + 2 * this.overlapSeconds
      : this.windowSize + this.overlapSeconds;
  }

  /**
   * Returns the token ids every decoded sequence starts with.
   *
   * We need different starting token ids based on the multilingualism of the
   * model. The English-only version needs just the BOS token, while the
   * multilingual one needs [BOS, LANG, TRANSCRIBE]. The notimestamps token is
   * appended as well, as timestamps are not yet supported.
   */
  async getStartingTokenIds(audioLanguage) {
    if (!audioLanguage) {
      return [this.config.tokenizer.bos];
    }
    // FIXME: .getTokenId should be used for the BOS as well, and BOS should be
    // removed from config.
    const langTokenId = await this.tokenizerModule.tokenToId(
      `<|${audioLanguage}|>`
    );
    const transcribeTokenId =
      await this.tokenizerModule.tokenToId('<|transcribe|>');
    const noTimestampsTokenId =
      await this.tokenizerModule.tokenToId('<|notimestamps|>');
    return [
      this.config.tokenizer.bos,
      langTokenId,
      transcribeTokenId,
      noTimestampsTokenId,
    ];
  }

  /**
   * Encodes one waveform chunk and decodes it token by token until the EOS
   * token is produced.
   *
   * @returns The decoded token ids including the starting tokens. On an
   *   encoding error `[]` is returned; on a decoding error the tokens decoded
   *   so far followed by EOS are returned. Both errors are also reported via
   *   `onErrorCallback`.
   */
  async decodeChunk(chunk, audioLanguage) {
    const seq = await this.getStartingTokenIds(audioLanguage);
    let prevSeqTokenIdx = 0;
    this.prevSeq = this.sequence.slice();

    try {
      await this.encode(new Float32Array(chunk));
    } catch (error) {
      this.onErrorCallback(new Error(getError(error) + ' encoding error'));
      return [];
    }

    let lastToken = seq.at(-1);
    while (lastToken !== this.config.tokenizer.eos) {
      try {
        lastToken = await this.decode(seq);
      } catch (error) {
        this.onErrorCallback(new Error(getError(error) + ' decoding error'));
        return [...seq, this.config.tokenizer.eos];
      }
      seq.push(lastToken);

      // While this chunk is still being decoded, publish intermediate output
      // by revealing tokens of the previously decoded sequence one at a time
      // (every third step is skipped).
      const previousSeq = this.seqs.at(-1);
      if (
        this.seqs.length > 0 &&
        seq.length < previousSeq.length &&
        seq.length % 3 !== 0
      ) {
        this.prevSeq.push(previousSeq[prevSeqTokenIdx++]);
        this.decodedTranscribeCallback(this.prevSeq);
      }
    }
    return seq;
  }

  /**
   * Appends to `this.sequence` the leading part of the second-to-last
   * sequence, up to the index returned by `longCommonInfPref` for the last
   * two sequences, and publishes the result.
   *
   * @returns A copy of the updated `this.sequence`.
   */
  async handleOverlaps(seqs) {
    const maxInd = longCommonInfPref(
      seqs.at(-2),
      seqs.at(-1),
      HAMMING_DIST_THRESHOLD
    );
    this.sequence = [...this.sequence, ...seqs.at(-2).slice(0, maxInd)];
    this.decodedTranscribeCallback(this.sequence);
    return this.sequence.slice();
  }

  /**
   * Drops the first `numOfTokensToTrim` tokens of the last sequence, but only
   * if it still starts with the BOS token (i.e. has not been trimmed yet).
   */
  trimLeft(numOfTokensToTrim) {
    const idx = this.seqs.length - 1;
    if (this.seqs[idx][0] === this.config.tokenizer.bos) {
      this.seqs[idx] = this.seqs[idx].slice(numOfTokensToTrim);
    }
  }

  /**
   * Drops the last `numOfTokensToTrim` tokens of the second-to-last sequence,
   * but only if it still ends with the EOS token (i.e. has not been trimmed
   * yet).
   */
  trimRight(numOfTokensToTrim) {
    const idx = this.seqs.length - 2;
    if (this.seqs[idx].at(-1) === this.config.tokenizer.eos) {
      this.seqs[idx] = this.seqs[idx].slice(0, -numOfTokensToTrim);
    }
  }

  /**
   * Removes the starting tokens plus `NUM_TOKENS_TO_TRIM` extra tokens at the
   * seam between the two most recent sequences.
   *
   * Since this is called every time (except the first) after a new seq is
   * pushed to `this.seqs`, only the last seq can be trimmed on the left and
   * only the second-to-last seq can be trimmed on the right.
   */
  async trimSequences(audioLanguage) {
    const numSpecialTokens = (await this.getStartingTokenIds(audioLanguage))
      .length;
    this.trimLeft(numSpecialTokens + NUM_TOKENS_TO_TRIM);
    this.trimRight(numSpecialTokens + NUM_TOKENS_TO_TRIM);
  }

  /**
   * If the last chunk is too short (under 5 seconds), combines it with the
   * second-to-last one to improve quality, provided the two together stay
   * under 30 seconds.
   */
  validateAndFixLastChunk() {
    if (this.chunks.length < 2) return;

    const lastChunk = this.chunks.at(-1);
    const secondToLastChunk = this.chunks.at(-2);
    const lastChunkLength = lastChunk.length / SECOND;
    const secondToLastChunkLength = secondToLastChunk.length / SECOND;

    if (lastChunkLength < 5 && secondToLastChunkLength + lastChunkLength < 30) {
      // Drop the 2 * overlap tail that the last chunk repeats. An explicit end
      // index is used instead of slice(0, -2 * overlap): with a zero overlap
      // that would be slice(0, -0), which yields an empty array and silently
      // discards the whole second-to-last chunk.
      const keepLength = Math.max(
        secondToLastChunk.length - 2 * this.overlapSeconds,
        0
      );
      this.chunks[this.chunks.length - 2] = [
        ...secondToLastChunk.slice(0, keepLength),
        ...lastChunk,
      ];
      this.chunks = this.chunks.slice(0, -1);
    }
  }

  /**
   * Decodes token ids to text with the tokenizer (second argument `true`).
   * On failure the error is reported via `onErrorCallback` and an empty
   * string is returned.
   */
  async tokenIdsToText(tokenIds) {
    try {
      return await this.tokenizerModule.decode(tokenIds, true);
    } catch (e) {
      this.onErrorCallback(
        new Error(`An error has occurred when decoding the token ids: ${e}`)
      );
      return '';
    }
  }

  /**
   * Transcribes a complete waveform.
   *
   * @param waveform Audio samples to transcribe.
   * @param audioLanguage Must be provided if and only if the loaded model
   *   config is multilingual.
   * @returns The decoded text, or '' when a precondition fails (the failure
   *   is reported via `onErrorCallback`).
   */
  async transcribe(waveform, audioLanguage) {
    try {
      if (!this.isReady) throw new Error(getError(ETError.ModuleNotLoaded));
      if (this.isGenerating || this.streaming) {
        throw new Error(getError(ETError.ModelGenerating));
      }
      if (!!audioLanguage !== this.config.isMultilingual) {
        throw new Error(getError(ETError.MultilingualConfiguration));
      }
    } catch (e) {
      this.onErrorCallback(e);
      return '';
    }

    this.isGeneratingCallback(true);
    try {
      // Making sure that the error is not set when we get there
      this.resetState();
      this.waveform = waveform;
      this.chunkWaveform();
      this.validateAndFixLastChunk();

      for (let chunkId = 0; chunkId < this.chunks.length; chunkId++) {
        const seq = await this.decodeChunk(
          this.chunks.at(chunkId),
          audioLanguage
        );

        // whole audio is inside one chunk, no processing required
        if (this.chunks.length === 1) {
          this.sequence = seq;
          this.decodedTranscribeCallback(seq);
          break;
        }

        this.seqs.push(seq);
        if (this.seqs.length < 2) continue;

        // Remove starting tokenIds and some additional ones
        await this.trimSequences(audioLanguage);
        this.prevSeq = await this.handleOverlaps(this.seqs);

        // last sequence processed
        // overlaps are already handled, so just append the last seq
        if (this.seqs.length === this.chunks.length) {
          this.sequence = [...this.sequence, ...this.seqs.at(-1)];
          this.decodedTranscribeCallback(this.sequence);
          this.prevSeq = this.sequence;
        }
      }

      return await this.tokenIdsToText(this.sequence);
    } finally {
      // Always leave the generating state, even when something above throws;
      // otherwise every later call would be rejected with ModelGenerating.
      this.isGeneratingCallback(false);
    }
  }

  /**
   * Incremental transcription driven by `STREAMING_ACTION`:
   * START resets the state and begins a stream, DATA appends audio, and STOP
   * flushes whatever audio is left and ends the stream.
   *
   * @param streamAction One of `STREAMING_ACTION.START | DATA | STOP`.
   * @param waveform Audio samples to append (required for DATA).
   * @param audioLanguage Must be provided if and only if the loaded model
   *   config is multilingual.
   * @returns The text decoded so far, or '' when a precondition fails (the
   *   failure is reported via `onErrorCallback`).
   */
  async streamingTranscribe(streamAction, waveform, audioLanguage) {
    const isStart = streamAction === STREAMING_ACTION.START;
    const isData = streamAction === STREAMING_ACTION.DATA;
    const isStop = streamAction === STREAMING_ACTION.STOP;

    try {
      if (!this.isReady) throw new Error(getError(ETError.ModuleNotLoaded));
      if (!!audioLanguage !== this.config.isMultilingual) {
        throw new Error(getError(ETError.MultilingualConfiguration));
      }
      if (isStart && (this.streaming || this.isGenerating)) {
        throw new Error(getError(ETError.ModelGenerating));
      }
      if ((isData || isStop) && !this.streaming) {
        throw new Error(getError(ETError.StreamingNotStarted));
      }
      if (isData && !waveform) {
        throw new Error(getError(ETError.MissingDataChunk));
      }
    } catch (e) {
      this.onErrorCallback(e);
      return '';
    }

    if (isStart) {
      this.resetState();
      this.streaming = true;
      this.isGeneratingCallback(true);
    }

    this.waveform = [...this.waveform, ...(waveform || [])];

    // while buffer has at least required size get chunk and decode
    while (this.waveform.length >= this.expectedChunkLength()) {
      const isFirstChunk = this.seqs.length === 0;
      const chunk = this.waveform.slice(0, this.expectedChunkLength());
      this.chunks = [chunk]; // save last chunk for STREAMING_ACTION.STOP

      // Keep the overlap in the buffer: the first chunk has no left overlap,
      // so the buffer advances by one overlap less than a full window.
      const consumed = isFirstChunk
        ? this.windowSize - this.overlapSeconds
        : this.windowSize;
      this.waveform = this.waveform.slice(consumed);

      const seq = await this.decodeChunk(chunk, audioLanguage);
      this.seqs.push(seq);
      if (this.seqs.length < 2) continue;
      await this.trimSequences(audioLanguage);
      await this.handleOverlaps(this.seqs);
    }

    // got final package, process all remaining waveform data
    // since we run the loop above the waveform has at most one chunk in it
    if (isStop) {
      try {
        // pad remaining waveform data with previous chunk to
        // this.windowSize + 2 * this.overlapSeconds
        const chunk = this.chunks.length
          ? [
              ...this.chunks[0].slice(0, this.windowSize),
              ...this.waveform,
            ].slice(-this.windowSize - 2 * this.overlapSeconds)
          : this.waveform;
        this.waveform = [];

        const seq = await this.decodeChunk(chunk, audioLanguage);
        this.seqs.push(seq);
        if (this.seqs.length === 1) {
          this.sequence = this.seqs[0];
        } else {
          await this.trimSequences(audioLanguage);
          await this.handleOverlaps(this.seqs);
          this.sequence = [...this.sequence, ...this.seqs.at(-1)];
        }
        this.decodedTranscribeCallback(this.sequence);
      } finally {
        // The stream ends on STOP no matter what, so a failure while flushing
        // cannot leave the controller permanently "generating"/"streaming".
        this.isGeneratingCallback(false);
        this.streaming = false;
      }
    }

    return await this.tokenIdsToText(this.sequence);
  }

  /** Forwards the waveform to the native module's `encode`. */
  async encode(waveform) {
    return await this.speechToTextNativeModule.encode(waveform);
  }

  /** Forwards the token sequence to the native module's `decode`. */
  async decode(seq) {
    return await this.speechToTextNativeModule.decode(seq);
  }
}
|
|
320
|
-
//# sourceMappingURL=SpeechToTextController.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"names":["HAMMING_DIST_THRESHOLD","MODEL_CONFIGS","SECOND","MODES","NUM_TOKENS_TO_TRIM","STREAMING_ACTION","AvailableModels","TokenizerModule","ResourceFetcher","longCommonInfPref","ETError","getError","Logger","SpeechToTextController","sequence","isReady","isGenerating","chunks","seqs","prevSeq","waveform","numOfChunks","streaming","constructor","transcribeCallback","isReadyCallback","isGeneratingCallback","onErrorCallback","overlapSeconds","windowSize","streamingConfig","tokenizerModule","decodedTranscribeCallback","seq","tokenIdsToText","error","Error","undefined","configureStreaming","load","modelName","encoderSource","decoderSource","tokenizerSource","onDownloadProgressCallback","config","tokenizerLoadPromise","tokenizer","source","pathsPromise","fetch","sources","encoder","decoder","_","encoderDecoderResults","Promise","all","e","WHISPER","nativeSpeechToText","global","loadSpeechToText","speechToTextNativeModule","warn","chunkWaveform","Math","ceil","length","i","chunk","left","max","right","min","slice","push","resetState","expectedChunkLength","getStartingTokenIds","audioLanguage","bos","langTokenId","tokenToId","transcribeTokenId","noTimestampsTokenId","startingTokenIds","decodeChunk","prevSeqTokenIdx","encode","Float32Array","lastToken","at","eos","decode","handleOverlaps","maxInd","trimLeft","numOfTokensToTrim","idx","trimRight","trimSequences","numSpecialTokens","validateAndFixLastChunk","lastChunkLength","secondToLastChunkLength","tokenIds","transcribe","ModuleNotLoaded","ModelGenerating","isMultilingual","MultilingualConfiguration","chunkId","decodedText","streamingTranscribe","streamAction","START","DATA","StreamingNotStarted","STOP","MissingDataChunk","Number"],"sourceRoot":"../../../src","sources":["controllers/SpeechToTextController.ts"],"mappings":";;AAAA,SACEA,sBAAsB,EACtBC,aAAa,EACbC,MAAM,EACNC,KAAK,EACLC,kBAAkB,EAClBC,gBAAgB,QACX,0BAA0B;AACjC,SAASC,eAAe,QAAqB,cAAc;AAC3D,SAASC,eAAe,QAAQ,wDAAwD;AAExF,SAASC,eAAe,QAAQ,0BAA0B;AAC1D,SAAS
C,iBAAiB,QAAQ,cAAc;AAEhD,SAASC,OAAO,EAAEC,QAAQ,QAAQ,UAAU;AAC5C,SAASC,MAAM,QAAQ,kBAAkB;AAEzC,OAAO,MAAMC,sBAAsB,CAAC;EAG3BC,QAAQ,GAAa,EAAE;EACvBC,OAAO,GAAG,KAAK;EACfC,YAAY,GAAG,KAAK;EAKnBC,MAAM,GAAe,EAAE;EACvBC,IAAI,GAAe,EAAE;EACrBC,OAAO,GAAa,EAAE;EACtBC,QAAQ,GAAa,EAAE;EACvBC,WAAW,GAAG,CAAC;EACfC,SAAS,GAAG,KAAK;;EAEzB;;EAOAC,WAAWA,CAAC;IACVC,kBAAkB;IAClBC,eAAe;IACfC,oBAAoB;IACpBC,eAAe;IACfC,cAAc;IACdC,UAAU;IACVC;EASF,CAAC,EAAE;IACD,IAAI,CAACC,eAAe,GAAG,IAAIxB,eAAe,CAAC,CAAC;IAC5C,IAAI,CAACyB,yBAAyB,GAAG,MAAOC,GAAG,IACzCT,kBAAkB,CAAC,MAAM,IAAI,CAACU,cAAc,CAACD,GAAG,CAAC,CAAC;IACpD,IAAI,CAACR,eAAe,GAAIV,OAAO,IAAK;MAClC,IAAI,CAACA,OAAO,GAAGA,OAAO;MACtBU,eAAe,GAAGV,OAAO,CAAC;IAC5B,CAAC;IACD,IAAI,CAACW,oBAAoB,GAAIV,YAAY,IAAK;MAC5C,IAAI,CAACA,YAAY,GAAGA,YAAY;MAChCU,oBAAoB,GAAGV,YAAY,CAAC;IACtC,CAAC;IACD,IAAI,CAACW,eAAe,GAAIQ,KAAK,IAAK;MAChC,IAAIR,eAAe,EAAE;QACnBA,eAAe,CAACQ,KAAK,GAAG,IAAIC,KAAK,CAACzB,QAAQ,CAACwB,KAAK,CAAC,CAAC,GAAGE,SAAS,CAAC;QAC/D;MACF,CAAC,MAAM;QACL,MAAM,IAAID,KAAK,CAACzB,QAAQ,CAACwB,KAAK,CAAC,CAAC;MAClC;IACF,CAAC;IACD,IAAI,CAACG,kBAAkB,CACrBV,cAAc,EACdC,UAAU,EACVC,eAAe,IAAI,UACrB,CAAC;EACH;EAEA,MAAaS,IAAIA,CAAC;IAChBC,SAAS;IACTC,aAAa;IACbC,aAAa;IACbC,eAAe;IACfC;EAOF,CAAC,EAAE;IACD,IAAI,CAACjB,eAAe,CAACU,SAAS,CAAC;IAC/B,IAAI,CAACZ,eAAe,CAAC,KAAK,CAAC;IAC3B,IAAI,CAACoB,MAAM,GAAG5C,aAAa,CAACuC,SAAS,CAAC;IAEtC,IAAI;MACF,MAAMM,oBAAoB,GAAG,IAAI,CAACf,eAAe,CAACQ,IAAI,CAAC;QACrDI,eAAe,EAAEA,eAAe,IAAI,IAAI,CAACE,MAAM,CAACE,SAAS,CAACC;MAC5D,CAAC,CAAC;MACF,MAAMC,YAAY,GAAGzC,eAAe,CAAC0C,KAAK,CACxCN,0BAA0B,EAC1BH,aAAa,IAAI,IAAI,CAACI,MAAM,CAACM,OAAO,CAACC,OAAO,EAC5CV,aAAa,IAAI,IAAI,CAACG,MAAM,CAACM,OAAO,CAACE,OACvC,CAAC;MACD,MAAM,CAACC,CAAC,EAAEC,qBAAqB,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CACnDX,oBAAoB,EACpBG,YAAY,CACb,CAAC;MACFR,aAAa,GAAGc,qBAAqB,GAAG,CAAC,CAAC;MAC1Cb,aAAa,GAAGa,qBAAqB,GAAG,CAAC,CAAC;MAC1C,IAAI,CAACd,aAAa,IAAI,CAACC,aAAa,EAAE;QACpC,MAAM,IAAIN,KAAK,CAAC,uBAAuB,CAAC;MAC1C;IACF,CAAC,CAAC,OAAOsB,CAAC,EAAE;MACV,IAAI,CAAC/B,eAAe,CAAC+B,CAAC,CAAC;MACvB;I
ACF;IAEA,IAAIlB,SAAS,KAAK,qBAAqB,EAAE;MACvC;MACA;MACA;MACA;MACAA,SAAS,GAAGlC,eAAe,CAACqD,OAAO;IACrC;IAEA,IAAI;MACF,MAAMC,kBAAkB,GAAG,MAAMC,MAAM,CAACC,gBAAgB,CACtDrB,aAAa,EACbC,aAAa,EACbF,SACF,CAAC;MACD,IAAI,CAACuB,wBAAwB,GAAGH,kBAAkB;MAClD,IAAI,CAACnC,eAAe,CAAC,IAAI,CAAC;IAC5B,CAAC,CAAC,OAAOiC,CAAC,EAAE;MACV,IAAI,CAAC/B,eAAe,CAAC+B,CAAC,CAAC;IACzB;EACF;EAEOpB,kBAAkBA,CACvBV,cAAuB,EACvBC,UAAmB,EACnBC,eAAoC,EACpC;IACA,IAAIA,eAAe,EAAE;MACnB,IAAI,CAACD,UAAU,GAAG1B,KAAK,CAAC2B,eAAe,CAAC,CAACD,UAAU,GAAG3B,MAAM;MAC5D,IAAI,CAAC0B,cAAc,GAAGzB,KAAK,CAAC2B,eAAe,CAAC,CAACF,cAAc,GAAG1B,MAAM;IACtE;IACA,IAAI4B,eAAe,KAAKD,UAAU,IAAID,cAAc,CAAC,EAAE;MACrDhB,MAAM,CAACoD,IAAI,CACT,uEAAuElC,eAAe,GACxF,CAAC;IACH;IACA,IAAI,CAACD,UAAU,GAAG,CAACA,UAAU,IAAI,CAAC,IAAI3B,MAAM,IAAI,IAAI,CAAC2B,UAAU;IAC/D,IAAI,CAACD,cAAc,GAAG,CAACA,cAAc,IAAI,CAAC,IAAI1B,MAAM,IAAI,IAAI,CAAC0B,cAAc;IAC3E,IAAI,CAAC,GAAG,IAAI,CAACA,cAAc,GAAG,IAAI,CAACC,UAAU,IAAI,EAAE,GAAG3B,MAAM,EAAE;MAC5DU,MAAM,CAACoD,IAAI,CACT,8GAA8G,IAAI,CAACnC,UAAU,GAAG,CAAC,GAAG,IAAI,CAACD,cAAc,kCAAkC,EAAE,GAAG1B,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC0B,cAAc,GAChO,CAAC;MACD,IAAI,CAACC,UAAU,GAAG,EAAE,GAAG3B,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC0B,cAAc;IACzD;EACF;EAEQqC,aAAaA,CAAA,EAAG;IACtB,IAAI,CAAC5C,WAAW,GAAG6C,IAAI,CAACC,IAAI,CAAC,IAAI,CAAC/C,QAAQ,CAACgD,MAAM,GAAG,IAAI,CAACvC,UAAU,CAAC;IACpE,KAAK,IAAIwC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAG,IAAI,CAAChD,WAAW,EAAEgD,CAAC,EAAE,EAAE;MACzC,IAAIC,KAAe,GAAG,EAAE;MACxB,MAAMC,IAAI,GAAGL,IAAI,CAACM,GAAG,CAAC,IAAI,CAAC3C,UAAU,GAAGwC,CAAC,GAAG,IAAI,CAACzC,cAAc,EAAE,CAAC,CAAC;MACnE,MAAM6C,KAAK,GAAGP,IAAI,CAACQ,GAAG,CACpB,IAAI,CAAC7C,UAAU,IAAIwC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAACzC,cAAc,EAC/C,IAAI,CAACR,QAAQ,CAACgD,MAChB,CAAC;MACDE,KAAK,GAAG,IAAI,CAAClD,QAAQ,CAACuD,KAAK,CAACJ,IAAI,EAAEE,KAAK,CAAC;MACxC,IAAI,CAACxD,MAAM,CAAC2D,IAAI,CAACN,KAAK,CAAC;IACzB;EACF;EAEQO,UAAUA,CAAA,EAAG;IACnB,IAAI,CAAC/D,QAAQ,GAAG,EAAE;IAClB,IAAI,CAACI,IAAI,GAAG,EAAE;IACd,IAAI,CAACE,QAAQ,GAAG,EAAE;IAClB,IAAI,CAACD,OAAO,GAAG,EAAE;IACjB,IAAI,CAACF,MAAM,GA
AG,EAAE;IAChB,IAAI,CAACe,yBAAyB,CAAC,EAAE,CAAC;IAClC,IAAI,CAACL,eAAe,CAACU,SAAS,CAAC;EACjC;EAEQyC,mBAAmBA,CAAA,EAAG;IAC5B;IACA,OAAO,IAAI,CAAC5D,IAAI,CAACkD,MAAM,GACnB,IAAI,CAACvC,UAAU,GAAG,CAAC,GAAG,IAAI,CAACD,cAAc,GACzC,IAAI,CAACC,UAAU,GAAG,IAAI,CAACD,cAAc;EAC3C;EAEA,MAAcmD,mBAAmBA,CAACC,aAAsB,EAAqB;IAC3E;IACA;IACA;IACA;IACA,IAAI,CAACA,aAAa,EAAE;MAClB,OAAO,CAAC,IAAI,CAACnC,MAAM,CAACE,SAAS,CAACkC,GAAG,CAAC;IACpC;IACA;IACA,MAAMC,WAAW,GAAG,MAAM,IAAI,CAACnD,eAAe,CAACoD,SAAS,CACtD,KAAKH,aAAa,IACpB,CAAC;IACD,MAAMI,iBAAiB,GACrB,MAAM,IAAI,CAACrD,eAAe,CAACoD,SAAS,CAAC,gBAAgB,CAAC;IACxD,MAAME,mBAAmB,GACvB,MAAM,IAAI,CAACtD,eAAe,CAACoD,SAAS,CAAC,kBAAkB,CAAC;IAC1D,MAAMG,gBAAgB,GAAG,CACvB,IAAI,CAACzC,MAAM,CAACE,SAAS,CAACkC,GAAG,EACzBC,WAAW,EACXE,iBAAiB,EACjBC,mBAAmB,CACpB;IACD,OAAOC,gBAAgB;EACzB;EAEA,MAAcC,WAAWA,CACvBjB,KAAe,EACfU,aAAoC,EACjB;IACnB,MAAM/C,GAAG,GAAG,MAAM,IAAI,CAAC8C,mBAAmB,CAACC,aAAa,CAAC;IACzD,IAAIQ,eAAe,GAAG,CAAC;IACvB,IAAI,CAACrE,OAAO,GAAG,IAAI,CAACL,QAAQ,CAAC6D,KAAK,CAAC,CAAC;IACpC,IAAI;MACF,MAAM,IAAI,CAACc,MAAM,CAAC,IAAIC,YAAY,CAACpB,KAAK,CAAC,CAAC;IAC5C,CAAC,CAAC,OAAOnC,KAAK,EAAE;MACd,IAAI,CAACR,eAAe,CAAC,IAAIS,KAAK,CAACzB,QAAQ,CAACwB,KAAK,CAAC,GAAG,iBAAiB,CAAC,CAAC;MACpE,OAAO,EAAE;IACX;IACA,IAAIwD,SAAS,GAAG1D,GAAG,CAAC2D,EAAE,CAAC,CAAC,CAAC,CAAW;IACpC,OAAOD,SAAS,KAAK,IAAI,CAAC9C,MAAM,CAACE,SAAS,CAAC8C,GAAG,EAAE;MAC9C,IAAI;QACFF,SAAS,GAAG,MAAM,IAAI,CAACG,MAAM,CAAC7D,GAAG,CAAC;MACpC,CAAC,CAAC,OAAOE,KAAK,EAAE;QACd,IAAI,CAACR,eAAe,CAAC,IAAIS,KAAK,CAACzB,QAAQ,CAACwB,KAAK,CAAC,GAAG,iBAAiB,CAAC,CAAC;QACpE,OAAO,CAAC,GAAGF,GAAG,EAAE,IAAI,CAACY,MAAM,CAACE,SAAS,CAAC8C,GAAG,CAAC;MAC5C;MACA5D,GAAG,CAAC2C,IAAI,CAACe,SAAS,CAAC;MACnB,IACE,IAAI,CAACzE,IAAI,CAACkD,MAAM,GAAG,CAAC,IACpBnC,GAAG,CAACmC,MAAM,GAAG,IAAI,CAAClD,IAAI,CAAC0E,EAAE,CAAC,CAAC,CAAC,CAAC,CAAExB,MAAM,IACrCnC,GAAG,CAACmC,MAAM,GAAG,CAAC,KAAK,CAAC,EACpB;QACA,IAAI,CAACjD,OAAO,CAACyD,IAAI,CAAC,IAAI,CAAC1D,IAAI,CAAC0E,EAAE,CAAC,CAAC,CAAC,CAAC,CAAEJ,eAAe,EAAE,CAAE,CAAC;QACxD,IAAI,CAACxD,yBAAyB,CAAC,IAAI,CAACb,OAA
O,CAAC;MAC9C;IACF;IACA,OAAOc,GAAG;EACZ;EAEA,MAAc8D,cAAcA,CAAC7E,IAAgB,EAAqB;IAChE,MAAM8E,MAAM,GAAGvF,iBAAiB,CAC9BS,IAAI,CAAC0E,EAAE,CAAC,CAAC,CAAC,CAAC,EACX1E,IAAI,CAAC0E,EAAE,CAAC,CAAC,CAAC,CAAC,EACX5F,sBACF,CAAC;IACD,IAAI,CAACc,QAAQ,GAAG,CAAC,GAAG,IAAI,CAACA,QAAQ,EAAE,GAAGI,IAAI,CAAC0E,EAAE,CAAC,CAAC,CAAC,CAAC,CAAEjB,KAAK,CAAC,CAAC,EAAEqB,MAAM,CAAC,CAAC;IACpE,IAAI,CAAChE,yBAAyB,CAAC,IAAI,CAAClB,QAAQ,CAAC;IAC7C,OAAO,IAAI,CAACA,QAAQ,CAAC6D,KAAK,CAAC,CAAC;EAC9B;EAEQsB,QAAQA,CAACC,iBAAyB,EAAE;IAC1C,MAAMC,GAAG,GAAG,IAAI,CAACjF,IAAI,CAACkD,MAAM,GAAG,CAAC;IAChC,IAAI,IAAI,CAAClD,IAAI,CAACiF,GAAG,CAAC,CAAE,CAAC,CAAC,KAAK,IAAI,CAACtD,MAAM,CAACE,SAAS,CAACkC,GAAG,EAAE;MACpD,IAAI,CAAC/D,IAAI,CAACiF,GAAG,CAAC,GAAG,IAAI,CAACjF,IAAI,CAACiF,GAAG,CAAC,CAAExB,KAAK,CAACuB,iBAAiB,CAAC;IAC3D;EACF;EAEQE,SAASA,CAACF,iBAAyB,EAAE;IAC3C,MAAMC,GAAG,GAAG,IAAI,CAACjF,IAAI,CAACkD,MAAM,GAAG,CAAC;IAChC,IAAI,IAAI,CAAClD,IAAI,CAACiF,GAAG,CAAC,CAAEP,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC/C,MAAM,CAACE,SAAS,CAAC8C,GAAG,EAAE;MACxD,IAAI,CAAC3E,IAAI,CAACiF,GAAG,CAAC,GAAG,IAAI,CAACjF,IAAI,CAACiF,GAAG,CAAC,CAAExB,KAAK,CAAC,CAAC,EAAE,CAACuB,iBAAiB,CAAC;IAC/D;EACF;;EAEA;EACA;EACA,MAAcG,aAAaA,CAACrB,aAAsB,EAAE;IAClD,MAAMsB,gBAAgB,GAAG,CAAC,MAAM,IAAI,CAACvB,mBAAmB,CAACC,aAAa,CAAC,EACpEZ,MAAM;IACT,IAAI,CAAC6B,QAAQ,CAACK,gBAAgB,GAAGlG,kBAAkB,CAAC;IACpD,IAAI,CAACgG,SAAS,CAACE,gBAAgB,GAAGlG,kBAAkB,CAAC;EACvD;;EAEA;EACQmG,uBAAuBA,CAAA,EAAG;IAChC,IAAI,IAAI,CAACtF,MAAM,CAACmD,MAAM,GAAG,CAAC,EAAE;IAE5B,MAAMoC,eAAe,GAAG,IAAI,CAACvF,MAAM,CAAC2E,EAAE,CAAC,CAAC,CAAC,CAAC,CAAExB,MAAM,GAAGlE,MAAM;IAC3D,MAAMuG,uBAAuB,GAAG,IAAI,CAACxF,MAAM,CAAC2E,EAAE,CAAC,CAAC,CAAC,CAAC,CAAExB,MAAM,GAAGlE,MAAM;IACnE,IAAIsG,eAAe,GAAG,CAAC,IAAIC,uBAAuB,GAAGD,eAAe,GAAG,EAAE,EAAE;MACzE,IAAI,CAACvF,MAAM,CAAC,IAAI,CAACA,MAAM,CAACmD,MAAM,GAAG,CAAC,CAAC,GAAG,CACpC,GAAG,IAAI,CAACnD,MAAM,CAAC2E,EAAE,CAAC,CAAC,CAAC,CAAC,CAAEjB,KAAK,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC/C,cAAc,GAAG,CAAC,CAAC,EACzD,GAAG,IAAI,CAACX,MAAM,CAAC2E,EAAE,CAAC,CAAC,CAAC,CAAE,CACvB;MACD
,IAAI,CAAC3E,MAAM,GAAG,IAAI,CAACA,MAAM,CAAC0D,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACxC;EACF;EAEA,MAAczC,cAAcA,CAACwE,QAAkB,EAAmB;IAChE,IAAI;MACF,OAAO,MAAM,IAAI,CAAC3E,eAAe,CAAC+D,MAAM,CAACY,QAAQ,EAAE,IAAI,CAAC;IAC1D,CAAC,CAAC,OAAOhD,CAAC,EAAE;MACV,IAAI,CAAC/B,eAAe,CAClB,IAAIS,KAAK,CAAC,sDAAsDsB,CAAC,EAAE,CACrE,CAAC;MACD,OAAO,EAAE;IACX;EACF;EAEA,MAAaiD,UAAUA,CACrBvF,QAAkB,EAClB4D,aAAoC,EACnB;IACjB,IAAI;MACF,IAAI,CAAC,IAAI,CAACjE,OAAO,EAAE,MAAMqB,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAACkG,eAAe,CAAC,CAAC;MACjE,IAAI,IAAI,CAAC5F,YAAY,IAAI,IAAI,CAACM,SAAS,EACrC,MAAMc,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAACmG,eAAe,CAAC,CAAC;MAChD,IAAI,CAAC,CAAC7B,aAAa,KAAK,IAAI,CAACnC,MAAM,CAACiE,cAAc,EAChD,MAAM,IAAI1E,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAACqG,yBAAyB,CAAC,CAAC;IAChE,CAAC,CAAC,OAAOrD,CAAC,EAAE;MACV,IAAI,CAAC/B,eAAe,CAAC+B,CAAC,CAAC;MACvB,OAAO,EAAE;IACX;;IAEA;IACA,IAAI,CAAChC,oBAAoB,CAAC,IAAI,CAAC;IAC/B,IAAI,CAACmD,UAAU,CAAC,CAAC;IACjB,IAAI,CAACzD,QAAQ,GAAGA,QAAQ;IACxB,IAAI,CAAC6C,aAAa,CAAC,CAAC;IACpB,IAAI,CAACsC,uBAAuB,CAAC,CAAC;IAE9B,KAAK,IAAIS,OAAO,GAAG,CAAC,EAAEA,OAAO,GAAG,IAAI,CAAC/F,MAAM,CAACmD,MAAM,EAAE4C,OAAO,EAAE,EAAE;MAC7D,MAAM/E,GAAG,GAAG,MAAM,IAAI,CAACsD,WAAW,CAChC,IAAI,CAACtE,MAAM,CAAE2E,EAAE,CAACoB,OAAO,CAAC,EACxBhC,aACF,CAAC;MACD;MACA,IAAI,IAAI,CAAC/D,MAAM,CAACmD,MAAM,KAAK,CAAC,EAAE;QAC5B,IAAI,CAACtD,QAAQ,GAAGmB,GAAG;QACnB,IAAI,CAACD,yBAAyB,CAACC,GAAG,CAAC;QACnC;MACF;MACA,IAAI,CAACf,IAAI,CAAC0D,IAAI,CAAC3C,GAAG,CAAC;MAEnB,IAAI,IAAI,CAACf,IAAI,CAACkD,MAAM,GAAG,CAAC,EAAE;;MAE1B;MACA,MAAM,IAAI,CAACiC,aAAa,CAACrB,aAAa,CAAC;MAEvC,IAAI,CAAC7D,OAAO,GAAG,MAAM,IAAI,CAAC4E,cAAc,CAAC,IAAI,CAAC7E,IAAI,CAAC;;MAEnD;MACA;MACA,IAAI,IAAI,CAACA,IAAI,CAACkD,MAAM,KAAK,IAAI,CAACnD,MAAM,CAACmD,MAAM,EAAE;QAC3C,IAAI,CAACtD,QAAQ,GAAG,CAAC,GAAG,IAAI,CAACA,QAAQ,EAAE,GAAG,IAAI,CAACI,IAAI,CAAC0E,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC;QACxD,IAAI,CAAC5D,yBAAyB,CAAC,IAAI,CAAClB,QAAQ,CAAC;QAC7C,IAAI,CAACK,OAAO,GAAG,IAAI,CAACL,QAAQ;MAC9B;IACF;IACA,MAAMmG,WAAW,GAAG,MAAM,IAAI,CAAC/E,cAAc,CAAC,IAAI,CAACpB,QAAQ,CAAC;IAC5D,IAAI,
CAACY,oBAAoB,CAAC,KAAK,CAAC;IAChC,OAAOuF,WAAW;EACpB;EAEA,MAAaC,mBAAmBA,CAC9BC,YAA8B,EAC9B/F,QAAmB,EACnB4D,aAAoC,EACnB;IACjB,IAAI;MACF,IAAI,CAAC,IAAI,CAACjE,OAAO,EAAE,MAAMqB,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAACkG,eAAe,CAAC,CAAC;MACjE,IAAI,CAAC,CAAC5B,aAAa,KAAK,IAAI,CAACnC,MAAM,CAACiE,cAAc,EAChD,MAAM,IAAI1E,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAACqG,yBAAyB,CAAC,CAAC;MAE9D,IACEI,YAAY,KAAK9G,gBAAgB,CAAC+G,KAAK,IACvC,CAAC,IAAI,CAAC9F,SAAS,IACf,IAAI,CAACN,YAAY,EAEjB,MAAMoB,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAACmG,eAAe,CAAC,CAAC;MAChD,IAAIM,YAAY,KAAK9G,gBAAgB,CAAC+G,KAAK,IAAI,IAAI,CAAC9F,SAAS,EAC3D,MAAMc,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAACmG,eAAe,CAAC,CAAC;MAChD,IAAIM,YAAY,KAAK9G,gBAAgB,CAACgH,IAAI,IAAI,CAAC,IAAI,CAAC/F,SAAS,EAC3D,MAAMc,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAAC4G,mBAAmB,CAAC,CAAC;MACpD,IAAIH,YAAY,KAAK9G,gBAAgB,CAACkH,IAAI,IAAI,CAAC,IAAI,CAACjG,SAAS,EAC3D,MAAMc,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAAC4G,mBAAmB,CAAC,CAAC;MACpD,IAAIH,YAAY,KAAK9G,gBAAgB,CAACgH,IAAI,IAAI,CAACjG,QAAQ,EACrD,MAAM,IAAIgB,KAAK,CAACzB,QAAQ,CAACD,OAAO,CAAC8G,gBAAgB,CAAC,CAAC;IACvD,CAAC,CAAC,OAAO9D,CAAC,EAAE;MACV,IAAI,CAAC/B,eAAe,CAAC+B,CAAC,CAAC;MACvB,OAAO,EAAE;IACX;IAEA,IAAIyD,YAAY,KAAK9G,gBAAgB,CAAC+G,KAAK,EAAE;MAC3C,IAAI,CAACvC,UAAU,CAAC,CAAC;MACjB,IAAI,CAACvD,SAAS,GAAG,IAAI;MACrB,IAAI,CAACI,oBAAoB,CAAC,IAAI,CAAC;IACjC;IAEA,IAAI,CAACN,QAAQ,GAAG,CAAC,GAAG,IAAI,CAACA,QAAQ,EAAE,IAAIA,QAAQ,IAAI,EAAE,CAAC,CAAC;;IAEvD;IACA,OAAO,IAAI,CAACA,QAAQ,CAACgD,MAAM,IAAI,IAAI,CAACU,mBAAmB,CAAC,CAAC,EAAE;MACzD,MAAMR,KAAK,GAAG,IAAI,CAAClD,QAAQ,CAACuD,KAAK,CAC/B,CAAC,EACD,IAAI,CAAC9C,UAAU,GACb,IAAI,CAACD,cAAc,IAAI,CAAC,GAAG6F,MAAM,CAAC,IAAI,CAACvG,IAAI,CAACkD,MAAM,GAAG,CAAC,CAAC,CAC3D,CAAC;MACD,IAAI,CAACnD,MAAM,GAAG,CAACqD,KAAK,CAAC,CAAC,CAAC;MACvB,IAAI,CAAClD,QAAQ,GAAG,IAAI,CAACA,QAAQ,CAACuD,KAAK,CACjC,IAAI,CAAC9C,UAAU,GAAG,IAAI,CAACD,cAAc,GAAG6F,MAAM,CAAC,IAAI,CAACvG,IAAI,CAACkD,MAAM,KAAK,CAAC,CACvE,CAAC;MACD,MAAMnC,GAAG,GAAG,MAAM,IAAI,CAACsD,WAAW,CAACjB,KAAK,EAAEU,aAAa,CAAC;MACxD,IAAI,CAAC9D,IAAI,CAAC0D,IAAI,CAAC3C,GAAG,CAAC;MAEnB,IAAI
,IAAI,CAACf,IAAI,CAACkD,MAAM,GAAG,CAAC,EAAE;MAE1B,MAAM,IAAI,CAACiC,aAAa,CAACrB,aAAa,CAAC;MACvC,MAAM,IAAI,CAACe,cAAc,CAAC,IAAI,CAAC7E,IAAI,CAAC;IACtC;;IAEA;IACA;IACA,IAAIiG,YAAY,KAAK9G,gBAAgB,CAACkH,IAAI,EAAE;MAC1C;MACA,MAAMjD,KAAK,GAAG,IAAI,CAACrD,MAAM,CAACmD,MAAM,GAC5B,CACE,GAAG,IAAI,CAACnD,MAAM,CAAC,CAAC,CAAC,CAAE0D,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC9C,UAAU,CAAC,EAC5C,GAAG,IAAI,CAACT,QAAQ,CACjB,CAACuD,KAAK,CAAC,CAAC,IAAI,CAAC9C,UAAU,GAAG,CAAC,GAAG,IAAI,CAACD,cAAc,CAAC,GACnD,IAAI,CAACR,QAAQ;MAEjB,IAAI,CAACA,QAAQ,GAAG,EAAE;MAClB,MAAMa,GAAG,GAAG,MAAM,IAAI,CAACsD,WAAW,CAACjB,KAAK,EAAEU,aAAa,CAAC;MACxD,IAAI,CAAC9D,IAAI,CAAC0D,IAAI,CAAC3C,GAAG,CAAC;MAEnB,IAAI,IAAI,CAACf,IAAI,CAACkD,MAAM,KAAK,CAAC,EAAE;QAC1B,IAAI,CAACtD,QAAQ,GAAG,IAAI,CAACI,IAAI,CAAC,CAAC,CAAE;MAC/B,CAAC,MAAM;QACL,MAAM,IAAI,CAACmF,aAAa,CAACrB,aAAa,CAAC;QACvC,MAAM,IAAI,CAACe,cAAc,CAAC,IAAI,CAAC7E,IAAI,CAAC;QACpC,IAAI,CAACJ,QAAQ,GAAG,CAAC,GAAG,IAAI,CAACA,QAAQ,EAAE,GAAG,IAAI,CAACI,IAAI,CAAC0E,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC;MAC1D;MACA,IAAI,CAAC5D,yBAAyB,CAAC,IAAI,CAAClB,QAAQ,CAAC;MAC7C,IAAI,CAACY,oBAAoB,CAAC,KAAK,CAAC;MAChC,IAAI,CAACJ,SAAS,GAAG,KAAK;IACxB;IAEA,MAAM2F,WAAW,GAAG,MAAM,IAAI,CAAC/E,cAAc,CAAC,IAAI,CAACpB,QAAQ,CAAC;IAE5D,OAAOmG,WAAW;EACpB;EAEA,MAAaxB,MAAMA,CAACrE,QAAsB,EAAiB;IACzD,OAAO,MAAM,IAAI,CAAC2C,wBAAwB,CAAC0B,MAAM,CAACrE,QAAQ,CAAC;EAC7D;EAEA,MAAa0E,MAAMA,CAAC7D,GAAa,EAAmB;IAClD,OAAO,MAAM,IAAI,CAAC8B,wBAAwB,CAAC+B,MAAM,CAAC7D,GAAG,CAAC;EACxD;AACF","ignoreList":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"sttDefaults.d.ts","sourceRoot":"","sources":["../../../src/constants/sttDefaults.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,eAAe,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAE5D,eAAO,MAAM,WAAW,QAAS,CAAC;AAClC,eAAO,MAAM,MAAM,QAAc,CAAC;AAClC,eAAO,MAAM,sBAAsB,IAAI,CAAC;AAyCxC,eAAO,MAAM,aAAa,EAAE;KACzB,GAAG,IAAI,eAAe,GAAG,WAAW;CAKtC,CAAC;AAEF,eAAO,MAAM,KAAK;;;;;;;;;;;;;CAajB,CAAC;AAEF,eAAO,MAAM,kBAAkB,IAAI,CAAC;AAEpC,oBAAY,gBAAgB;IAC1B,KAAK,IAAA;IACL,IAAI,IAAA;IACJ,IAAI,IAAA;CACL;AAED,OAAO,EAAE,eAAe,EAAE,CAAC"}
|
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
import { MODES, STREAMING_ACTION } from '../constants/sttDefaults';
|
|
2
|
-
import { AvailableModels } from '../types/stt';
|
|
3
|
-
import { ResourceSource } from '../types/common';
|
|
4
|
-
import { SpeechToTextLanguage } from '../types/stt';
|
|
5
|
-
export declare class SpeechToTextController {
|
|
6
|
-
private speechToTextNativeModule;
|
|
7
|
-
sequence: number[];
|
|
8
|
-
isReady: boolean;
|
|
9
|
-
isGenerating: boolean;
|
|
10
|
-
private tokenizerModule;
|
|
11
|
-
private overlapSeconds;
|
|
12
|
-
private windowSize;
|
|
13
|
-
private chunks;
|
|
14
|
-
private seqs;
|
|
15
|
-
private prevSeq;
|
|
16
|
-
private waveform;
|
|
17
|
-
private numOfChunks;
|
|
18
|
-
private streaming;
|
|
19
|
-
private decodedTranscribeCallback;
|
|
20
|
-
private isReadyCallback;
|
|
21
|
-
private isGeneratingCallback;
|
|
22
|
-
private onErrorCallback;
|
|
23
|
-
private config;
|
|
24
|
-
constructor({ transcribeCallback, isReadyCallback, isGeneratingCallback, onErrorCallback, overlapSeconds, windowSize, streamingConfig, }: {
|
|
25
|
-
transcribeCallback: (sequence: string) => void;
|
|
26
|
-
isReadyCallback?: (isReady: boolean) => void;
|
|
27
|
-
isGeneratingCallback?: (isGenerating: boolean) => void;
|
|
28
|
-
onErrorCallback?: (error: Error | undefined) => void;
|
|
29
|
-
overlapSeconds?: number;
|
|
30
|
-
windowSize?: number;
|
|
31
|
-
streamingConfig?: keyof typeof MODES;
|
|
32
|
-
});
|
|
33
|
-
load({ modelName, encoderSource, decoderSource, tokenizerSource, onDownloadProgressCallback, }: {
|
|
34
|
-
modelName: AvailableModels;
|
|
35
|
-
encoderSource?: ResourceSource;
|
|
36
|
-
decoderSource?: ResourceSource;
|
|
37
|
-
tokenizerSource?: ResourceSource;
|
|
38
|
-
onDownloadProgressCallback?: (downloadProgress: number) => void;
|
|
39
|
-
}): Promise<void>;
|
|
40
|
-
configureStreaming(overlapSeconds?: number, windowSize?: number, streamingConfig?: keyof typeof MODES): void;
|
|
41
|
-
private chunkWaveform;
|
|
42
|
-
private resetState;
|
|
43
|
-
private expectedChunkLength;
|
|
44
|
-
private getStartingTokenIds;
|
|
45
|
-
private decodeChunk;
|
|
46
|
-
private handleOverlaps;
|
|
47
|
-
private trimLeft;
|
|
48
|
-
private trimRight;
|
|
49
|
-
private trimSequences;
|
|
50
|
-
private validateAndFixLastChunk;
|
|
51
|
-
private tokenIdsToText;
|
|
52
|
-
transcribe(waveform: number[], audioLanguage?: SpeechToTextLanguage): Promise<string>;
|
|
53
|
-
streamingTranscribe(streamAction: STREAMING_ACTION, waveform?: number[], audioLanguage?: SpeechToTextLanguage): Promise<string>;
|
|
54
|
-
encode(waveform: Float32Array): Promise<null>;
|
|
55
|
-
decode(seq: number[]): Promise<number>;
|
|
56
|
-
}
|
|
57
|
-
//# sourceMappingURL=SpeechToTextController.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"SpeechToTextController.d.ts","sourceRoot":"","sources":["../../../src/controllers/SpeechToTextController.ts"],"names":[],"mappings":"AAAA,OAAO,EAIL,KAAK,EAEL,gBAAgB,EACjB,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,eAAe,EAAe,MAAM,cAAc,CAAC;AAE5D,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,cAAc,CAAC;AAIpD,qBAAa,sBAAsB;IACjC,OAAO,CAAC,wBAAwB,CAAM;IAE/B,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxB,OAAO,UAAS;IAChB,YAAY,UAAS;IAE5B,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,UAAU,CAAU;IAC5B,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,IAAI,CAAkB;IAC9B,OAAO,CAAC,OAAO,CAAgB;IAC/B,OAAO,CAAC,QAAQ,CAAgB;IAChC,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAS;IAG1B,OAAO,CAAC,yBAAyB,CAA+B;IAChE,OAAO,CAAC,eAAe,CAA6B;IACpD,OAAO,CAAC,oBAAoB,CAAkC;IAC9D,OAAO,CAAC,eAAe,CAAuB;IAC9C,OAAO,CAAC,MAAM,CAAe;gBAEjB,EACV,kBAAkB,EAClB,eAAe,EACf,oBAAoB,EACpB,eAAe,EACf,cAAc,EACd,UAAU,EACV,eAAe,GAChB,EAAE;QACD,kBAAkB,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;QAC/C,eAAe,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;QAC7C,oBAAoB,CAAC,EAAE,CAAC,YAAY,EAAE,OAAO,KAAK,IAAI,CAAC;QACvD,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,GAAG,SAAS,KAAK,IAAI,CAAC;QACrD,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,eAAe,CAAC,EAAE,MAAM,OAAO,KAAK,CAAC;KACtC;IA2BY,IAAI,CAAC,EAChB,SAAS,EACT,aAAa,EACb,aAAa,EACb,eAAe,EACf,0BAA0B,GAC3B,EAAE;QACD,SAAS,EAAE,eAAe,CAAC;QAC3B,aAAa,CAAC,EAAE,cAAc,CAAC;QAC/B,aAAa,CAAC,EAAE,cAAc,CAAC;QAC/B,eAAe,CAAC,EAAE,cAAc,CAAC;QACjC,0BAA0B,CAAC,EAAE,CAAC,gBAAgB,EAAE,MAAM,KAAK,IAAI,CAAC;KACjE;IAiDM,kBAAkB,CACvB,cAAc,CAAC,EAAE,MAAM,EACvB,UAAU,CAAC,EAAE,MAAM,EACnB,eAAe,CAAC,EAAE,MAAM,OAAO,KAAK;IAqBtC,OAAO,CAAC,aAAa;IAcrB,OAAO,CAAC,UAAU;IAUlB,OAAO,CAAC,mBAAmB;YAOb,mBAAmB;YAyBnB,WAAW;YAkCX,cAAc;IAW5B,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,SAAS;YASH,aAAa;IAQ3B,OAAO,CAAC,uBAAuB;YAcjB,cAAc;IAWf,UAAU,CACrB,QAAQ,EAAE,MAAM,EAAE,EAClB,aAAa,CAAC,EAAE,oBAAoB,GACnC,OAAO,CAAC,MAAM,CAAC;IAoDL,mBAAmB,CAC9B,YAAY,EAAE,gBAAgB,EAC9B,QAAQ,CAAC,EAAE,MAAM,EAAE,EACnB,aAAa,CAAC,EAAE,oBAAoB,GACnC
,OAAO,CAAC,MAAM,CAAC;IAqFL,MAAM,CAAC,QAAQ,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAI7C,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC;CAGpD"}
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
MOONSHINE_TINY,
|
|
3
|
-
WHISPER_TINY,
|
|
4
|
-
WHISPER_TINY_MULTILINGUAL,
|
|
5
|
-
} from './modelUrls';
|
|
6
|
-
import { AvailableModels, ModelConfig } from '../types/stt';
|
|
7
|
-
|
|
8
|
-
export const SAMPLE_RATE = 16_000;
|
|
9
|
-
export const SECOND = SAMPLE_RATE;
|
|
10
|
-
export const HAMMING_DIST_THRESHOLD = 1;
|
|
11
|
-
|
|
12
|
-
const whisperTinyModelConfig = {
|
|
13
|
-
sources: {
|
|
14
|
-
encoder: WHISPER_TINY.encoderSource,
|
|
15
|
-
decoder: WHISPER_TINY.decoderSource,
|
|
16
|
-
},
|
|
17
|
-
tokenizer: {
|
|
18
|
-
source: WHISPER_TINY.tokenizerSource,
|
|
19
|
-
bos: 50257, // FIXME: this is a placeholder and needs to be changed
|
|
20
|
-
eos: 50256, // FIXME: this is a placeholder and needs to be changed
|
|
21
|
-
},
|
|
22
|
-
isMultilingual: false,
|
|
23
|
-
};
|
|
24
|
-
|
|
25
|
-
const moonshineTinyModelConfig = {
|
|
26
|
-
sources: {
|
|
27
|
-
encoder: MOONSHINE_TINY.encoderSource,
|
|
28
|
-
decoder: MOONSHINE_TINY.decoderSource,
|
|
29
|
-
},
|
|
30
|
-
tokenizer: {
|
|
31
|
-
source: MOONSHINE_TINY.tokenizerSource,
|
|
32
|
-
bos: 1, // FIXME: this is a placeholder and needs to be changed
|
|
33
|
-
eos: 2, // FIXME: this is a placeholder and needs to be changed
|
|
34
|
-
},
|
|
35
|
-
isMultilingual: false,
|
|
36
|
-
};
|
|
37
|
-
|
|
38
|
-
const whisperTinyMultilingualModelConfig = {
|
|
39
|
-
sources: {
|
|
40
|
-
encoder: WHISPER_TINY_MULTILINGUAL.encoderSource,
|
|
41
|
-
decoder: WHISPER_TINY_MULTILINGUAL.decoderSource,
|
|
42
|
-
},
|
|
43
|
-
tokenizer: {
|
|
44
|
-
source: WHISPER_TINY_MULTILINGUAL.tokenizerSource,
|
|
45
|
-
bos: 50258, // FIXME: this is a placeholder and needs to be changed
|
|
46
|
-
eos: 50257, // FIXME: this is a placeholder and needs to be changed
|
|
47
|
-
},
|
|
48
|
-
isMultilingual: true,
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
export const MODEL_CONFIGS: {
|
|
52
|
-
[key in AvailableModels]: ModelConfig;
|
|
53
|
-
} = {
|
|
54
|
-
moonshine: moonshineTinyModelConfig,
|
|
55
|
-
whisper: whisperTinyModelConfig,
|
|
56
|
-
whisperMultilingual: whisperTinyMultilingualModelConfig,
|
|
57
|
-
};
|
|
58
|
-
|
|
59
|
-
export const MODES = {
|
|
60
|
-
fast: {
|
|
61
|
-
windowSize: 5,
|
|
62
|
-
overlapSeconds: 1.2,
|
|
63
|
-
},
|
|
64
|
-
balanced: {
|
|
65
|
-
windowSize: 12,
|
|
66
|
-
overlapSeconds: 2,
|
|
67
|
-
},
|
|
68
|
-
quality: {
|
|
69
|
-
windowSize: 24,
|
|
70
|
-
overlapSeconds: 3,
|
|
71
|
-
},
|
|
72
|
-
};
|
|
73
|
-
|
|
74
|
-
export const NUM_TOKENS_TO_TRIM = 3;
|
|
75
|
-
|
|
76
|
-
export enum STREAMING_ACTION {
|
|
77
|
-
START,
|
|
78
|
-
DATA,
|
|
79
|
-
STOP,
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
export { AvailableModels };
|