react-native-executorch 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/cpp/CMakeLists.txt +2 -1
- package/common/rnexecutorch/data_processing/Numerical.cpp +27 -19
- package/common/rnexecutorch/data_processing/Numerical.h +53 -4
- package/common/rnexecutorch/data_processing/dsp.cpp +1 -1
- package/common/rnexecutorch/data_processing/dsp.h +1 -1
- package/common/rnexecutorch/data_processing/gzip.cpp +47 -0
- package/common/rnexecutorch/data_processing/gzip.h +7 -0
- package/common/rnexecutorch/host_objects/ModelHostObject.h +24 -0
- package/common/rnexecutorch/metaprogramming/TypeConcepts.h +21 -1
- package/common/rnexecutorch/models/BaseModel.cpp +3 -2
- package/common/rnexecutorch/models/BaseModel.h +3 -2
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +100 -39
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +43 -21
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +307 -0
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +61 -0
- package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.cpp +80 -0
- package/common/rnexecutorch/models/speech_to_text/stream/HypothesisBuffer.h +27 -0
- package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.cpp +96 -0
- package/common/rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h +36 -0
- package/common/rnexecutorch/models/speech_to_text/types/DecodingOptions.h +15 -0
- package/common/rnexecutorch/models/speech_to_text/types/GenerationResult.h +12 -0
- package/common/rnexecutorch/models/speech_to_text/types/ProcessResult.h +12 -0
- package/common/rnexecutorch/models/speech_to_text/types/Segment.h +14 -0
- package/common/rnexecutorch/models/speech_to_text/types/Word.h +13 -0
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +75 -53
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +5 -5
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +7 -12
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/lib/typescript/types/stt.d.ts +0 -9
- package/lib/typescript/types/stt.d.ts.map +1 -1
- package/package.json +1 -1
- package/react-native-executorch.podspec +2 -0
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +118 -54
- package/src/types/stt.ts +0 -12
- package/common/rnexecutorch/models/EncoderDecoderBase.cpp +0 -21
- package/common/rnexecutorch/models/EncoderDecoderBase.h +0 -31
- package/common/rnexecutorch/models/speech_to_text/SpeechToTextStrategy.h +0 -27
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.cpp +0 -50
- package/common/rnexecutorch/models/speech_to_text/WhisperStrategy.h +0 -25
- package/lib/module/utils/SpeechToTextModule/ASR.js +0 -191
- package/lib/module/utils/SpeechToTextModule/ASR.js.map +0 -1
- package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js +0 -73
- package/lib/module/utils/SpeechToTextModule/OnlineProcessor.js.map +0 -1
- package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js +0 -56
- package/lib/module/utils/SpeechToTextModule/hypothesisBuffer.js.map +0 -1
- package/lib/module/utils/stt.js +0 -22
- package/lib/module/utils/stt.js.map +0 -1
- package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts +0 -27
- package/lib/typescript/utils/SpeechToTextModule/ASR.d.ts.map +0 -1
- package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts +0 -23
- package/lib/typescript/utils/SpeechToTextModule/OnlineProcessor.d.ts.map +0 -1
- package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts +0 -13
- package/lib/typescript/utils/SpeechToTextModule/hypothesisBuffer.d.ts.map +0 -1
- package/lib/typescript/utils/stt.d.ts +0 -2
- package/lib/typescript/utils/stt.d.ts.map +0 -1
- package/src/utils/SpeechToTextModule/ASR.ts +0 -303
- package/src/utils/SpeechToTextModule/OnlineProcessor.ts +0 -87
- package/src/utils/SpeechToTextModule/hypothesisBuffer.ts +0 -79
- package/src/utils/stt.ts +0 -28
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
#pragma once

#include "SpeechToTextStrategy.h"
#include <cstdint>
#include <memory>
#include <span>
#include <string>
#include <vector>

namespace rnexecutorch::models::speech_to_text {

/// Whisper-specific implementation of the SpeechToTextStrategy interface.
///
/// Only the declarations are visible in this header; the tensor-preparation
/// and token-extraction logic lives in the matching .cpp file.
class WhisperStrategy final : public SpeechToTextStrategy {
public:
  /// Builds the audio input tensor from a raw waveform.
  TensorPtr prepareAudioInput(std::span<float> waveform) override;

  /// Builds the token input tensor from the previously generated tokens.
  TensorPtr prepareTokenInput(const std::vector<int64_t> &prevTokens) override;

  /// Name of the decoder method to invoke; always "forward".
  std::string getDecoderMethod() const override { return "forward"; }

  /// Extracts the decoder's output from `decoderOutputTensor` into an owning
  /// buffer.
  std::shared_ptr<OwningArrayBuffer> extractOutputToken(
      const executorch::aten::Tensor &decoderOutputTensor) const override;

private:
  // NOTE(review): presumably backing storage for the tensors returned by
  // prepareAudioInput / prepareTokenInput (int64 tokens narrowed to int32) —
  // confirm against WhisperStrategy.cpp.
  std::vector<float> preprocessedData;
  std::vector<int32_t> tokens32;
};

} // namespace rnexecutorch::models::speech_to_text
|
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
// NOTE: This will be implemented in C++
|
|
4
|
-
|
|
5
|
-
import { TokenizerModule } from '../../modules/natural_language_processing/TokenizerModule';
|
|
6
|
-
import { ResourceFetcher } from '../ResourceFetcher';
|
|
7
|
-
/**
 * Whisper-style speech recognizer: downloads/loads the encoder and decoder,
 * decodes tokens autoregressively with temperature fallback, and converts
 * timestamp tokens into per-word timings.
 *
 * `nativeModule`, `startOfTranscriptToken`, `endOfTextToken` and
 * `timestampBeginToken` are assigned by `load()`, which must complete before
 * any other method is used.
 */
export class ASR {
  tokenizerModule = new TokenizerModule();
  timePrecision = 0.02; // Whisper timestamp precision
  maxDecodeLength = 128;
  chunkSize = 30; // 30 seconds
  minChunkSamples = 1 * 16000; // 1 second
  samplingRate = 16000;

  /**
   * Loads the tokenizer and fetches the encoder/decoder in parallel, then
   * creates the native module and resolves the special token ids.
   *
   * @param model Config exposing `encoderSource` and `decoderSource`; it is
   *   also passed as-is to the tokenizer's `load`.
   * @param onDownloadProgressCallback Forwarded to `ResourceFetcher.fetch`.
   * @throws Error('Download interrupted.') when either model source is missing.
   */
  async load(model, onDownloadProgressCallback) {
    const tokenizerLoadPromise = this.tokenizerModule.load(model);
    const encoderDecoderPromise = ResourceFetcher.fetch(
      onDownloadProgressCallback,
      model.encoderSource,
      model.decoderSource
    );
    const [, encoderDecoderResults] = await Promise.all([
      tokenizerLoadPromise,
      encoderDecoderPromise,
    ]);
    const encoderSource = encoderDecoderResults?.[0];
    const decoderSource = encoderDecoderResults?.[1];
    if (!encoderSource || !decoderSource) {
      throw new Error('Download interrupted.');
    }
    this.nativeModule = await global.loadSpeechToText(
      encoderSource,
      decoderSource,
      'whisper'
    );
    this.startOfTranscriptToken = await this.tokenizerModule.tokenToId(
      '<|startoftranscript|>'
    );
    this.endOfTextToken = await this.tokenizerModule.tokenToId('<|endoftext|>');
    this.timestampBeginToken = await this.tokenizerModule.tokenToId('<|0.00|>');
  }

  /**
   * Builds the decoder prompt: start-of-transcript, optionally the language
   * and transcribe-task tokens, then the first timestamp token.
   */
  async getInitialSequence(options) {
    const initialSequence = [this.startOfTranscriptToken];
    if (options.language) {
      const languageToken = await this.tokenizerModule.tokenToId(
        `<|${options.language}|>`
      );
      const taskToken = await this.tokenizerModule.tokenToId('<|transcribe|>');
      initialSequence.push(languageToken);
      initialSequence.push(taskToken);
    }
    initialSequence.push(this.timestampBeginToken);
    return initialSequence;
  }

  /**
   * Encodes `audio` and decodes tokens until end-of-text is produced or the
   * sequence grows past `maxDecodeLength`.
   *
   * @param temperature 0 selects the most probable token at every step; any
   *   other value samples from the temperature-scaled distribution.
   * @returns `sequencesIds` (generated tokens without the prompt) and
   *   `scores`, the probability of each generated token. Both arrays have the
   *   same length.
   */
  async generate(audio, temperature, options) {
    await this.encode(new Float32Array(audio));
    const initialSequence = await this.getInitialSequence(options);
    const sequencesIds = [...initialSequence];
    const scores = [];
    const greedy = temperature === 0;

    while (sequencesIds.length <= this.maxDecodeLength) {
      const probs = this.softmaxWithTemperature(
        Array.from(await this.decode(sequencesIds)),
        greedy ? 1 : temperature
      );
      const nextTokenId = greedy
        ? ASR.argMax(probs)
        : this.sampleFromDistribution(probs);
      sequencesIds.push(nextTokenId);
      scores.push(probs[nextTokenId]);
      if (nextTokenId === this.endOfTextToken) {
        break;
      }
    }

    return {
      sequencesIds: sequencesIds.slice(initialSequence.length),
      // `scores` only ever receives entries for generated tokens, so unlike
      // `sequencesIds` it has no prompt prefix to strip. Slicing it dropped
      // real scores and skewed the average log-probability.
      scores,
    };
  }

  /**
   * Index of the first maximum element, or -1 for an empty array. Implemented
   * as a loop so a vocabulary-sized array is never spread into call arguments.
   */
  static argMax(values) {
    let best = values.length > 0 ? 0 : -1;
    for (let i = 1; i < values.length; i++) {
      if (values[i] > values[best]) {
        best = i;
      }
    }
    return best;
  }

  /** Numerically stable softmax of `logits` divided by `temperature`. */
  softmaxWithTemperature(logits, temperature = 1.0) {
    // Loop instead of Math.max(...logits): spreading a very large array can
    // exceed the engine's argument-count limit.
    let max = -Infinity;
    for (const logit of logits) {
      if (logit > max) {
        max = logit;
      }
    }
    const exps = logits.map((logit) => Math.exp((logit - max) / temperature));
    const sum = exps.reduce((a, b) => a + b, 0);
    return exps.map((exp) => exp / sum);
  }

  /**
   * Draws an index from the discrete distribution `probs`. Falls back to the
   * last index when rounding leaves the cumulative sum below the random draw.
   */
  sampleFromDistribution(probs) {
    const r = Math.random();
    let cumulative = 0;
    for (let i = 0; i < probs.length; i++) {
      cumulative += probs[i];
      if (r < cumulative) {
        return i;
      }
    }
    return probs.length - 1;
  }

  /**
   * Decodes with increasing temperatures until the average token
   * log-probability reaches -1.0, then computes word timestamps. When no
   * temperature meets the threshold, the last attempt is used rather than an
   * empty token list.
   */
  async generateWithFallback(audio, options) {
    const temperatures = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0];
    let generatedTokens = [];

    for (const temperature of temperatures) {
      const { sequencesIds, scores } = await this.generate(
        audio,
        temperature,
        options
      );
      // Always remember the most recent attempt so there is something to
      // return even if every temperature fails the quality check.
      generatedTokens = sequencesIds;
      const cumLogProb = scores.reduce(
        (acc, score) => acc + Math.log(score),
        0
      );
      const avgLogProb = cumLogProb / sequencesIds.length;
      if (avgLogProb >= -1.0) {
        break;
      }
    }

    return this.calculateWordLevelTimestamps(generatedTokens, audio);
  }

  /**
   * Splits `generatedTokens` into segments at pairs of consecutive timestamp
   * tokens and estimates word timings inside each segment. If the decoded
   * time span is longer than the audio, all word times are scaled down.
   *
   * @returns Array of `{ words }` segments; empty when there are no tokens.
   */
  async calculateWordLevelTimestamps(generatedTokens, audio) {
    if (generatedTokens.length === 0) {
      return [];
    }

    const segments = [];
    let tokens = [];
    let prevTimestamp = this.timestampBeginToken;

    for (let i = 0; i < generatedTokens.length; i++) {
      if (generatedTokens[i] < this.timestampBeginToken) {
        tokens.push(generatedTokens[i]);
      }
      if (
        i > 0 &&
        generatedTokens[i - 1] >= this.timestampBeginToken &&
        generatedTokens[i] >= this.timestampBeginToken
      ) {
        const words = await this.estimateWordTimestampsLinear(
          tokens,
          prevTimestamp,
          generatedTokens[i - 1]
        );
        segments.push({ words });
        tokens = [];
        prevTimestamp = generatedTokens[i];
      }
    }

    const start = prevTimestamp;
    // The second-to-last token is taken as the closing timestamp. A one-token
    // output has no such token; fall back to `start` so the timings stay
    // finite instead of becoming NaN.
    const end = generatedTokens.at(-2) ?? prevTimestamp;
    const wordObjects = await this.estimateWordTimestampsLinear(
      tokens,
      start,
      end
    );
    segments.push({ words: wordObjects });

    const scalingFactor =
      audio.length /
      this.samplingRate /
      ((end - this.timestampBeginToken) * this.timePrecision);
    if (scalingFactor < 1) {
      for (const segment of segments) {
        for (const word of segment.words) {
          word.start *= scalingFactor;
          word.end *= scalingFactor;
        }
      }
    }
    return segments;
  }

  /**
   * Decodes `tokens` to text and spreads the time between two timestamp
   * tokens across the words in proportion to their character counts (each
   * word includes its leading space).
   */
  async estimateWordTimestampsLinear(tokens, timestampStart, timestampEnd) {
    const duration = (timestampEnd - timestampStart) * this.timePrecision;
    const segmentText = (await this.tokenizerModule.decode(tokens)).trim();
    const words = segmentText.split(' ').map((w) => ` ${w}`);
    const numOfCharacters = words.reduce((acc, word) => acc + word.length, 0);
    const timePerCharacter = duration / numOfCharacters;
    const startTimeOffset =
      (timestampStart - this.timestampBeginToken) * this.timePrecision;

    const wordObjects = [];
    let prevCharNum = 0;
    for (const word of words) {
      const start = startTimeOffset + prevCharNum * timePerCharacter;
      const end = start + timePerCharacter * word.length;
      wordObjects.push({ word, start, end });
      prevCharNum += word.length;
    }
    return wordObjects;
  }

  /**
   * Transcribes `audio` in windows of up to `chunkSize` seconds, restarting
   * each window at the end time of the last word recognized so far. Stops
   * when the remaining audio is shorter than `minChunkSamples` or when a
   * window fails to advance the position.
   */
  async transcribe(audio, options) {
    let seek = 0;
    const allSegments = [];

    while (seek * this.samplingRate < audio.length) {
      const chunk = audio.slice(
        seek * this.samplingRate,
        (seek + this.chunkSize) * this.samplingRate
      );
      if (chunk.length < this.minChunkSamples) {
        return allSegments;
      }

      const segments = await this.generateWithFallback(chunk, options);
      for (const segment of segments) {
        for (const word of segment.words) {
          word.start += seek;
          word.end += seek;
        }
      }
      allSegments.push(...segments);

      const lastTimeStamp = segments.at(-1)?.words.at(-1)?.end;
      // Without forward progress the same window would be decoded forever;
      // a missing or NaN timestamp also ends the loop here.
      if (!(lastTimeStamp > seek)) {
        break;
      }
      seek = lastTimeStamp;
    }
    return allSegments;
  }

  /** Flattens segments into `[start, end, word]` tuples. */
  tsWords(segments) {
    const o = [];
    for (const segment of segments) {
      for (const word of segment.words) {
        o.push([word.start, word.end, word.word]);
      }
    }
    return o;
  }

  /** End time of the last word of every segment. */
  segmentsEndTs(res) {
    return res.map((segment) => segment.words.at(-1).end);
  }

  /** Runs the native encoder on `waveform`. */
  async encode(waveform) {
    await this.nativeModule.encode(waveform);
  }

  /** Runs the native decoder on `tokens` and wraps the result in a Float32Array. */
  async decode(tokens) {
    return new Float32Array(await this.nativeModule.decode(tokens));
  }
}
|
|
191
|
-
//# sourceMappingURL=ASR.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"names":["TokenizerModule","ResourceFetcher","ASR","tokenizerModule","timePrecision","maxDecodeLength","chunkSize","minChunkSamples","samplingRate","load","model","onDownloadProgressCallback","tokenizerLoadPromise","encoderDecoderPromise","fetch","encoderSource","decoderSource","_","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","startOfTranscriptToken","tokenToId","endOfTextToken","timestampBeginToken","getInitialSequence","options","initialSequence","language","languageToken","taskToken","push","generate","audio","temperature","encode","Float32Array","sequencesIds","scores","length","logits","softmaxWithTemperature","Array","from","decode","nextTokenId","indexOf","Math","max","sampleFromDistribution","nextTokenProb","slice","exps","map","logit","exp","sum","reduce","a","b","probs","r","random","cumulative","i","generateWithFallback","temperatures","generatedTokens","result","tokens","seqLen","cumLogProb","acc","score","log","avgLogProb","calculateWordLevelTimestamps","segments","prevTimestamp","start","end","wordObjects","estimateWordTimestampsLinear","words","at","scalingFactor","segment","word","timestampStart","timestampEnd","duration","segmentText","trim","split","w","numOfCharacters","timePerCharacter","startTimeOffset","prevCharNum","j","transcribe","seek","allSegments","chunk","lastTimeStamp","tsWords","o","segmentsEndTs","res","waveform"],"sourceRoot":"../../../../src","sources":["utils/SpeechToTextModule/ASR.ts"],"mappings":";;AAAA;;AAEA,SAASA,eAAe,QAAQ,2DAA2D;AAQ3F,SAASC,eAAe,QAAQ,oBAAoB;AAEpD,OAAO,MAAMC,GAAG,CAAC;EAEPC,eAAe,GAAoB,IAAIH,eAAe,CAAC,CAAC;EAExDI,aAAa,GAAW,IAAI,CAAC,CAAC;EAC9BC,eAAe,GAAW,GAAG;EAC7BC,SAAS,GAAW,EAAE,CAAC,CAAC;EACxBC,eAAe,GAAW,CAAC,GAAG,KAAK,CAAC,CAAC;EACrCC,YAAY,GAAW,KAAK;EAMpC,MAAaC,IAAIA,CACfC,KAA8B,EAC9BC,0BAAsD,EACtD;IACA,MAAMC,oBAAoB,GAAG,IAAI,CAACT,eAAe,CAACM,IAAI,CAACC,KAAK,CAAC;IAC7D,MAAMG,qBAAqB,GAAGZ,eAAe,CAACa,KAAK,CACjDH,0BAA0B,EAC1BD,KAAK,CAACK,aAAa,EACnBL,KAA
K,CAACM,aACR,CAAC;IACD,MAAM,CAACC,CAAC,EAAEC,qBAAqB,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CACnDR,oBAAoB,EACpBC,qBAAqB,CACtB,CAAC;IACF,MAAME,aAAa,GAAGG,qBAAqB,GAAG,CAAC,CAAC;IAChD,MAAMF,aAAa,GAAGE,qBAAqB,GAAG,CAAC,CAAC;IAChD,IAAI,CAACH,aAAa,IAAI,CAACC,aAAa,EAAE;MACpC,MAAM,IAAIK,KAAK,CAAC,uBAAuB,CAAC;IAC1C;IACA,IAAI,CAACC,YAAY,GAAG,MAAMC,MAAM,CAACC,gBAAgB,CAC/CT,aAAa,EACbC,aAAa,EACb,SACF,CAAC;IAED,IAAI,CAACS,sBAAsB,GAAG,MAAM,IAAI,CAACtB,eAAe,CAACuB,SAAS,CAChE,uBACF,CAAC;IACD,IAAI,CAACC,cAAc,GAAG,MAAM,IAAI,CAACxB,eAAe,CAACuB,SAAS,CAAC,eAAe,CAAC;IAC3E,IAAI,CAACE,mBAAmB,GAAG,MAAM,IAAI,CAACzB,eAAe,CAACuB,SAAS,CAAC,UAAU,CAAC;EAC7E;EAEA,MAAcG,kBAAkBA,CAC9BC,OAAwB,EACL;IACnB,MAAMC,eAAyB,GAAG,CAAC,IAAI,CAACN,sBAAsB,CAAC;IAC/D,IAAIK,OAAO,CAACE,QAAQ,EAAE;MACpB,MAAMC,aAAa,GAAG,MAAM,IAAI,CAAC9B,eAAe,CAACuB,SAAS,CACxD,KAAKI,OAAO,CAACE,QAAQ,IACvB,CAAC;MACD,MAAME,SAAS,GAAG,MAAM,IAAI,CAAC/B,eAAe,CAACuB,SAAS,CAAC,gBAAgB,CAAC;MACxEK,eAAe,CAACI,IAAI,CAACF,aAAa,CAAC;MACnCF,eAAe,CAACI,IAAI,CAACD,SAAS,CAAC;IACjC;IACAH,eAAe,CAACI,IAAI,CAAC,IAAI,CAACP,mBAAmB,CAAC;IAC9C,OAAOG,eAAe;EACxB;EAEA,MAAcK,QAAQA,CACpBC,KAAe,EACfC,WAAmB,EACnBR,OAAwB,EAIvB;IACD,MAAM,IAAI,CAACS,MAAM,CAAC,IAAIC,YAAY,CAACH,KAAK,CAAC,CAAC;IAC1C,MAAMN,eAAe,GAAG,MAAM,IAAI,CAACF,kBAAkB,CAACC,OAAO,CAAC;IAC9D,MAAMW,YAAY,GAAG,CAAC,GAAGV,eAAe,CAAC;IACzC,MAAMW,MAAgB,GAAG,EAAE;IAE3B,OAAOD,YAAY,CAACE,MAAM,IAAI,IAAI,CAACtC,eAAe,EAAE;MAClD,MAAMuC,MAAM,GAAG,IAAI,CAACC,sBAAsB,CACxCC,KAAK,CAACC,IAAI,CAAC,MAAM,IAAI,CAACC,MAAM,CAACP,YAAY,CAAC,CAAC,EAC3CH,WAAW,KAAK,CAAC,GAAG,CAAC,GAAGA,WAC1B,CAAC;MACD,MAAMW,WAAW,GACfX,WAAW,KAAK,CAAC,GACbM,MAAM,CAACM,OAAO,CAACC,IAAI,CAACC,GAAG,CAAC,GAAGR,MAAM,CAAC,CAAC,GACnC,IAAI,CAACS,sBAAsB,CAACT,MAAM,CAAC;MACzC,MAAMU,aAAa,GAAGV,MAAM,CAACK,WAAW,CAAE;MAC1CR,YAAY,CAACN,IAAI,CAACc,WAAW,CAAC;MAC9BP,MAAM,CAACP,IAAI,CAACmB,aAAa,CAAC;MAC1B,IAAIL,WAAW,KAAK,IAAI,CAACtB,cAAc,EAAE;QACvC;MACF;IACF;IAEA,OAAO;MACLc,YAAY,EAAEA,YAAY,CAACc,KAAK,CAACxB,eAAe,CAACY,MAAM,CAAC;MACxDD,MAAM,EAAEA,MAAM,CAACa,KAAK,CAACxB,eAAe,CAACY,M
AAM;IAC7C,CAAC;EACH;EAEQE,sBAAsBA,CAACD,MAAgB,EAAEN,WAAW,GAAG,GAAG,EAAE;IAClE,MAAMc,GAAG,GAAGD,IAAI,CAACC,GAAG,CAAC,GAAGR,MAAM,CAAC;IAC/B,MAAMY,IAAI,GAAGZ,MAAM,CAACa,GAAG,CAAEC,KAAK,IAAKP,IAAI,CAACQ,GAAG,CAAC,CAACD,KAAK,GAAGN,GAAG,IAAId,WAAW,CAAC,CAAC;IACzE,MAAMsB,GAAG,GAAGJ,IAAI,CAACK,MAAM,CAAC,CAACC,CAAC,EAAEC,CAAC,KAAKD,CAAC,GAAGC,CAAC,EAAE,CAAC,CAAC;IAC3C,OAAOP,IAAI,CAACC,GAAG,CAAEE,GAAG,IAAKA,GAAG,GAAGC,GAAG,CAAC;EACrC;EAEQP,sBAAsBA,CAACW,KAAe,EAAU;IACtD,MAAMC,CAAC,GAAGd,IAAI,CAACe,MAAM,CAAC,CAAC;IACvB,IAAIC,UAAU,GAAG,CAAC;IAClB,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGJ,KAAK,CAACrB,MAAM,EAAEyB,CAAC,EAAE,EAAE;MACrCD,UAAU,IAAIH,KAAK,CAACI,CAAC,CAAE;MACvB,IAAIH,CAAC,GAAGE,UAAU,EAAE;QAClB,OAAOC,CAAC;MACV;IACF;IACA,OAAOJ,KAAK,CAACrB,MAAM,GAAG,CAAC;EACzB;EAEA,MAAc0B,oBAAoBA,CAChChC,KAAe,EACfP,OAAwB,EACxB;IACA,MAAMwC,YAAY,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC;IACnD,IAAIC,eAAyB,GAAG,EAAE;IAElC,KAAK,MAAMjC,WAAW,IAAIgC,YAAY,EAAE;MACtC,MAAME,MAAM,GAAG,MAAM,IAAI,CAACpC,QAAQ,CAACC,KAAK,EAAEC,WAAW,EAAER,OAAO,CAAC;MAC/D,MAAM2C,MAAM,GAAGD,MAAM,CAAC/B,YAAY;MAClC,MAAMC,MAAM,GAAG8B,MAAM,CAAC9B,MAAM;MAE5B,MAAMgC,MAAM,GAAGD,MAAM,CAAC9B,MAAM;MAC5B,MAAMgC,UAAU,GAAGjC,MAAM,CAACmB,MAAM,CAC9B,CAACe,GAAG,EAAEC,KAAK,KAAKD,GAAG,GAAGzB,IAAI,CAAC2B,GAAG,CAACD,KAAK,CAAC,EACrC,CACF,CAAC;MACD,MAAME,UAAU,GAAGJ,UAAU,GAAGD,MAAM;MAEtC,IAAIK,UAAU,IAAI,CAAC,GAAG,EAAE;QACtBR,eAAe,GAAGE,MAAM;QACxB;MACF;IACF;IAEA,OAAO,IAAI,CAACO,4BAA4B,CAACT,eAAe,EAAElC,KAAK,CAAC;EAClE;EAEA,MAAc2C,4BAA4BA,CACxCT,eAAyB,EACzBlC,KAAe,EACK;IACpB,MAAM4C,QAAmB,GAAG,EAAE;IAE9B,IAAIR,MAAgB,GAAG,EAAE;IACzB,IAAIS,aAAa,GAAG,IAAI,CAACtD,mBAAmB;IAC5C,KAAK,IAAIwC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGG,eAAe,CAAC5B,MAAM,EAAEyB,CAAC,EAAE,EAAE;MAC/C,IAAIG,eAAe,CAACH,CAAC,CAAC,GAAI,IAAI,CAACxC,mBAAmB,EAAE;QAClD6C,MAAM,CAACtC,IAAI,CAACoC,eAAe,CAACH,CAAC,CAAE,CAAC;MAClC;MAEA,IACEA,CAAC,GAAG,CAAC,IACLG,eAAe,CAACH,CAAC,GAAG,CAAC,CAAC,IAAK,IAAI,CAACxC,mBAAmB,IACnD2C,eAAe,CAACH,CAAC,CAAC,IAAK,IAAI,CAACxC,mBAAmB,EAC/C;Q
ACA,MAAMuD,KAAK,GAAGD,aAAa;QAC3B,MAAME,GAAG,GAAGb,eAAe,CAACH,CAAC,GAAG,CAAC,CAAE;QACnC,MAAMiB,WAAW,GAAG,MAAM,IAAI,CAACC,4BAA4B,CACzDb,MAAM,EACNU,KAAK,EACLC,GACF,CAAC;QACDH,QAAQ,CAAC9C,IAAI,CAAC;UACZoD,KAAK,EAAEF;QACT,CAAC,CAAC;QACFZ,MAAM,GAAG,EAAE;QACXS,aAAa,GAAGX,eAAe,CAACH,CAAC,CAAE;MACrC;IACF;IAEA,MAAMe,KAAK,GAAGD,aAAa;IAC3B,MAAME,GAAG,GAAGb,eAAe,CAACiB,EAAE,CAAC,CAAC,CAAC,CAAE;IACnC,MAAMH,WAAW,GAAG,MAAM,IAAI,CAACC,4BAA4B,CACzDb,MAAM,EACNU,KAAK,EACLC,GACF,CAAC;IACDH,QAAQ,CAAC9C,IAAI,CAAC;MACZoD,KAAK,EAAEF;IACT,CAAC,CAAC;IAEF,MAAMI,aAAa,GACjBpD,KAAK,CAACM,MAAM,GACZ,IAAI,CAACnC,YAAY,IAChB,CAAC4E,GAAG,GAAG,IAAI,CAACxD,mBAAmB,IAAI,IAAI,CAACxB,aAAa,CAAC;IACzD,IAAIqF,aAAa,GAAG,CAAC,EAAE;MACrB,KAAK,MAAMC,OAAO,IAAIT,QAAQ,EAAE;QAC9B,KAAK,MAAMU,IAAI,IAAID,OAAO,CAACH,KAAK,EAAE;UAChCI,IAAI,CAACR,KAAK,IAAIM,aAAa;UAC3BE,IAAI,CAACP,GAAG,IAAIK,aAAa;QAC3B;MACF;IACF;IAEA,OAAOR,QAAQ;EACjB;EAEA,MAAcK,4BAA4BA,CACxCb,MAAgB,EAChBmB,cAAsB,EACtBC,YAAoB,EACG;IACvB,MAAMC,QAAQ,GAAG,CAACD,YAAY,GAAGD,cAAc,IAAI,IAAI,CAACxF,aAAa;IACrE,MAAM2F,WAAW,GAAG,CACjB,MAAM,IAAI,CAAC5F,eAAe,CAAC6C,MAAM,CAACyB,MAAM,CAAC,EAC1CuB,IAAI,CAAC,CAAC;IAER,MAAMT,KAAK,GAAGQ,WAAW,CAACE,KAAK,CAAC,GAAG,CAAC,CAACxC,GAAG,CAAEyC,CAAC,IAAK,IAAIA,CAAC,EAAE,CAAC;IACxD,MAAMC,eAAe,GAAGZ,KAAK,CAAC1B,MAAM,CAClC,CAACe,GAAW,EAAEe,IAAY,KAAKf,GAAG,GAAGe,IAAI,CAAChD,MAAM,EAChD,CACF,CAAC;IAED,MAAMyD,gBAAgB,GAAGN,QAAQ,GAAGK,eAAe;IAEnD,MAAMd,WAAyB,GAAG,EAAE;IACpC,MAAMgB,eAAe,GACnB,CAACT,cAAc,GAAG,IAAI,CAAChE,mBAAmB,IAAI,IAAI,CAACxB,aAAa;IAElE,IAAIkG,WAAW,GAAG,CAAC;IACnB,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGhB,KAAK,CAAC5C,MAAM,EAAE4D,CAAC,EAAE,EAAE;MACrC,MAAMZ,IAAI,GAAGJ,KAAK,CAACgB,CAAC,CAAE;MACtB,MAAMpB,KAAK,GAAGkB,eAAe,GAAGC,WAAW,GAAGF,gBAAgB;MAC9D,MAAMhB,GAAG,GAAGD,KAAK,GAAGiB,gBAAgB,GAAGT,IAAI,CAAChD,MAAM;MAClD0C,WAAW,CAAClD,IAAI,CAAC;QAAEwD,IAAI;QAAER,KAAK;QAAEC;MAAI,CAAC,CAAC;MACtCkB,WAAW,IAAIX,IAAI,CAAChD,MAAM;IAC5B;IAEA,OAAO0C,WAAW;EACpB;EAEA,MAAamB,UAAUA,CACrBnE,KAAe,EACfP,OAAwB,EACJ;IACpB,IAAI2E,IAAI,GAAG,CAAC;IACZ,MAAMC,WA
AsB,GAAG,EAAE;IAEjC,OAAOD,IAAI,GAAG,IAAI,CAACjG,YAAY,GAAG6B,KAAK,CAACM,MAAM,EAAE;MAC9C,MAAMgE,KAAK,GAAGtE,KAAK,CAACkB,KAAK,CACvBkD,IAAI,GAAG,IAAI,CAACjG,YAAY,EACxB,CAACiG,IAAI,GAAG,IAAI,CAACnG,SAAS,IAAI,IAAI,CAACE,YACjC,CAAC;MACD,IAAImG,KAAK,CAAChE,MAAM,GAAG,IAAI,CAACpC,eAAe,EAAE;QACvC,OAAOmG,WAAW;MACpB;MACA,MAAMzB,QAAQ,GAAG,MAAM,IAAI,CAACZ,oBAAoB,CAACsC,KAAK,EAAE7E,OAAO,CAAC;MAChE,KAAK,MAAM4D,OAAO,IAAIT,QAAQ,EAAE;QAC9B,KAAK,MAAMU,IAAI,IAAID,OAAO,CAACH,KAAK,EAAE;UAChCI,IAAI,CAACR,KAAK,IAAIsB,IAAI;UAClBd,IAAI,CAACP,GAAG,IAAIqB,IAAI;QAClB;MACF;MACAC,WAAW,CAACvE,IAAI,CAAC,GAAG8C,QAAQ,CAAC;MAC7B,MAAM2B,aAAa,GAAG3B,QAAQ,CAACO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAED,KAAK,CAACC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAEJ,GAAG;MACxDqB,IAAI,GAAGG,aAAa;IACtB;IAEA,OAAOF,WAAW;EACpB;EAEOG,OAAOA,CAAC5B,QAAmB,EAAe;IAC/C,MAAM6B,CAAc,GAAG,EAAE;IACzB,KAAK,MAAMpB,OAAO,IAAIT,QAAQ,EAAE;MAC9B,KAAK,MAAMU,IAAI,IAAID,OAAO,CAACH,KAAK,EAAE;QAChCuB,CAAC,CAAC3E,IAAI,CAAC,CAACwD,IAAI,CAACR,KAAK,EAAEQ,IAAI,CAACP,GAAG,EAAEO,IAAI,CAACA,IAAI,CAAC,CAAC;MAC3C;IACF;IACA,OAAOmB,CAAC;EACV;EAEOC,aAAaA,CAACC,GAAc,EAAE;IACnC,OAAOA,GAAG,CAACvD,GAAG,CAAEiC,OAAO,IAAKA,OAAO,CAACH,KAAK,CAACC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAEJ,GAAG,CAAC;EACxD;EAEA,MAAa7C,MAAMA,CAAC0E,QAAsB,EAAiB;IACzD,MAAM,IAAI,CAAC3F,YAAY,CAACiB,MAAM,CAAC0E,QAAQ,CAAC;EAC1C;EAEA,MAAajE,MAAMA,CAACyB,MAAgB,EAAyB;IAC3D,OAAO,IAAIjC,YAAY,CAAC,MAAM,IAAI,CAAClB,YAAY,CAAC0B,MAAM,CAACyB,MAAM,CAAC,CAAC;EACjE;AACF","ignoreList":[]}
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
// NOTE: This will be implemented in C++
|
|
4
|
-
|
|
5
|
-
import { HypothesisBuffer } from './hypothesisBuffer';
|
|
6
|
-
/**
 * Streaming wrapper around an ASR instance: accumulates audio, re-transcribes
 * the buffer on every iteration, and uses a HypothesisBuffer to decide which
 * words are committed.
 */
export class OnlineASRProcessor {
  samplingRate = 16000;
  audioBuffer = [];
  transcriptBuffer = new HypothesisBuffer();
  bufferTimeOffset = 0;
  committed = [];

  /** @param asr Object providing `transcribe`, `tsWords` and `segmentsEndTs`. */
  constructor(asr) {
    this.asr = asr;
  }

  /** Appends `audio` samples to the pending buffer. */
  insertAudioChunk(audio) {
    // Push one sample at a time: `push(...audio)` passes every sample as a
    // call argument and can throw RangeError for large chunks.
    for (const sample of audio) {
      this.audioBuffer.push(sample);
    }
  }

  /**
   * Transcribes the buffered audio, commits the words confirmed by the
   * hypothesis buffer, and trims the audio buffer once it holds more than
   * 15 seconds.
   *
   * @returns `committed` (text committed in this iteration) and
   *   `nonCommitted` (text still held back in the hypothesis buffer).
   */
  async processIter(options) {
    const res = await this.asr.transcribe(this.audioBuffer, options);
    const tsw = this.asr.tsWords(res);
    this.transcriptBuffer.insert(tsw, this.bufferTimeOffset);
    const newlyCommitted = this.transcriptBuffer.flush();
    this.committed.push(...newlyCommitted);

    const maxBufferSeconds = 15;
    if (this.audioBuffer.length / this.samplingRate > maxBufferSeconds) {
      this.chunkCompletedSegment(res);
    }

    const committed = this.toFlush(newlyCommitted)[2];
    const nonCommitted = this.transcriptBuffer
      .complete()
      .map((x) => x[2])
      .join('');
    return {
      committed,
      nonCommitted,
    };
  }

  /**
   * Cuts the audio buffer at a segment end that is not later than the end of
   * the last committed word. Does nothing when no words are committed or
   * there is only one segment.
   */
  chunkCompletedSegment(res) {
    if (this.committed.length === 0) {
      return;
    }
    const ends = this.asr.segmentsEndTs(res);
    if (ends.length <= 1) {
      return;
    }

    const lastCommittedEnd = this.committed.at(-1)[1];
    let cutTime = ends.at(-2) + this.bufferTimeOffset;
    // Step back through earlier segment ends until one is not past the
    // committed text, always keeping at least two entries in `ends`.
    while (ends.length > 2 && cutTime > lastCommittedEnd) {
      ends.pop();
      cutTime = ends.at(-2) + this.bufferTimeOffset;
    }
    if (cutTime <= lastCommittedEnd) {
      this.chunkAt(cutTime);
    }
  }

  /** Drops committed words and audio up to `time` and moves the buffer offset there. */
  chunkAt(time) {
    this.transcriptBuffer.popCommitted(time);
    const cutSeconds = time - this.bufferTimeOffset;
    // Clamp at 0: a negative index would make `slice` count from the end and
    // discard almost the whole buffer.
    const cutSamples = Math.max(0, Math.floor(cutSeconds * this.samplingRate));
    this.audioBuffer = this.audioBuffer.slice(cutSamples);
    this.bufferTimeOffset = time;
  }

  /**
   * Returns the words still held in the hypothesis buffer as the final
   * committed text and advances the offset past the buffered audio.
   */
  async finish() {
    const remaining = this.transcriptBuffer.complete();
    const flushed = this.toFlush(remaining);
    this.bufferTimeOffset += this.audioBuffer.length / this.samplingRate;
    return {
      committed: flushed[2],
    };
  }

  /**
   * Collapses `[start, end, word]` tuples into `[firstStart, lastEnd, text]`,
   * with the words joined by a single space. Both times are null when
   * `words` is empty.
   */
  toFlush(words) {
    const text = words.map((w) => w[2]).join(' ');
    const begin = words.length === 0 ? null : words[0][0];
    const end = words.length === 0 ? null : words.at(-1)[1];
    return [begin, end, text];
  }
}
|
|
73
|
-
//# sourceMappingURL=OnlineProcessor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"names":["HypothesisBuffer","OnlineASRProcessor","samplingRate","audioBuffer","transcriptBuffer","bufferTimeOffset","committed","constructor","asr","insertAudioChunk","audio","push","processIter","options","res","transcribe","tsw","tsWords","insert","o","flush","s","length","chunkCompletedSegment","toFlush","nonCommitted","complete","map","x","join","ends","segmentsEndTs","t","at","e","pop","chunkAt","time","popCommitted","cutSeconds","slice","Math","floor","finish","f","words","b"],"sourceRoot":"../../../../src","sources":["utils/SpeechToTextModule/OnlineProcessor.ts"],"mappings":";;AAAA;;AAIA,SAASA,gBAAgB,QAAQ,oBAAoB;AAErD,OAAO,MAAMC,kBAAkB,CAAC;EAGtBC,YAAY,GAAW,KAAK;EAC7BC,WAAW,GAAa,EAAE;EACzBC,gBAAgB,GAAqB,IAAIJ,gBAAgB,CAAC,CAAC;EAC3DK,gBAAgB,GAAW,CAAC;EAC5BC,SAAS,GAAgB,EAAE;EAEnCC,WAAWA,CAACC,GAAQ,EAAE;IACpB,IAAI,CAACA,GAAG,GAAGA,GAAG;EAChB;EAEOC,gBAAgBA,CAACC,KAAe,EAAE;IACvC,IAAI,CAACP,WAAW,CAACQ,IAAI,CAAC,GAAGD,KAAK,CAAC;EACjC;EAEA,MAAaE,WAAWA,CAACC,OAAwB,EAAE;IACjD,MAAMC,GAAG,GAAG,MAAM,IAAI,CAACN,GAAG,CAACO,UAAU,CAAC,IAAI,CAACZ,WAAW,EAAEU,OAAO,CAAC;IAChE,MAAMG,GAAG,GAAG,IAAI,CAACR,GAAG,CAACS,OAAO,CAACH,GAAG,CAAC;IACjC,IAAI,CAACV,gBAAgB,CAACc,MAAM,CAACF,GAAG,EAAE,IAAI,CAACX,gBAAgB,CAAC;IACxD,MAAMc,CAAC,GAAG,IAAI,CAACf,gBAAgB,CAACgB,KAAK,CAAC,CAAC;IACvC,IAAI,CAACd,SAAS,CAACK,IAAI,CAAC,GAAGQ,CAAC,CAAC;IAEzB,MAAME,CAAC,GAAG,EAAE;IACZ,IAAI,IAAI,CAAClB,WAAW,CAACmB,MAAM,GAAG,IAAI,CAACpB,YAAY,GAAGmB,CAAC,EAAE;MACnD,IAAI,CAACE,qBAAqB,CAACT,GAAG,CAAC;IACjC;IAEA,MAAMR,SAAS,GAAG,IAAI,CAACkB,OAAO,CAACL,CAAC,CAAC,CAAC,CAAC,CAAC;IACpC,MAAMM,YAAY,GAAG,IAAI,CAACrB,gBAAgB,CACvCsB,QAAQ,CAAC,CAAC,CACVC,GAAG,CAAEC,CAAC,IAAKA,CAAC,CAAC,CAAC,CAAC,CAAC,CAChBC,IAAI,CAAC,EAAE,CAAC;IACX,OAAO;MAAEvB,SAAS;MAAEmB;IAAa,CAAC;EACpC;EAEQF,qBAAqBA,CAACT,GAAc,EAAE;IAC5C,IAAI,IAAI,CAACR,SAAS,CAACgB,MAAM,KAAK,CAAC,EAAE;MAC/B;IACF;IAEA,MAAMQ,IAAI,GAAG,IAAI,CAACtB,GAAG,CAACuB,aAAa,CAACjB,GAAG,CAAC;IACxC,MAAMkB,CAAC,GAAG,IAAI,CAAC1B,SAAS,CAAC2B,EAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;I
AEnC,IAAIH,IAAI,CAACR,MAAM,GAAG,CAAC,EAAE;MACnB,IAAIY,CAAC,GAAGJ,IAAI,CAACG,EAAE,CAAC,CAAC,CAAC,CAAC,GAAI,IAAI,CAAC5B,gBAAgB;MAC5C,OAAOyB,IAAI,CAACR,MAAM,GAAG,CAAC,IAAIY,CAAC,GAAGF,CAAC,EAAE;QAC/BF,IAAI,CAACK,GAAG,CAAC,CAAC;QACVD,CAAC,GAAGJ,IAAI,CAACG,EAAE,CAAC,CAAC,CAAC,CAAC,GAAI,IAAI,CAAC5B,gBAAgB;MAC1C;MAEA,IAAI6B,CAAC,IAAIF,CAAC,EAAE;QACV,IAAI,CAACI,OAAO,CAACF,CAAC,CAAC;MACjB;IACF;EACF;EAEQE,OAAOA,CAACC,IAAY,EAAE;IAC5B,IAAI,CAACjC,gBAAgB,CAACkC,YAAY,CAACD,IAAI,CAAC;IACxC,MAAME,UAAU,GAAGF,IAAI,GAAG,IAAI,CAAChC,gBAAgB;IAC/C,IAAI,CAACF,WAAW,GAAG,IAAI,CAACA,WAAW,CAACqC,KAAK,CACvCC,IAAI,CAACC,KAAK,CAACH,UAAU,GAAG,IAAI,CAACrC,YAAY,CAC3C,CAAC;IACD,IAAI,CAACG,gBAAgB,GAAGgC,IAAI;EAC9B;EAEA,MAAaM,MAAMA,CAAA,EAAG;IACpB,MAAMxB,CAAC,GAAG,IAAI,CAACf,gBAAgB,CAACsB,QAAQ,CAAC,CAAC;IAC1C,MAAMkB,CAAC,GAAG,IAAI,CAACpB,OAAO,CAACL,CAAC,CAAC;IACzB,IAAI,CAACd,gBAAgB,IAAI,IAAI,CAACF,WAAW,CAACmB,MAAM,GAAG,IAAI,CAACpB,YAAY;IACpE,OAAO;MAAEI,SAAS,EAAEsC,CAAC,CAAC,CAAC;IAAE,CAAC;EAC5B;EAEQpB,OAAOA,CAACqB,KAAkB,EAA0C;IAC1E,MAAMb,CAAC,GAAGa,KAAK,CAAClB,GAAG,CAAEN,CAAC,IAAKA,CAAC,CAAC,CAAC,CAAC,CAAC,CAACQ,IAAI,CAAC,GAAG,CAAC;IAC1C,MAAMiB,CAAC,GAAGD,KAAK,CAACvB,MAAM,KAAK,CAAC,GAAG,IAAI,GAAGuB,KAAK,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;IAClD,MAAMX,CAAC,GAAGW,KAAK,CAACvB,MAAM,KAAK,CAAC,GAAG,IAAI,GAAGuB,KAAK,CAACZ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;IACtD,OAAO,CAACa,CAAC,EAAEZ,CAAC,EAAEF,CAAC,CAAC;EAClB;AACF","ignoreList":[]}
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
// NOTE: This will be implemented in C++
|
|
4
|
-
|
|
5
|
-
/**
 * Holds successive transcription hypotheses as `[start, end, word]` tuples
 * and commits a word once two consecutive hypotheses agree on it.
 */
export class HypothesisBuffer {
  committedInBuffer = []; // committed words not yet dropped by popCommitted()
  buffer = []; // uncommitted remainder of the previous hypothesis
  new = []; // latest hypothesis, pending flush()
  lastCommittedTime = 0;
  lastCommittedWord = null;

  /**
   * Stores a new hypothesis. Times are shifted by `offset`, words starting
   * at or before `lastCommittedTime - 0.5` are dropped, and a leading run of
   * up to five words that repeats the tail of the committed words is removed.
   */
  insert(newWords, offset) {
    this.new = newWords
      .map(([start, end, text]) => [start + offset, end + offset, text])
      .filter(([start]) => start > this.lastCommittedTime - 0.5);

    if (this.new.length === 0 || this.committedInBuffer.length === 0) {
      return;
    }
    // Only look for repeated words when the hypothesis begins within one
    // second of the last committed word.
    const firstStart = this.new[0][0];
    if (!(Math.abs(firstStart - this.lastCommittedTime) < 1)) {
      return;
    }

    const maxRun = Math.min(this.committedInBuffer.length, this.new.length, 5);
    for (let n = 1; n <= maxRun; n++) {
      const committedTail = this.committedInBuffer
        .slice(-n)
        .map((w) => w[2])
        .join(' ');
      const incomingHead = this.new
        .slice(0, n)
        .map((w) => w[2])
        .join(' ');
      if (committedTail === incomingHead) {
        this.new.splice(0, n);
        break;
      }
    }
  }

  /**
   * Commits the longest prefix on which the new hypothesis and the previous
   * one have identical words, then keeps the rest of the new hypothesis as
   * the buffer for the next round.
   *
   * @returns The words committed by this call.
   */
  flush() {
    // Count the agreeing prefix first instead of shifting both arrays one
    // element at a time.
    const limit = Math.min(this.new.length, this.buffer.length);
    let agreed = 0;
    while (agreed < limit && this.new[agreed][2] === this.buffer[agreed][2]) {
      agreed++;
    }

    const commit = this.new.slice(0, agreed);
    if (agreed > 0) {
      const lastWord = commit[agreed - 1];
      this.lastCommittedWord = lastWord[2];
      this.lastCommittedTime = lastWord[1];
    }

    this.buffer = this.new.slice(agreed);
    this.new = [];
    this.committedInBuffer.push(...commit);
    return commit;
  }

  /** Forgets committed words whose end time is not after `time`. */
  popCommitted(time) {
    this.committedInBuffer = this.committedInBuffer.filter(
      ([, end]) => end > time
    );
  }

  /** Returns the current uncommitted words. */
  complete() {
    return this.buffer;
  }
}
|
|
56
|
-
//# sourceMappingURL=hypothesisBuffer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"names":["HypothesisBuffer","committedInBuffer","buffer","new","lastCommittedTime","lastCommittedWord","insert","newWords","offset","newWordsOffset","map","a","b","t","filter","_b","_t","length","Math","abs","cn","nn","i","min","c","slice","w","join","tail","j","shift","flush","commit","push","popCommitted","time","_a","complete"],"sourceRoot":"../../../../src","sources":["utils/SpeechToTextModule/hypothesisBuffer.ts"],"mappings":";;AAAA;;AAIA,OAAO,MAAMA,gBAAgB,CAAC;EACpBC,iBAAiB,GAAgB,EAAE;EACnCC,MAAM,GAAgB,EAAE;EACxBC,GAAG,GAAgB,EAAE;EAErBC,iBAAiB,GAAW,CAAC;EAC9BC,iBAAiB,GAAkB,IAAI;EAEvCC,MAAMA,CAACC,QAAqB,EAAEC,MAAc,EAAE;IACnD,MAAMC,cAA2B,GAAGF,QAAQ,CAACG,GAAG,CAAC,CAAC,CAACC,CAAC,EAAEC,CAAC,EAAEC,CAAC,CAAC,KAAK,CAC9DF,CAAC,GAAGH,MAAM,EACVI,CAAC,GAAGJ,MAAM,EACVK,CAAC,CACF,CAAC;IACF,IAAI,CAACV,GAAG,GAAGM,cAAc,CAACK,MAAM,CAC9B,CAAC,CAACH,CAAC,EAAEI,EAAE,EAAEC,EAAE,CAAC,KAAKL,CAAC,GAAG,IAAI,CAACP,iBAAiB,GAAG,GAChD,CAAC;IAED,IAAI,IAAI,CAACD,GAAG,CAACc,MAAM,GAAG,CAAC,EAAE;MACvB,MAAM,CAACN,CAAC,EAAEI,EAAE,EAAEC,EAAE,CAAC,GAAG,IAAI,CAACb,GAAG,CAAC,CAAC,CAAE;MAChC,IACEe,IAAI,CAACC,GAAG,CAACR,CAAC,GAAG,IAAI,CAACP,iBAAiB,CAAC,GAAG,CAAC,IACxC,IAAI,CAACH,iBAAiB,CAACgB,MAAM,GAAG,CAAC,EACjC;QACA,MAAMG,EAAE,GAAG,IAAI,CAACnB,iBAAiB,CAACgB,MAAM;QACxC,MAAMI,EAAE,GAAG,IAAI,CAAClB,GAAG,CAACc,MAAM;QAE1B,KAAK,IAAIK,CAAC,GAAG,CAAC,EAAEA,CAAC,IAAIJ,IAAI,CAACK,GAAG,CAACH,EAAE,EAAEC,EAAE,EAAE,CAAC,CAAC,EAAEC,CAAC,EAAE,EAAE;UAC7C,MAAME,CAAC,GAAG,IAAI,CAACvB,iBAAiB,CAC7BwB,KAAK,CAAC,CAACH,CAAC,CAAC,CACTZ,GAAG,CAAEgB,CAAC,IAAKA,CAAC,CAAC,CAAC,CAAC,CAAC,CAChBC,IAAI,CAAC,GAAG,CAAC;UACZ,MAAMC,IAAI,GAAG,IAAI,CAACzB,GAAG,CAClBsB,KAAK,CAAC,CAAC,EAAEH,CAAC,CAAC,CACXZ,GAAG,CAAEgB,CAAC,IAAKA,CAAC,CAAC,CAAC,CAAC,CAAC,CAChBC,IAAI,CAAC,GAAG,CAAC;UACZ,IAAIH,CAAC,KAAKI,IAAI,EAAE;YACd,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGP,CAAC,EAAEO,CAAC,EAAE,EAAE;cAC1B,IAAI,CAAC1B,GAAG,CAAC2B,KAAK,CAAC,CAAC;YAClB;YACA;UACF;QACF;MACF;IACF;EACF;EAEOC,KAAKA,CAAA,EAAgB;IAC1B,MAAMC,MAAmB,GAAG,EAAE;IAC9B,OAAO,
IAAI,CAAC7B,GAAG,CAACc,MAAM,GAAG,CAAC,IAAI,IAAI,CAACf,MAAM,CAACe,MAAM,GAAG,CAAC,EAAE;MACpD,IAAI,IAAI,CAACd,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,KAAK,IAAI,CAACD,MAAM,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,EAAE;QAC1C;MACF;MACA8B,MAAM,CAACC,IAAI,CAAC,IAAI,CAAC9B,GAAG,CAAC,CAAC,CAAE,CAAC;MACzB,IAAI,CAACE,iBAAiB,GAAG,IAAI,CAACF,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;MACxC,IAAI,CAACC,iBAAiB,GAAG,IAAI,CAACD,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;MACxC,IAAI,CAACD,MAAM,CAAC4B,KAAK,CAAC,CAAC;MACnB,IAAI,CAAC3B,GAAG,CAAC2B,KAAK,CAAC,CAAC;IAClB;IACA,IAAI,CAAC5B,MAAM,GAAG,IAAI,CAACC,GAAG;IACtB,IAAI,CAACA,GAAG,GAAG,EAAE;IACb,IAAI,CAACF,iBAAiB,CAACgC,IAAI,CAAC,GAAGD,MAAM,CAAC;IACtC,OAAOA,MAAM;EACf;EAEOE,YAAYA,CAACC,IAAY,EAAE;IAChC,IAAI,CAAClC,iBAAiB,GAAG,IAAI,CAACA,iBAAiB,CAACa,MAAM,CACpD,CAAC,CAACsB,EAAE,EAAExB,CAAC,EAAEI,EAAE,CAAC,KAAKJ,CAAC,GAAGuB,IACvB,CAAC;EACH;EAEOE,QAAQA,CAAA,EAAgB;IAC7B,OAAO,IAAI,CAACnC,MAAM;EACpB;AACF","ignoreList":[]}
|
package/lib/module/utils/stt.js
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
export const longCommonInfPref = (seq1, seq2, hammingDistThreshold) => {
|
|
4
|
-
let maxInd = 0;
|
|
5
|
-
let maxLength = 0;
|
|
6
|
-
for (let i = 0; i < seq1.length; i++) {
|
|
7
|
-
let j = 0;
|
|
8
|
-
let hammingDist = 0;
|
|
9
|
-
while (j < seq2.length && i + j < seq1.length && (seq1[i + j] === seq2[j] || hammingDist < hammingDistThreshold)) {
|
|
10
|
-
if (seq1[i + j] !== seq2[j]) {
|
|
11
|
-
hammingDist++;
|
|
12
|
-
}
|
|
13
|
-
j++;
|
|
14
|
-
}
|
|
15
|
-
if (j >= maxLength) {
|
|
16
|
-
maxLength = j;
|
|
17
|
-
maxInd = i;
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
return maxInd;
|
|
21
|
-
};
|
|
22
|
-
//# sourceMappingURL=stt.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"names":["longCommonInfPref","seq1","seq2","hammingDistThreshold","maxInd","maxLength","i","length","j","hammingDist"],"sourceRoot":"../../../src","sources":["utils/stt.ts"],"mappings":";;AAAA,OAAO,MAAMA,iBAAiB,GAAGA,CAC/BC,IAAc,EACdC,IAAc,EACdC,oBAA4B,KACzB;EACH,IAAIC,MAAM,GAAG,CAAC;EACd,IAAIC,SAAS,GAAG,CAAC;EAEjB,KAAK,IAAIC,CAAC,GAAG,CAAC,EAAEA,CAAC,GAAGL,IAAI,CAACM,MAAM,EAAED,CAAC,EAAE,EAAE;IACpC,IAAIE,CAAC,GAAG,CAAC;IACT,IAAIC,WAAW,GAAG,CAAC;IACnB,OACED,CAAC,GAAGN,IAAI,CAACK,MAAM,IACfD,CAAC,GAAGE,CAAC,GAAGP,IAAI,CAACM,MAAM,KAClBN,IAAI,CAACK,CAAC,GAAGE,CAAC,CAAC,KAAKN,IAAI,CAACM,CAAC,CAAC,IAAIC,WAAW,GAAGN,oBAAoB,CAAC,EAC/D;MACA,IAAIF,IAAI,CAACK,CAAC,GAAGE,CAAC,CAAC,KAAKN,IAAI,CAACM,CAAC,CAAC,EAAE;QAC3BC,WAAW,EAAE;MACf;MACAD,CAAC,EAAE;IACL;IACA,IAAIA,CAAC,IAAIH,SAAS,EAAE;MAClBA,SAAS,GAAGG,CAAC;MACbJ,MAAM,GAAGE,CAAC;IACZ;EACF;EACA,OAAOF,MAAM;AACf,CAAC","ignoreList":[]}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import { DecodingOptions, Segment, SpeechToTextModelConfig, WordTuple } from '../../types/stt';
|
|
2
|
-
export declare class ASR {
|
|
3
|
-
private nativeModule;
|
|
4
|
-
private tokenizerModule;
|
|
5
|
-
private timePrecision;
|
|
6
|
-
private maxDecodeLength;
|
|
7
|
-
private chunkSize;
|
|
8
|
-
private minChunkSamples;
|
|
9
|
-
private samplingRate;
|
|
10
|
-
private startOfTranscriptToken;
|
|
11
|
-
private endOfTextToken;
|
|
12
|
-
private timestampBeginToken;
|
|
13
|
-
load(model: SpeechToTextModelConfig, onDownloadProgressCallback: (progress: number) => void): Promise<void>;
|
|
14
|
-
private getInitialSequence;
|
|
15
|
-
private generate;
|
|
16
|
-
private softmaxWithTemperature;
|
|
17
|
-
private sampleFromDistribution;
|
|
18
|
-
private generateWithFallback;
|
|
19
|
-
private calculateWordLevelTimestamps;
|
|
20
|
-
private estimateWordTimestampsLinear;
|
|
21
|
-
transcribe(audio: number[], options: DecodingOptions): Promise<Segment[]>;
|
|
22
|
-
tsWords(segments: Segment[]): WordTuple[];
|
|
23
|
-
segmentsEndTs(res: Segment[]): number[];
|
|
24
|
-
encode(waveform: Float32Array): Promise<void>;
|
|
25
|
-
decode(tokens: number[]): Promise<Float32Array>;
|
|
26
|
-
}
|
|
27
|
-
//# sourceMappingURL=ASR.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"ASR.d.ts","sourceRoot":"","sources":["../../../../src/utils/SpeechToTextModule/ASR.ts"],"names":[],"mappings":"AAGA,OAAO,EACL,eAAe,EACf,OAAO,EACP,uBAAuB,EAEvB,SAAS,EACV,MAAM,iBAAiB,CAAC;AAGzB,qBAAa,GAAG;IACd,OAAO,CAAC,YAAY,CAAM;IAC1B,OAAO,CAAC,eAAe,CAA0C;IAEjE,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,eAAe,CAAe;IACtC,OAAO,CAAC,SAAS,CAAc;IAC/B,OAAO,CAAC,eAAe,CAAqB;IAC5C,OAAO,CAAC,YAAY,CAAiB;IAErC,OAAO,CAAC,sBAAsB,CAAU;IACxC,OAAO,CAAC,cAAc,CAAU;IAChC,OAAO,CAAC,mBAAmB,CAAU;IAExB,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI;YA8B1C,kBAAkB;YAgBlB,QAAQ;IAoCtB,OAAO,CAAC,sBAAsB;IAO9B,OAAO,CAAC,sBAAsB;YAYhB,oBAAoB;YA4BpB,4BAA4B;YA4D5B,4BAA4B;IAkC7B,UAAU,CACrB,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,EAAE,eAAe,GACvB,OAAO,CAAC,OAAO,EAAE,CAAC;IA2Bd,OAAO,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,SAAS,EAAE;IAUzC,aAAa,CAAC,GAAG,EAAE,OAAO,EAAE;IAItB,MAAM,CAAC,QAAQ,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC;IAI7C,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,CAAC;CAG7D"}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { DecodingOptions } from '../../types/stt';
|
|
2
|
-
import { ASR } from './ASR';
|
|
3
|
-
export declare class OnlineASRProcessor {
|
|
4
|
-
private asr;
|
|
5
|
-
private samplingRate;
|
|
6
|
-
audioBuffer: number[];
|
|
7
|
-
private transcriptBuffer;
|
|
8
|
-
private bufferTimeOffset;
|
|
9
|
-
private committed;
|
|
10
|
-
constructor(asr: ASR);
|
|
11
|
-
insertAudioChunk(audio: number[]): void;
|
|
12
|
-
processIter(options: DecodingOptions): Promise<{
|
|
13
|
-
committed: string;
|
|
14
|
-
nonCommitted: string;
|
|
15
|
-
}>;
|
|
16
|
-
private chunkCompletedSegment;
|
|
17
|
-
private chunkAt;
|
|
18
|
-
finish(): Promise<{
|
|
19
|
-
committed: string;
|
|
20
|
-
}>;
|
|
21
|
-
private toFlush;
|
|
22
|
-
}
|
|
23
|
-
//# sourceMappingURL=OnlineProcessor.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"OnlineProcessor.d.ts","sourceRoot":"","sources":["../../../../src/utils/SpeechToTextModule/OnlineProcessor.ts"],"names":[],"mappings":"AAEA,OAAO,EAAa,eAAe,EAAW,MAAM,iBAAiB,CAAC;AACtE,OAAO,EAAE,GAAG,EAAE,MAAM,OAAO,CAAC;AAG5B,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,GAAG,CAAM;IAEjB,OAAO,CAAC,YAAY,CAAiB;IAC9B,WAAW,EAAE,MAAM,EAAE,CAAM;IAClC,OAAO,CAAC,gBAAgB,CAA4C;IACpE,OAAO,CAAC,gBAAgB,CAAa;IACrC,OAAO,CAAC,SAAS,CAAmB;gBAExB,GAAG,EAAE,GAAG;IAIb,gBAAgB,CAAC,KAAK,EAAE,MAAM,EAAE;IAI1B,WAAW,CAAC,OAAO,EAAE,eAAe;;;;IAoBjD,OAAO,CAAC,qBAAqB;IAqB7B,OAAO,CAAC,OAAO;IASF,MAAM;;;IAOnB,OAAO,CAAC,OAAO;CAMhB"}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { WordTuple } from '../../types/stt';
|
|
2
|
-
export declare class HypothesisBuffer {
|
|
3
|
-
private committedInBuffer;
|
|
4
|
-
private buffer;
|
|
5
|
-
private new;
|
|
6
|
-
private lastCommittedTime;
|
|
7
|
-
lastCommittedWord: string | null;
|
|
8
|
-
insert(newWords: WordTuple[], offset: number): void;
|
|
9
|
-
flush(): WordTuple[];
|
|
10
|
-
popCommitted(time: number): void;
|
|
11
|
-
complete(): WordTuple[];
|
|
12
|
-
}
|
|
13
|
-
//# sourceMappingURL=hypothesisBuffer.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"hypothesisBuffer.d.ts","sourceRoot":"","sources":["../../../../src/utils/SpeechToTextModule/hypothesisBuffer.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,iBAAiB,CAAmB;IAC5C,OAAO,CAAC,MAAM,CAAmB;IACjC,OAAO,CAAC,GAAG,CAAmB;IAE9B,OAAO,CAAC,iBAAiB,CAAa;IAC/B,iBAAiB,EAAE,MAAM,GAAG,IAAI,CAAQ;IAExC,MAAM,CAAC,QAAQ,EAAE,SAAS,EAAE,EAAE,MAAM,EAAE,MAAM;IAuC5C,KAAK,IAAI,SAAS,EAAE;IAkBpB,YAAY,CAAC,IAAI,EAAE,MAAM;IAMzB,QAAQ,IAAI,SAAS,EAAE;CAG/B"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../../src/utils/stt.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,iBAAiB,GAC5B,MAAM,MAAM,EAAE,EACd,MAAM,MAAM,EAAE,EACd,sBAAsB,MAAM,WAwB7B,CAAC"}
|