react-native-executorch 0.5.11 → 0.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/common/rnexecutorch/host_objects/JsiConversions.h +19 -8
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +19 -14
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +5 -2
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +8 -3
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +1 -0
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +17 -4
|
@@ -66,7 +66,8 @@ inline JSTensorViewIn getValue<JSTensorViewIn>(const jsi::Value &val,
|
|
|
66
66
|
tensorView.sizes.reserve(numShapeDims);
|
|
67
67
|
|
|
68
68
|
for (size_t i = 0; i < numShapeDims; ++i) {
|
|
69
|
-
int32_t dim =
|
|
69
|
+
int32_t dim =
|
|
70
|
+
getValue<int32_t>(shapeArray.getValueAtIndex(runtime, i), runtime);
|
|
70
71
|
tensorView.sizes.push_back(dim);
|
|
71
72
|
}
|
|
72
73
|
|
|
@@ -173,23 +174,24 @@ inline std::vector<T> getArrayAsVector(const jsi::Value &val,
|
|
|
173
174
|
return result;
|
|
174
175
|
}
|
|
175
176
|
|
|
176
|
-
|
|
177
177
|
// Template specializations for std::vector<T> types
|
|
178
178
|
template <>
|
|
179
|
-
inline std::vector<JSTensorViewIn>
|
|
180
|
-
|
|
179
|
+
inline std::vector<JSTensorViewIn>
|
|
180
|
+
getValue<std::vector<JSTensorViewIn>>(const jsi::Value &val,
|
|
181
|
+
jsi::Runtime &runtime) {
|
|
181
182
|
return getArrayAsVector<JSTensorViewIn>(val, runtime);
|
|
182
183
|
}
|
|
183
184
|
|
|
184
185
|
template <>
|
|
185
|
-
inline std::vector<std::string>
|
|
186
|
-
|
|
186
|
+
inline std::vector<std::string>
|
|
187
|
+
getValue<std::vector<std::string>>(const jsi::Value &val,
|
|
188
|
+
jsi::Runtime &runtime) {
|
|
187
189
|
return getArrayAsVector<std::string>(val, runtime);
|
|
188
190
|
}
|
|
189
191
|
|
|
190
192
|
template <>
|
|
191
|
-
inline std::vector<int32_t>
|
|
192
|
-
|
|
193
|
+
inline std::vector<int32_t>
|
|
194
|
+
getValue<std::vector<int32_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
|
|
193
195
|
return getArrayAsVector<int32_t>(val, runtime);
|
|
194
196
|
}
|
|
195
197
|
|
|
@@ -280,6 +282,15 @@ inline jsi::Value getJsiValue(const std::vector<int32_t> &vec,
|
|
|
280
282
|
return {runtime, array};
|
|
281
283
|
}
|
|
282
284
|
|
|
285
|
+
inline jsi::Value getJsiValue(const std::vector<char> &vec,
|
|
286
|
+
jsi::Runtime &runtime) {
|
|
287
|
+
jsi::Array array(runtime, vec.size());
|
|
288
|
+
for (size_t i = 0; i < vec.size(); i++) {
|
|
289
|
+
array.setValueAtIndex(runtime, i, jsi::Value(vec[i]));
|
|
290
|
+
}
|
|
291
|
+
return {runtime, array};
|
|
292
|
+
}
|
|
293
|
+
|
|
283
294
|
inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
|
|
284
295
|
return {runtime, val};
|
|
285
296
|
}
|
|
@@ -36,8 +36,8 @@ SpeechToText::decode(std::span<int32_t> tokens,
|
|
|
36
36
|
return this->makeOwningBuffer(decoderOutput);
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
std::string SpeechToText::transcribe(std::span<float> waveform,
|
|
40
|
-
|
|
39
|
+
std::vector<char> SpeechToText::transcribe(std::span<float> waveform,
|
|
40
|
+
std::string languageOption) const {
|
|
41
41
|
std::vector<Segment> segments =
|
|
42
42
|
this->asr->transcribe(waveform, DecodingOptions(languageOption));
|
|
43
43
|
std::string transcription;
|
|
@@ -55,7 +55,8 @@ std::string SpeechToText::transcribe(std::span<float> waveform,
|
|
|
55
55
|
transcription += word.content;
|
|
56
56
|
}
|
|
57
57
|
}
|
|
58
|
-
|
|
58
|
+
|
|
59
|
+
return {transcription.begin(), transcription.end()};
|
|
59
60
|
}
|
|
60
61
|
|
|
61
62
|
size_t SpeechToText::getMemoryLowerBound() const noexcept {
|
|
@@ -79,16 +80,17 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
|
|
|
79
80
|
throw std::runtime_error("Streaming is already in progress");
|
|
80
81
|
}
|
|
81
82
|
|
|
82
|
-
auto nativeCallback =
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
callback->call(
|
|
88
|
-
|
|
89
|
-
|
|
83
|
+
auto nativeCallback =
|
|
84
|
+
[this, callback](const std::vector<char> &committedVec,
|
|
85
|
+
const std::vector<char> &nonCommittedVec, bool isDone) {
|
|
86
|
+
this->callInvoker->invokeAsync([callback, committedVec, nonCommittedVec,
|
|
87
|
+
isDone](jsi::Runtime &rt) {
|
|
88
|
+
callback->call(
|
|
89
|
+
rt, rnexecutorch::jsi_conversion::getJsiValue(committedVec, rt),
|
|
90
|
+
rnexecutorch::jsi_conversion::getJsiValue(nonCommittedVec, rt),
|
|
91
|
+
jsi::Value(isDone));
|
|
90
92
|
});
|
|
91
|
-
|
|
93
|
+
};
|
|
92
94
|
|
|
93
95
|
this->isStreaming = true;
|
|
94
96
|
while (this->isStreaming) {
|
|
@@ -99,12 +101,15 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
|
|
|
99
101
|
}
|
|
100
102
|
ProcessResult res =
|
|
101
103
|
this->processor->processIter(DecodingOptions(languageOption));
|
|
102
|
-
|
|
104
|
+
|
|
105
|
+
nativeCallback({res.committed.begin(), res.committed.end()},
|
|
106
|
+
{res.nonCommitted.begin(), res.nonCommitted.end()}, false);
|
|
103
107
|
this->readyToProcess = false;
|
|
104
108
|
}
|
|
105
109
|
|
|
106
110
|
std::string committed = this->processor->finish();
|
|
107
|
-
|
|
111
|
+
|
|
112
|
+
nativeCallback({committed.begin(), committed.end()}, {}, true);
|
|
108
113
|
|
|
109
114
|
this->resetStreamState();
|
|
110
115
|
}
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include "rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h"
|
|
4
|
+
#include <span>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
4
7
|
|
|
5
8
|
namespace rnexecutorch {
|
|
6
9
|
|
|
@@ -16,8 +19,8 @@ public:
|
|
|
16
19
|
std::shared_ptr<OwningArrayBuffer> encode(std::span<float> waveform) const;
|
|
17
20
|
std::shared_ptr<OwningArrayBuffer>
|
|
18
21
|
decode(std::span<int32_t> tokens, std::span<float> encoderOutput) const;
|
|
19
|
-
std::string transcribe(std::span<float> waveform,
|
|
20
|
-
|
|
22
|
+
std::vector<char> transcribe(std::span<float> waveform,
|
|
23
|
+
std::string languageOption) const;
|
|
21
24
|
|
|
22
25
|
size_t getMemoryLowerBound() const noexcept;
|
|
23
26
|
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
import { Logger } from '../../common/Logger';
|
|
4
4
|
import { ResourceFetcher } from '../../utils/ResourceFetcher';
|
|
5
5
|
export class SpeechToTextModule {
|
|
6
|
+
textDecoder = new TextDecoder('utf-8', {
|
|
7
|
+
fatal: false,
|
|
8
|
+
ignoreBOM: true
|
|
9
|
+
});
|
|
6
10
|
async load(model, onDownloadProgressCallback = () => {}) {
|
|
7
11
|
this.modelConfig = model;
|
|
8
12
|
const tokenizerLoadPromise = ResourceFetcher.fetch(undefined, model.tokenizerSource);
|
|
@@ -39,7 +43,8 @@ export class SpeechToTextModule {
|
|
|
39
43
|
Logger.info('Passing waveform as number[] is deprecated, use Float32Array instead');
|
|
40
44
|
waveform = new Float32Array(waveform);
|
|
41
45
|
}
|
|
42
|
-
|
|
46
|
+
const transcriptionBytes = await this.nativeModule.transcribe(waveform, options.language || '');
|
|
47
|
+
return this.textDecoder.decode(new Uint8Array(transcriptionBytes));
|
|
43
48
|
}
|
|
44
49
|
async *stream(options = {}) {
|
|
45
50
|
this.validateOptions(options);
|
|
@@ -55,8 +60,8 @@ export class SpeechToTextModule {
|
|
|
55
60
|
try {
|
|
56
61
|
await this.nativeModule.stream((committed, nonCommitted, isDone) => {
|
|
57
62
|
queue.push({
|
|
58
|
-
committed,
|
|
59
|
-
nonCommitted
|
|
63
|
+
committed: this.textDecoder.decode(new Uint8Array(committed)),
|
|
64
|
+
nonCommitted: this.textDecoder.decode(new Uint8Array(nonCommitted))
|
|
60
65
|
});
|
|
61
66
|
if (isDone) {
|
|
62
67
|
finished = true;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["Logger","ResourceFetcher","SpeechToTextModule","load","model","onDownloadProgressCallback","modelConfig","tokenizerLoadPromise","fetch","undefined","tokenizerSource","encoderDecoderPromise","encoderSource","decoderSource","tokenizerSources","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","encode","waveform","Array","isArray","info","Float32Array","decode","tokens","encoderOutput","Int32Array","transcribe","options","validateOptions","language","stream","queue","waiter","finished","error","wake","committed","nonCommitted","isDone","push","e","length","shift","r","streamInsert","streamStop","isMultilingual"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AAAA,SAASA,MAAM,QAAQ,qBAAqB;AAE5C,SAASC,eAAe,QAAQ,6BAA6B;AAE7D,OAAO,MAAMC,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"names":["Logger","ResourceFetcher","SpeechToTextModule","textDecoder","TextDecoder","fatal","ignoreBOM","load","model","onDownloadProgressCallback","modelConfig","tokenizerLoadPromise","fetch","undefined","tokenizerSource","encoderDecoderPromise","encoderSource","decoderSource","tokenizerSources","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","encode","waveform","Array","isArray","info","Float32Array","decode","tokens","encoderOutput","Int32Array","transcribe","options","validateOptions","transcriptionBytes","language","Uint8Array","stream","queue","waiter","finished","error","wake","committed","nonCommitted","isDone","push","e","length","shift","r","streamInsert","streamStop","isMultilingual"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AAAA,SAASA,MAAM,QAAQ,qBAAqB;AAE5C,SAASC,eAAe,QAAQ,6BAA6B;AAE7D,OAAO,MAAMC,kBAAkB,CAAC;EAKtBC,WAAW,GAAG,IAAIC,WAAW,CAAC,OAAO,EAAE;IAC7CC,KAAK,EAAE,KAAK;IACZC,SAAS,EAAE;EACb,CAAC,CAAC;EAEF,MAAaC,IAAIA,CACfC,KAA8B,EAC9BC,0BAAsD,GAAGA,CAAA,KAAM,CAAC,CAAC,EACjE;IACA,IAAI,CAACC,WAAW,GAAGF,KAAK;IAExB,MAAMG,oBAAoB,GAAGV,eAAe,CAACW,KAAK,CAChDC,SAAS,EACTL,KAAK,CAACM,eACR,CAAC;IACD,MAAMC,qBAAqB,GAAGd,eAAe,CAACW,KAAK,CACjDH,0BAA0B,EAC1BD,KAAK,CAACQ,aAAa,EACnBR,KAAK,CAACS,aACR,CAAC;IACD,MAAM,CAACC,gBAAgB,EAAEC,qBAAqB,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CAClEV,oBAAoB,EACpBI,qBAAqB,CACtB,CAAC;IACF,MAAMC,aAAa,GAAGG,qBAAqB,GAAG,CAAC,CAAC;IAChD,MAAMF,aAAa,GAAGE,qBAAqB,GAAG,CAAC,CAAC;IAChD,IAAI,CAACH,aAAa,IAAI,CAACC,aAAa,IAAI,CAACC,gBAAgB,EAAE;MACzD,MAAM,IAAII,KAAK,CAAC,uBAAuB,CAAC;IAC1C;IACA,IAAI,CAACC,YAAY,GAAG,MAAMC,MAAM,CAACC,gBAAgB,CAC/CT,aAAa,EACbC,aAAa,EACbC,gBAAgB,CAAC,CAAC,CACpB,CAAC;EACH;EAEA,MAAaQ,MAAMA,CACjBC,QAAiC,EACV;IACvB,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAII,YAAY,CAAC,MAAM,IAAI,CAACR,YAAY,CAACG,M
AAM,CAACC,QAAQ,CAAC,CAAC;EACnE;EAEA,MAAaK,MAAMA,CACjBC,MAA6B,EAC7BC,aAAsC,EACf;IACvB,IAAIN,KAAK,CAACC,OAAO,CAACI,MAAM,CAAC,EAAE;MACzBjC,MAAM,CAAC8B,IAAI,CACT,kEACF,CAAC;MACDG,MAAM,GAAG,IAAIE,UAAU,CAACF,MAAM,CAAC;IACjC;IACA,IAAIL,KAAK,CAACC,OAAO,CAACK,aAAa,CAAC,EAAE;MAChClC,MAAM,CAAC8B,IAAI,CACT,2EACF,CAAC;MACDI,aAAa,GAAG,IAAIH,YAAY,CAACG,aAAa,CAAC;IACjD;IACA,OAAO,IAAIH,YAAY,CACrB,MAAM,IAAI,CAACR,YAAY,CAACS,MAAM,CAACC,MAAM,EAAEC,aAAa,CACtD,CAAC;EACH;EAEA,MAAaE,UAAUA,CACrBT,QAAiC,EACjCU,OAAwB,GAAG,CAAC,CAAC,EACZ;IACjB,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,IAAIT,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,MAAMY,kBAAkB,GAAG,MAAM,IAAI,CAAChB,YAAY,CAACa,UAAU,CAC3DT,QAAQ,EACRU,OAAO,CAACG,QAAQ,IAAI,EACtB,CAAC;IACD,OAAO,IAAI,CAACrC,WAAW,CAAC6B,MAAM,CAAC,IAAIS,UAAU,CAACF,kBAAkB,CAAC,CAAC;EACpE;EAEA,OAAcG,MAAMA,CAClBL,OAAwB,GAAG,CAAC,CAAC,EACgC;IAC7D,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,MAAMM,KAAoD,GAAG,EAAE;IAC/D,IAAIC,MAA2B,GAAG,IAAI;IACtC,IAAIC,QAAQ,GAAG,KAAK;IACpB,IAAIC,KAAc;IAElB,MAAMC,IAAI,GAAGA,CAAA,KAAM;MACjBH,MAAM,GAAG,CAAC;MACVA,MAAM,GAAG,IAAI;IACf,CAAC;IAED,CAAC,YAAY;MACX,IAAI;QACF,MAAM,IAAI,CAACrB,YAAY,CAACmB,MAAM,CAC5B,CAACM,SAAmB,EAAEC,YAAsB,EAAEC,MAAe,KAAK;UAChEP,KAAK,CAACQ,IAAI,CAAC;YACTH,SAAS,EAAE,IAAI,CAAC7C,WAAW,CAAC6B,MAAM,CAAC,IAAIS,UAAU,CAACO,SAAS,CAAC,CAAC;YAC7DC,YAAY,EAAE,IAAI,CAAC9C,WAAW,CAAC6B,MAAM,CACnC,IAAIS,UAAU,CAACQ,YAAY,CAC7B;UACF,CAAC,CAAC;UACF,IAAIC,MAAM,EAAE;YACVL,QAAQ,GAAG,IAAI;UACjB;UACAE,IAAI,CAAC,CAAC;QACR,CAAC,EACDV,OAAO,CAACG,QAAQ,IAAI,EACtB,CAAC;QACDK,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR,CAAC,CAAC,OAAOK,CAAC,EAAE;QACVN,KAAK,GAAGM,CAAC;QACTP,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR;IACF,CAAC,EAAE,CAAC;IAEJ,OAAO,IAAI,EAAE;MACX,IAAIJ,KAAK,CAACU,MAAM,GAAG,CAAC,EAAE;QACpB,MAAMV,KAAK,CAACW,KAAK,CAAC,CAAE;QACpB,IAAIT,QAAQ,IAAIF,KAAK,CAACU,MAAM,KAAK,CAAC,EAAE;UAClC;QACF;QACA;MACF;MACA,IAAIP,KAAK,EAAE,MAAMA,KAAK;MACtB,IAAID,QAAQ,EAAE;MACd,MAAM,IAAIzB,OA
AO,CAAQmC,CAAC,IAAMX,MAAM,GAAGW,CAAE,CAAC;IAC9C;EACF;EAEA,MAAaC,YAAYA,CAAC7B,QAAiC,EAAiB;IAC1E,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAI,CAACJ,YAAY,CAACiC,YAAY,CAAC7B,QAAQ,CAAC;EACjD;EAEA,MAAa8B,UAAUA,CAAA,EAAkB;IACvC,OAAO,IAAI,CAAClC,YAAY,CAACkC,UAAU,CAAC,CAAC;EACvC;EAEQnB,eAAeA,CAACD,OAAwB,EAAE;IAChD,IAAI,CAAC,IAAI,CAAC3B,WAAW,CAACgD,cAAc,IAAIrB,OAAO,CAACG,QAAQ,EAAE;MACxD,MAAM,IAAIlB,KAAK,CAAC,gDAAgD,CAAC;IACnE;IACA,IAAI,IAAI,CAACZ,WAAW,CAACgD,cAAc,IAAI,CAACrB,OAAO,CAACG,QAAQ,EAAE;MACxD,MAAM,IAAIlB,KAAK,CAAC,2CAA2C,CAAC;IAC9D;EACF;AACF","ignoreList":[]}
|
|
@@ -2,6 +2,7 @@ import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
|
|
|
2
2
|
export declare class SpeechToTextModule {
|
|
3
3
|
private nativeModule;
|
|
4
4
|
private modelConfig;
|
|
5
|
+
private textDecoder;
|
|
5
6
|
load(model: SpeechToTextModelConfig, onDownloadProgressCallback?: (progress: number) => void): Promise<void>;
|
|
6
7
|
encode(waveform: Float32Array | number[]): Promise<Float32Array>;
|
|
7
8
|
decode(tokens: Int32Array | number[], encoderOutput: Float32Array | number[]): Promise<Float32Array>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,YAAY,CAAM;IAE1B,OAAO,CAAC,WAAW,CAA2B;
|
|
1
|
+
{"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,YAAY,CAAM;IAE1B,OAAO,CAAC,WAAW,CAA2B;IAE9C,OAAO,CAAC,WAAW,CAGhB;IAEU,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe;IA6BtD,MAAM,CACjB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAChC,OAAO,CAAC,YAAY,CAAC;IAUX,MAAM,CACjB,MAAM,EAAE,UAAU,GAAG,MAAM,EAAE,EAC7B,aAAa,EAAE,YAAY,GAAG,MAAM,EAAE,GACrC,OAAO,CAAC,YAAY,CAAC;IAkBX,UAAU,CACrB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,EACjC,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;IAgBJ,MAAM,CAClB,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAqDjD,YAAY,CAAC,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxC,OAAO,CAAC,eAAe;CAQxB"}
|
package/package.json
CHANGED
|
@@ -7,6 +7,11 @@ export class SpeechToTextModule {
|
|
|
7
7
|
|
|
8
8
|
private modelConfig!: SpeechToTextModelConfig;
|
|
9
9
|
|
|
10
|
+
private textDecoder = new TextDecoder('utf-8', {
|
|
11
|
+
fatal: false,
|
|
12
|
+
ignoreBOM: true,
|
|
13
|
+
});
|
|
14
|
+
|
|
10
15
|
public async load(
|
|
11
16
|
model: SpeechToTextModelConfig,
|
|
12
17
|
onDownloadProgressCallback: (progress: number) => void = () => {}
|
|
@@ -83,8 +88,11 @@ export class SpeechToTextModule {
|
|
|
83
88
|
);
|
|
84
89
|
waveform = new Float32Array(waveform);
|
|
85
90
|
}
|
|
86
|
-
|
|
87
|
-
|
|
91
|
+
const transcriptionBytes = await this.nativeModule.transcribe(
|
|
92
|
+
waveform,
|
|
93
|
+
options.language || ''
|
|
94
|
+
);
|
|
95
|
+
return this.textDecoder.decode(new Uint8Array(transcriptionBytes));
|
|
88
96
|
}
|
|
89
97
|
|
|
90
98
|
public async *stream(
|
|
@@ -105,8 +113,13 @@ export class SpeechToTextModule {
|
|
|
105
113
|
(async () => {
|
|
106
114
|
try {
|
|
107
115
|
await this.nativeModule.stream(
|
|
108
|
-
(committed:
|
|
109
|
-
queue.push({
|
|
116
|
+
(committed: number[], nonCommitted: number[], isDone: boolean) => {
|
|
117
|
+
queue.push({
|
|
118
|
+
committed: this.textDecoder.decode(new Uint8Array(committed)),
|
|
119
|
+
nonCommitted: this.textDecoder.decode(
|
|
120
|
+
new Uint8Array(nonCommitted)
|
|
121
|
+
),
|
|
122
|
+
});
|
|
110
123
|
if (isDone) {
|
|
111
124
|
finished = true;
|
|
112
125
|
}
|