react-native-executorch 0.5.10 → 0.5.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/common/rnexecutorch/host_objects/JsiConversions.h +19 -8
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +19 -14
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +5 -2
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js +2 -2
- package/lib/module/hooks/natural_language_processing/useSpeechToText.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +8 -3
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +3 -3
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +1 -0
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/hooks/natural_language_processing/useSpeechToText.ts +23 -18
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +17 -4
|
@@ -66,7 +66,8 @@ inline JSTensorViewIn getValue<JSTensorViewIn>(const jsi::Value &val,
|
|
|
66
66
|
tensorView.sizes.reserve(numShapeDims);
|
|
67
67
|
|
|
68
68
|
for (size_t i = 0; i < numShapeDims; ++i) {
|
|
69
|
-
int32_t dim =
|
|
69
|
+
int32_t dim =
|
|
70
|
+
getValue<int32_t>(shapeArray.getValueAtIndex(runtime, i), runtime);
|
|
70
71
|
tensorView.sizes.push_back(dim);
|
|
71
72
|
}
|
|
72
73
|
|
|
@@ -173,23 +174,24 @@ inline std::vector<T> getArrayAsVector(const jsi::Value &val,
|
|
|
173
174
|
return result;
|
|
174
175
|
}
|
|
175
176
|
|
|
176
|
-
|
|
177
177
|
// Template specializations for std::vector<T> types
|
|
178
178
|
template <>
|
|
179
|
-
inline std::vector<JSTensorViewIn>
|
|
180
|
-
|
|
179
|
+
inline std::vector<JSTensorViewIn>
|
|
180
|
+
getValue<std::vector<JSTensorViewIn>>(const jsi::Value &val,
|
|
181
|
+
jsi::Runtime &runtime) {
|
|
181
182
|
return getArrayAsVector<JSTensorViewIn>(val, runtime);
|
|
182
183
|
}
|
|
183
184
|
|
|
184
185
|
template <>
|
|
185
|
-
inline std::vector<std::string>
|
|
186
|
-
|
|
186
|
+
inline std::vector<std::string>
|
|
187
|
+
getValue<std::vector<std::string>>(const jsi::Value &val,
|
|
188
|
+
jsi::Runtime &runtime) {
|
|
187
189
|
return getArrayAsVector<std::string>(val, runtime);
|
|
188
190
|
}
|
|
189
191
|
|
|
190
192
|
template <>
|
|
191
|
-
inline std::vector<int32_t>
|
|
192
|
-
|
|
193
|
+
inline std::vector<int32_t>
|
|
194
|
+
getValue<std::vector<int32_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
|
|
193
195
|
return getArrayAsVector<int32_t>(val, runtime);
|
|
194
196
|
}
|
|
195
197
|
|
|
@@ -280,6 +282,15 @@ inline jsi::Value getJsiValue(const std::vector<int32_t> &vec,
|
|
|
280
282
|
return {runtime, array};
|
|
281
283
|
}
|
|
282
284
|
|
|
285
|
+
inline jsi::Value getJsiValue(const std::vector<char> &vec,
|
|
286
|
+
jsi::Runtime &runtime) {
|
|
287
|
+
jsi::Array array(runtime, vec.size());
|
|
288
|
+
for (size_t i = 0; i < vec.size(); i++) {
|
|
289
|
+
array.setValueAtIndex(runtime, i, jsi::Value(vec[i]));
|
|
290
|
+
}
|
|
291
|
+
return {runtime, array};
|
|
292
|
+
}
|
|
293
|
+
|
|
283
294
|
inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
|
|
284
295
|
return {runtime, val};
|
|
285
296
|
}
|
|
@@ -36,8 +36,8 @@ SpeechToText::decode(std::span<int32_t> tokens,
|
|
|
36
36
|
return this->makeOwningBuffer(decoderOutput);
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
std::
|
|
40
|
-
|
|
39
|
+
std::vector<char> SpeechToText::transcribe(std::span<float> waveform,
|
|
40
|
+
std::string languageOption) const {
|
|
41
41
|
std::vector<Segment> segments =
|
|
42
42
|
this->asr->transcribe(waveform, DecodingOptions(languageOption));
|
|
43
43
|
std::string transcription;
|
|
@@ -55,7 +55,8 @@ std::string SpeechToText::transcribe(std::span<float> waveform,
|
|
|
55
55
|
transcription += word.content;
|
|
56
56
|
}
|
|
57
57
|
}
|
|
58
|
-
|
|
58
|
+
|
|
59
|
+
return {transcription.begin(), transcription.end()};
|
|
59
60
|
}
|
|
60
61
|
|
|
61
62
|
size_t SpeechToText::getMemoryLowerBound() const noexcept {
|
|
@@ -79,16 +80,17 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
|
|
|
79
80
|
throw std::runtime_error("Streaming is already in progress");
|
|
80
81
|
}
|
|
81
82
|
|
|
82
|
-
auto nativeCallback =
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
callback->call(
|
|
88
|
-
|
|
89
|
-
|
|
83
|
+
auto nativeCallback =
|
|
84
|
+
[this, callback](const std::vector<char> &committedVec,
|
|
85
|
+
const std::vector<char> &nonCommittedVec, bool isDone) {
|
|
86
|
+
this->callInvoker->invokeAsync([callback, committedVec, nonCommittedVec,
|
|
87
|
+
isDone](jsi::Runtime &rt) {
|
|
88
|
+
callback->call(
|
|
89
|
+
rt, rnexecutorch::jsi_conversion::getJsiValue(committedVec, rt),
|
|
90
|
+
rnexecutorch::jsi_conversion::getJsiValue(nonCommittedVec, rt),
|
|
91
|
+
jsi::Value(isDone));
|
|
90
92
|
});
|
|
91
|
-
|
|
93
|
+
};
|
|
92
94
|
|
|
93
95
|
this->isStreaming = true;
|
|
94
96
|
while (this->isStreaming) {
|
|
@@ -99,12 +101,15 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
|
|
|
99
101
|
}
|
|
100
102
|
ProcessResult res =
|
|
101
103
|
this->processor->processIter(DecodingOptions(languageOption));
|
|
102
|
-
|
|
104
|
+
|
|
105
|
+
nativeCallback({res.committed.begin(), res.committed.end()},
|
|
106
|
+
{res.nonCommitted.begin(), res.nonCommitted.end()}, false);
|
|
103
107
|
this->readyToProcess = false;
|
|
104
108
|
}
|
|
105
109
|
|
|
106
110
|
std::string committed = this->processor->finish();
|
|
107
|
-
|
|
111
|
+
|
|
112
|
+
nativeCallback({committed.begin(), committed.end()}, {}, true);
|
|
108
113
|
|
|
109
114
|
this->resetStreamState();
|
|
110
115
|
}
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include "rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h"
|
|
4
|
+
#include <span>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
4
7
|
|
|
5
8
|
namespace rnexecutorch {
|
|
6
9
|
|
|
@@ -16,8 +19,8 @@ public:
|
|
|
16
19
|
std::shared_ptr<OwningArrayBuffer> encode(std::span<float> waveform) const;
|
|
17
20
|
std::shared_ptr<OwningArrayBuffer>
|
|
18
21
|
decode(std::span<int32_t> tokens, std::span<float> encoderOutput) const;
|
|
19
|
-
std::
|
|
20
|
-
|
|
22
|
+
std::vector<char> transcribe(std::span<float> waveform,
|
|
23
|
+
std::string languageOption) const;
|
|
21
24
|
|
|
22
25
|
size_t getMemoryLowerBound() const noexcept;
|
|
23
26
|
|
|
@@ -43,7 +43,7 @@ export const useSpeechToText = ({
|
|
|
43
43
|
setIsGenerating(false);
|
|
44
44
|
}
|
|
45
45
|
}, [isReady, isGenerating, modelInstance]);
|
|
46
|
-
const stream = useCallback(async
|
|
46
|
+
const stream = useCallback(async options => {
|
|
47
47
|
if (!isReady) throw new Error(getError(ETError.ModuleNotLoaded));
|
|
48
48
|
if (isGenerating) throw new Error(getError(ETError.ModelGenerating));
|
|
49
49
|
setIsGenerating(true);
|
|
@@ -54,7 +54,7 @@ export const useSpeechToText = ({
|
|
|
54
54
|
for await (const {
|
|
55
55
|
committed,
|
|
56
56
|
nonCommitted
|
|
57
|
-
} of modelInstance.stream()) {
|
|
57
|
+
} of modelInstance.stream(options)) {
|
|
58
58
|
setCommittedTranscription(prev => prev + committed);
|
|
59
59
|
setNonCommittedTranscription(nonCommitted);
|
|
60
60
|
transcription += committed;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["useEffect","useCallback","useState","ETError","getError","SpeechToTextModule","useSpeechToText","model","preventLoad","error","setError","isReady","setIsReady","isGenerating","setIsGenerating","downloadProgress","setDownloadProgress","modelInstance","committedTranscription","setCommittedTranscription","nonCommittedTranscription","setNonCommittedTranscription","load","isMultilingual","encoderSource","decoderSource","tokenizerSource","err","message","stateWrapper","fn","args","Error","ModuleNotLoaded","ModelGenerating","apply","stream","transcription","committed","nonCommitted","prev","wrapper","encode","prototype","decode","transcribe","streamStop","streamInsert"],"sourceRoot":"../../../../src","sources":["hooks/natural_language_processing/useSpeechToText.ts"],"mappings":";;AAAA,SAASA,SAAS,EAAEC,WAAW,EAAEC,QAAQ,QAAQ,OAAO;AACxD,SAASC,OAAO,EAAEC,QAAQ,QAAQ,aAAa;AAC/C,SAASC,kBAAkB,QAAQ,8DAA8D;AAGjG,OAAO,MAAMC,eAAe,GAAGA,CAAC;EAC9BC,KAAK;EACLC,WAAW,GAAG;AAIhB,CAAC,KAAK;EACJ,MAAM,CAACC,KAAK,EAAEC,QAAQ,CAAC,GAAGR,QAAQ,CAAgB,IAAI,CAAC;EACvD,MAAM,CAACS,OAAO,EAAEC,UAAU,CAAC,GAAGV,QAAQ,CAAC,KAAK,CAAC;EAC7C,MAAM,CAACW,YAAY,EAAEC,eAAe,CAAC,GAAGZ,QAAQ,CAAC,KAAK,CAAC;EACvD,MAAM,CAACa,gBAAgB,EAAEC,mBAAmB,CAAC,GAAGd,QAAQ,CAAC,CAAC,CAAC;EAE3D,MAAM,CAACe,aAAa,CAAC,GAAGf,QAAQ,CAAC,MAAM,IAAIG,kBAAkB,CAAC,CAAC,CAAC;EAChE,MAAM,CAACa,sBAAsB,EAAEC,yBAAyB,CAAC,GAAGjB,QAAQ,CAAC,EAAE,CAAC;EACxE,MAAM,CAACkB,yBAAyB,EAAEC,4BAA4B,CAAC,GAC7DnB,QAAQ,CAAC,EAAE,CAAC;EAEdF,SAAS,CAAC,MAAM;IACd,IAAIQ,WAAW,EAAE;IACjB,CAAC,YAAY;MACXQ,mBAAmB,CAAC,CAAC,CAAC;MACtBN,QAAQ,CAAC,IAAI,CAAC;MACd,IAAI;QACFE,UAAU,CAAC,KAAK,CAAC;QACjB,MAAMK,aAAa,CAACK,IAAI,CACtB;UACEC,cAAc,EAAEhB,KAAK,CAACgB,cAAc;UACpCC,aAAa,EAAEjB,KAAK,CAACiB,aAAa;UAClCC,aAAa,EAAElB,KAAK,CAACkB,aAAa;UAClCC,eAAe,EAAEnB,KAAK,CAACmB;QACzB,CAAC,EACDV,mBACF,CAAC;QACDJ,UAAU,CAAC,IAAI,CAAC;MAClB,CAAC,CAAC,OAAOe,GAAG,EAAE;QACZjB,QAAQ,CAAEiB,GAAG,CAAWC,OAAO,CAAC;MAClC;IACF,CAAC,EAAE,CAAC;EACN,CAAC,EAAE,CACDX,aAAa,EACbV,KAAK,CAACgB,cAAc,EACpBhB,KAAK,CAACiB,aAAa,EACnBjB,KAAK,CAACkB,aAAa,EACnBlB,KAAK,CAACmB,eAAe,EACrBlB,WAAW,CACZ,CAAC;EAEF,MAAMqB,YAAY,GAAG5B,WAAW,CACe6B,EAAK,IAChD,OAAO,GAAGC,IAAmB,KAAsC;IACjE,IAAI,CAACpB,OAAO,EAAE,MAAM,IAAIqB,KAAK,CAAC5B,QAAQ,CAACD,OAAO,CAAC8B,eAAe,CAAC,CAAC;IAChE,IAAIpB,YAAY,EAAE,MAAM,IAAImB,KAAK,CAAC5B,QAAQ,CAACD,OAAO,CAAC+B,eAAe,CAAC,CAAC;IACpEpB,eAAe,CAAC,IAAI,CAAC;IACrB,IAAI;MACF,OAAO,MAAMgB,EAAE,CAACK,KAAK,CAAClB,aAAa,EAAEc,IAAI,CAAC;IAC5C,CAAC,SAAS;MACRjB,eAAe,CAAC,KAAK,CAAC;IACxB;EACF,CAAC,EACH,CAACH,OAAO,EAAEE,YAAY,EAAEI,aAAa,CACvC,CAAC;EAED,MAAMmB,MAAM,GAAGnC,WAAW,
|
|
1
|
+
{"version":3,"names":["useEffect","useCallback","useState","ETError","getError","SpeechToTextModule","useSpeechToText","model","preventLoad","error","setError","isReady","setIsReady","isGenerating","setIsGenerating","downloadProgress","setDownloadProgress","modelInstance","committedTranscription","setCommittedTranscription","nonCommittedTranscription","setNonCommittedTranscription","load","isMultilingual","encoderSource","decoderSource","tokenizerSource","err","message","stateWrapper","fn","args","Error","ModuleNotLoaded","ModelGenerating","apply","stream","options","transcription","committed","nonCommitted","prev","wrapper","encode","prototype","decode","transcribe","streamStop","streamInsert"],"sourceRoot":"../../../../src","sources":["hooks/natural_language_processing/useSpeechToText.ts"],"mappings":";;AAAA,SAASA,SAAS,EAAEC,WAAW,EAAEC,QAAQ,QAAQ,OAAO;AACxD,SAASC,OAAO,EAAEC,QAAQ,QAAQ,aAAa;AAC/C,SAASC,kBAAkB,QAAQ,8DAA8D;AAGjG,OAAO,MAAMC,eAAe,GAAGA,CAAC;EAC9BC,KAAK;EACLC,WAAW,GAAG;AAIhB,CAAC,KAAK;EACJ,MAAM,CAACC,KAAK,EAAEC,QAAQ,CAAC,GAAGR,QAAQ,CAAgB,IAAI,CAAC;EACvD,MAAM,CAACS,OAAO,EAAEC,UAAU,CAAC,GAAGV,QAAQ,CAAC,KAAK,CAAC;EAC7C,MAAM,CAACW,YAAY,EAAEC,eAAe,CAAC,GAAGZ,QAAQ,CAAC,KAAK,CAAC;EACvD,MAAM,CAACa,gBAAgB,EAAEC,mBAAmB,CAAC,GAAGd,QAAQ,CAAC,CAAC,CAAC;EAE3D,MAAM,CAACe,aAAa,CAAC,GAAGf,QAAQ,CAAC,MAAM,IAAIG,kBAAkB,CAAC,CAAC,CAAC;EAChE,MAAM,CAACa,sBAAsB,EAAEC,yBAAyB,CAAC,GAAGjB,QAAQ,CAAC,EAAE,CAAC;EACxE,MAAM,CAACkB,yBAAyB,EAAEC,4BAA4B,CAAC,GAC7DnB,QAAQ,CAAC,EAAE,CAAC;EAEdF,SAAS,CAAC,MAAM;IACd,IAAIQ,WAAW,EAAE;IACjB,CAAC,YAAY;MACXQ,mBAAmB,CAAC,CAAC,CAAC;MACtBN,QAAQ,CAAC,IAAI,CAAC;MACd,IAAI;QACFE,UAAU,CAAC,KAAK,CAAC;QACjB,MAAMK,aAAa,CAACK,IAAI,CACtB;UACEC,cAAc,EAAEhB,KAAK,CAACgB,cAAc;UACpCC,aAAa,EAAEjB,KAAK,CAACiB,aAAa;UAClCC,aAAa,EAAElB,KAAK,CAACkB,aAAa;UAClCC,eAAe,EAAEnB,KAAK,CAACmB;QACzB,CAAC,EACDV,mBACF,CAAC;QACDJ,UAAU,CAAC,IAAI,CAAC;MAClB,CAAC,CAAC,OAAOe,GAAG,EAAE;QACZjB,QAAQ,CAAEiB,GAAG,CAAWC,OAAO,CAAC;MAClC;IACF,CAAC,EAAE,CAAC;EACN,CAAC,EAAE,CACDX,aAAa,EACbV,KAAK,CAACgB,cAAc,EACpBhB,KAAK,CAACiB,aAAa,EACnBjB,KAAK,CAACkB,aAAa,EACnBlB,KAAK,CAACmB,eAAe,EACrBlB,WAAW,CACZ,CAAC;EAEF,MAAMqB,YAAY,GAAG5B,WAAW,CACe6B,EAAK,IAChD,OAAO,GAAGC,IAAmB,KAAsC;IACjE,IAAI,CAACpB,OAAO,EAAE,MAAM,IAAIqB,KAAK,CAAC5B,QAAQ,CAACD,OAAO,CAAC8B,eAAe,CAAC,CAAC;IAChE,IAAIpB,YAAY,EAAE,MAAM,IAAImB,KAAK,CAAC5B,QAAQ,CAACD,OAAO,CAAC+B,eAAe,CAAC,CAAC;IACpEpB,eAAe,CAAC,IAAI,CAAC;IACrB,IAAI;MACF,OAAO,MAAMgB,EAAE,CAACK,KAAK,CAAClB,aAAa,EAAEc,IAAI,CAAC;IAC5C,CAAC,SAAS;MACRjB,eAAe,CAAC,KAAK,CAAC;IACxB;EACF,CAAC,EACH,CAACH,OAAO,EAAEE,YAAY,EAAEI,aAAa,CACvC,CAAC;EAED,MAAMmB,MAAM,GAAGnC,WAAW,CACxB,MAAOoC,OAAyB,IAAK;IACnC,IAAI,CAAC1B,OAAO,EAAE,MAAM,IAAIqB,KAAK,CAAC5B,QAAQ,CAACD,OAAO,CAAC8B,eAAe,CAAC,CAAC;IAChE,IAAIpB,YAAY,EAAE,MAAM,IAAImB,KAAK,CAAC5B,QAAQ,CAACD,OAAO,CAAC+B,eAAe,CAAC,CAAC;IACpEpB,eAAe,CAAC,IAAI,CAAC;IACrBK,yBAAyB,CAAC,EAAE,CAAC;IAC7BE,4BAA4B,CAAC,EAAE,CAAC;IAChC,IAAIiB,aAAa,GAAG,EAAE;IACtB,IAAI;MACF,WAAW,MAAM;QAAEC,SAAS;QAAEC;MAAa,CAAC,IAAIvB,aAAa,CAACmB,MAAM,CAClEC,OACF,CAAC,EAAE;QACDlB,yBAAyB,CAAEsB,IAAI,IAAKA,IAAI,GAAGF,SAAS,CAAC;QACrDlB,4BAA4B,CAACmB,YAAY,CAAC;QAC1CF,aAAa,IAAIC,SAAS;MAC5B;IACF,CAAC,SAAS;MACRzB,eAAe,CAAC,KAAK,CAAC;IACxB;IACA,OAAOwB,aAAa;EACtB,CAAC,EACD,CAAC3B,OAAO,EAAEE,YAAY,EAAEI,aAAa,CACvC,CAAC;EAED,MAAMyB,OAAO,GAAGzC,WAAW,CACW6B,EAAK,IAAK;IAC5C,OAAO,CAAC,GAAGC,IAAmB,KAAoB;MAChD,IAAI,CAACpB,OAAO,EAAE,MAAM,IAAIqB,KAAK,CAAC5B,QAAQ,CAACD,OAAO,CAAC8B,eAAe,CAAC,CAAC;MAChE,OAAOH,EAAE,CAACK,KAAK,CAAClB,aAAa,EAAEc,IAAI,CAAC;IACtC,CAAC;EACH,CAAC,EACD,CAACpB,OAAO,EAAEM,aAAa,CACzB,CAAC;EAED,OAAO;IACLR,KAAK;IACLE,OAAO;IACPE,YAAY;IACZE,gBAAgB;IAChBG,sBAAsB;IACtBE,yBAAyB;IACzBuB,MAAM,EAAEd,YAAY,CAACxB,kBAAkB,CAACuC,SAAS,CAACD,MAAM,CAAC;IACzDE,MAAM,EAAEhB,YAAY,CAACxB,kBAAkB,CAACuC,SAAS,CAACC,MAAM,CAAC;IACzDC,UAAU,EAAEjB,YAAY,CAACxB,kBAAkB,CAACuC,SAAS,CAACE,UAAU,CAAC;IACjEV,MAAM;IACNW,UAAU,EAAEL,OAAO,CAACrC,kBAAkB,CAACuC,SAAS,CAACG,UAAU,CAAC;IAC5DC,YAAY,EAAEN,OAAO,CAACrC,kBAAkB,CAACuC,SAAS,CAACI,YAAY;EACjE,CAAC;AACH,CAAC","ignoreList":[]}
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
import { Logger } from '../../common/Logger';
|
|
4
4
|
import { ResourceFetcher } from '../../utils/ResourceFetcher';
|
|
5
5
|
export class SpeechToTextModule {
|
|
6
|
+
textDecoder = new TextDecoder('utf-8', {
|
|
7
|
+
fatal: false,
|
|
8
|
+
ignoreBOM: true
|
|
9
|
+
});
|
|
6
10
|
async load(model, onDownloadProgressCallback = () => {}) {
|
|
7
11
|
this.modelConfig = model;
|
|
8
12
|
const tokenizerLoadPromise = ResourceFetcher.fetch(undefined, model.tokenizerSource);
|
|
@@ -39,7 +43,8 @@ export class SpeechToTextModule {
|
|
|
39
43
|
Logger.info('Passing waveform as number[] is deprecated, use Float32Array instead');
|
|
40
44
|
waveform = new Float32Array(waveform);
|
|
41
45
|
}
|
|
42
|
-
|
|
46
|
+
const transcriptionBytes = await this.nativeModule.transcribe(waveform, options.language || '');
|
|
47
|
+
return this.textDecoder.decode(new Uint8Array(transcriptionBytes));
|
|
43
48
|
}
|
|
44
49
|
async *stream(options = {}) {
|
|
45
50
|
this.validateOptions(options);
|
|
@@ -55,8 +60,8 @@ export class SpeechToTextModule {
|
|
|
55
60
|
try {
|
|
56
61
|
await this.nativeModule.stream((committed, nonCommitted, isDone) => {
|
|
57
62
|
queue.push({
|
|
58
|
-
committed,
|
|
59
|
-
nonCommitted
|
|
63
|
+
committed: this.textDecoder.decode(new Uint8Array(committed)),
|
|
64
|
+
nonCommitted: this.textDecoder.decode(new Uint8Array(nonCommitted))
|
|
60
65
|
});
|
|
61
66
|
if (isDone) {
|
|
62
67
|
finished = true;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["Logger","ResourceFetcher","SpeechToTextModule","load","model","onDownloadProgressCallback","modelConfig","tokenizerLoadPromise","fetch","undefined","tokenizerSource","encoderDecoderPromise","encoderSource","decoderSource","tokenizerSources","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","encode","waveform","Array","isArray","info","Float32Array","decode","tokens","encoderOutput","Int32Array","transcribe","options","validateOptions","language","stream","queue","waiter","finished","error","wake","committed","nonCommitted","isDone","push","e","length","shift","r","streamInsert","streamStop","isMultilingual"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AAAA,SAASA,MAAM,QAAQ,qBAAqB;AAE5C,SAASC,eAAe,QAAQ,6BAA6B;AAE7D,OAAO,MAAMC,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"names":["Logger","ResourceFetcher","SpeechToTextModule","textDecoder","TextDecoder","fatal","ignoreBOM","load","model","onDownloadProgressCallback","modelConfig","tokenizerLoadPromise","fetch","undefined","tokenizerSource","encoderDecoderPromise","encoderSource","decoderSource","tokenizerSources","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","encode","waveform","Array","isArray","info","Float32Array","decode","tokens","encoderOutput","Int32Array","transcribe","options","validateOptions","transcriptionBytes","language","Uint8Array","stream","queue","waiter","finished","error","wake","committed","nonCommitted","isDone","push","e","length","shift","r","streamInsert","streamStop","isMultilingual"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AAAA,SAASA,MAAM,QAAQ,qBAAqB;AAE5C,SAASC,eAAe,QAAQ,6BAA6B;AAE7D,OAAO,MAAMC,kBAAkB,CAAC;EAKtBC,WAAW,GAAG,IAAIC,WAAW,CAAC,OAAO,EAAE;IAC7CC,KAAK,EAAE,KAAK;IACZC,SAAS,EAAE;EACb,CAAC,CAAC;EAEF,MAAaC,IAAIA,CACfC,KAA8B,EAC9BC,0BAAsD,GAAGA,CAAA,KAAM,CAAC,CAAC,EACjE;IACA,IAAI,CAACC,WAAW,GAAGF,KAAK;IAExB,MAAMG,oBAAoB,GAAGV,eAAe,CAACW,KAAK,CAChDC,SAAS,EACTL,KAAK,CAACM,eACR,CAAC;IACD,MAAMC,qBAAqB,GAAGd,eAAe,CAACW,KAAK,CACjDH,0BAA0B,EAC1BD,KAAK,CAACQ,aAAa,EACnBR,KAAK,CAACS,aACR,CAAC;IACD,MAAM,CAACC,gBAAgB,EAAEC,qBAAqB,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CAClEV,oBAAoB,EACpBI,qBAAqB,CACtB,CAAC;IACF,MAAMC,aAAa,GAAGG,qBAAqB,GAAG,CAAC,CAAC;IAChD,MAAMF,aAAa,GAAGE,qBAAqB,GAAG,CAAC,CAAC;IAChD,IAAI,CAACH,aAAa,IAAI,CAACC,aAAa,IAAI,CAACC,gBAAgB,EAAE;MACzD,MAAM,IAAII,KAAK,CAAC,uBAAuB,CAAC;IAC1C;IACA,IAAI,CAACC,YAAY,GAAG,MAAMC,MAAM,CAACC,gBAAgB,CAC/CT,aAAa,EACbC,aAAa,EACbC,gBAAgB,CAAC,CAAC,CACpB,CAAC;EACH;EAEA,MAAaQ,MAAMA,CACjBC,QAAiC,EACV;IACvB,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAII,YAAY,CAAC,MAAM,IAAI,CAACR,YAAY,CAACG,MAAM,CAACC,QAAQ,CAAC,CAAC;EACnE;EAEA,MAAaK,MAAMA,CACjBC,MAA6B,EAC7BC,aAAsC,EACf;IACvB,IAAIN,KAAK,CAACC,OAAO,CAACI,MAAM,CAAC,EAAE;MACzBjC,MAAM,CAAC8B,IAAI,CACT,kEACF,CAAC;MACDG,MAAM,GAAG,IAAIE,UAAU,CAACF,MAAM,CAAC;IACjC;IACA,IAAIL,KAAK,CAACC,OAAO,CAACK,aAAa,CAAC,EAAE;MAChClC,MAAM,CAAC8B,IAAI,CACT,2EACF,CAAC;MACDI,aAAa,GAAG,IAAIH,YAAY,CAACG,aAAa,CAAC;IACjD;IACA,OAAO,IAAIH,YAAY,CACrB,MAAM,IAAI,CAACR,YAAY,CAACS,MAAM,CAACC,MAAM,EAAEC,aAAa,CACtD,CAAC;EACH;EAEA,MAAaE,UAAUA,CACrBT,QAAiC,EACjCU,OAAwB,GAAG,CAAC,CAAC,EACZ;IACjB,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,IAAIT,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,MAAMY,kBAAkB,GAAG,MAAM,IAAI,CAAChB,YAAY,CAACa,UAAU,CAC3DT,QAAQ,EACRU,OAAO,CAACG,QAAQ,IAAI,EACtB,CAAC;IACD,OAAO,IAAI,CAACrC,WAAW,CAAC6B,MAAM,CAAC,IAAIS,UAAU,CAACF,kBAAkB,CAAC,CAAC;EACpE;EAEA,OAAcG,MAAMA,CAClBL,OAAwB,GAAG,CAAC,CAAC,EACgC;IAC7D,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,MAAMM,KAAoD,GAAG,EAAE;IAC/D,IAAIC,MAA2B,GAAG,IAAI;IACtC,IAAIC,QAAQ,GAAG,KAAK;IACpB,IAAIC,KAAc;IAElB,MAAMC,IAAI,GAAGA,CAAA,KAAM;MACjBH,MAAM,GAAG,CAAC;MACVA,MAAM,GAAG,IAAI;IACf,CAAC;IAED,CAAC,YAAY;MACX,IAAI;QACF,MAAM,IAAI,CAACrB,YAAY,CAACmB,MAAM,CAC5B,CAACM,SAAmB,EAAEC,YAAsB,EAAEC,MAAe,KAAK;UAChEP,KAAK,CAACQ,IAAI,CAAC;YACTH,SAAS,EAAE,IAAI,CAAC7C,WAAW,CAAC6B,MAAM,CAAC,IAAIS,UAAU,CAACO,SAAS,CAAC,CAAC;YAC7DC,YAAY,EAAE,IAAI,CAAC9C,WAAW,CAAC6B,MAAM,CACnC,IAAIS,UAAU,CAACQ,YAAY,CAC7B;UACF,CAAC,CAAC;UACF,IAAIC,MAAM,EAAE;YACVL,QAAQ,GAAG,IAAI;UACjB;UACAE,IAAI,CAAC,CAAC;QACR,CAAC,EACDV,OAAO,CAACG,QAAQ,IAAI,EACtB,CAAC;QACDK,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR,CAAC,CAAC,OAAOK,CAAC,EAAE;QACVN,KAAK,GAAGM,CAAC;QACTP,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR;IACF,CAAC,EAAE,CAAC;IAEJ,OAAO,IAAI,EAAE;MACX,IAAIJ,KAAK,CAACU,MAAM,GAAG,CAAC,EAAE;QACpB,MAAMV,KAAK,CAACW,KAAK,CAAC,CAAE;QACpB,IAAIT,QAAQ,IAAIF,KAAK,CAACU,MAAM,KAAK,CAAC,EAAE;UAClC;QACF;QACA;MACF;MACA,IAAIP,KAAK,EAAE,MAAMA,KAAK;MACtB,IAAID,QAAQ,EAAE;MACd,MAAM,IAAIzB,OAAO,CAAQmC,CAAC,IAAMX,MAAM,GAAGW,CAAE,CAAC;IAC9C;EACF;EAEA,MAAaC,YAAYA,CAAC7B,QAAiC,EAAiB;IAC1E,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAI,CAACJ,YAAY,CAACiC,YAAY,CAAC7B,QAAQ,CAAC;EACjD;EAEA,MAAa8B,UAAUA,CAAA,EAAkB;IACvC,OAAO,IAAI,CAAClC,YAAY,CAACkC,UAAU,CAAC,CAAC;EACvC;EAEQnB,eAAeA,CAACD,OAAwB,EAAE;IAChD,IAAI,CAAC,IAAI,CAAC3B,WAAW,CAACgD,cAAc,IAAIrB,OAAO,CAACG,QAAQ,EAAE;MACxD,MAAM,IAAIlB,KAAK,CAAC,gDAAgD,CAAC;IACnE;IACA,IAAI,IAAI,CAACZ,WAAW,CAACgD,cAAc,IAAI,CAACrB,OAAO,CAACG,QAAQ,EAAE;MACxD,MAAM,IAAIlB,KAAK,CAAC,2CAA2C,CAAC;IAC9D;EACF;AACF","ignoreList":[]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { SpeechToTextModelConfig } from '../../types/stt';
|
|
1
|
+
import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
|
|
2
2
|
export declare const useSpeechToText: ({ model, preventLoad, }: {
|
|
3
3
|
model: SpeechToTextModelConfig;
|
|
4
4
|
preventLoad?: boolean;
|
|
@@ -11,8 +11,8 @@ export declare const useSpeechToText: ({ model, preventLoad, }: {
|
|
|
11
11
|
nonCommittedTranscription: string;
|
|
12
12
|
encode: (waveform: number[] | Float32Array<ArrayBufferLike>) => Promise<Float32Array<ArrayBufferLike>>;
|
|
13
13
|
decode: (tokens: number[] | Int32Array<ArrayBufferLike>, encoderOutput: number[] | Float32Array<ArrayBufferLike>) => Promise<Float32Array<ArrayBufferLike>>;
|
|
14
|
-
transcribe: (waveform: number[] | Float32Array<ArrayBufferLike>, options?:
|
|
15
|
-
stream: () => Promise<string>;
|
|
14
|
+
transcribe: (waveform: number[] | Float32Array<ArrayBufferLike>, options?: DecodingOptions | undefined) => Promise<string>;
|
|
15
|
+
stream: (options?: DecodingOptions) => Promise<string>;
|
|
16
16
|
streamStop: () => Promise<void>;
|
|
17
17
|
streamInsert: (waveform: number[] | Float32Array<ArrayBufferLike>) => Promise<void>;
|
|
18
18
|
};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"useSpeechToText.d.ts","sourceRoot":"","sources":["../../../../src/hooks/natural_language_processing/useSpeechToText.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"useSpeechToText.d.ts","sourceRoot":"","sources":["../../../../src/hooks/natural_language_processing/useSpeechToText.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAE3E,eAAO,MAAM,eAAe,GAAI,yBAG7B;IACD,KAAK,EAAE,uBAAuB,CAAC;IAC/B,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;;;;;;;;;;uBAyDoB,eAAe;;;CA+CnC,CAAC"}
|
|
@@ -2,6 +2,7 @@ import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
|
|
|
2
2
|
export declare class SpeechToTextModule {
|
|
3
3
|
private nativeModule;
|
|
4
4
|
private modelConfig;
|
|
5
|
+
private textDecoder;
|
|
5
6
|
load(model: SpeechToTextModelConfig, onDownloadProgressCallback?: (progress: number) => void): Promise<void>;
|
|
6
7
|
encode(waveform: Float32Array | number[]): Promise<Float32Array>;
|
|
7
8
|
decode(tokens: Int32Array | number[], encoderOutput: Float32Array | number[]): Promise<Float32Array>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,YAAY,CAAM;IAE1B,OAAO,CAAC,WAAW,CAA2B;
|
|
1
|
+
{"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,YAAY,CAAM;IAE1B,OAAO,CAAC,WAAW,CAA2B;IAE9C,OAAO,CAAC,WAAW,CAGhB;IAEU,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe;IA6BtD,MAAM,CACjB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAChC,OAAO,CAAC,YAAY,CAAC;IAUX,MAAM,CACjB,MAAM,EAAE,UAAU,GAAG,MAAM,EAAE,EAC7B,aAAa,EAAE,YAAY,GAAG,MAAM,EAAE,GACrC,OAAO,CAAC,YAAY,CAAC;IAkBX,UAAU,CACrB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,EACjC,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;IAgBJ,MAAM,CAClB,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAqDjD,YAAY,CAAC,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxC,OAAO,CAAC,eAAe;CAQxB"}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { useEffect, useCallback, useState } from 'react';
|
|
2
2
|
import { ETError, getError } from '../../Error';
|
|
3
3
|
import { SpeechToTextModule } from '../../modules/natural_language_processing/SpeechToTextModule';
|
|
4
|
-
import { SpeechToTextModelConfig } from '../../types/stt';
|
|
4
|
+
import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
|
|
5
5
|
|
|
6
6
|
export const useSpeechToText = ({
|
|
7
7
|
model,
|
|
@@ -65,24 +65,29 @@ export const useSpeechToText = ({
|
|
|
65
65
|
[isReady, isGenerating, modelInstance]
|
|
66
66
|
);
|
|
67
67
|
|
|
68
|
-
const stream = useCallback(
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
68
|
+
const stream = useCallback(
|
|
69
|
+
async (options?: DecodingOptions) => {
|
|
70
|
+
if (!isReady) throw new Error(getError(ETError.ModuleNotLoaded));
|
|
71
|
+
if (isGenerating) throw new Error(getError(ETError.ModelGenerating));
|
|
72
|
+
setIsGenerating(true);
|
|
73
|
+
setCommittedTranscription('');
|
|
74
|
+
setNonCommittedTranscription('');
|
|
75
|
+
let transcription = '';
|
|
76
|
+
try {
|
|
77
|
+
for await (const { committed, nonCommitted } of modelInstance.stream(
|
|
78
|
+
options
|
|
79
|
+
)) {
|
|
80
|
+
setCommittedTranscription((prev) => prev + committed);
|
|
81
|
+
setNonCommittedTranscription(nonCommitted);
|
|
82
|
+
transcription += committed;
|
|
83
|
+
}
|
|
84
|
+
} finally {
|
|
85
|
+
setIsGenerating(false);
|
|
80
86
|
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
}, [isReady, isGenerating, modelInstance]);
|
|
87
|
+
return transcription;
|
|
88
|
+
},
|
|
89
|
+
[isReady, isGenerating, modelInstance]
|
|
90
|
+
);
|
|
86
91
|
|
|
87
92
|
const wrapper = useCallback(
|
|
88
93
|
<T extends (...args: any[]) => any>(fn: T) => {
|
|
@@ -7,6 +7,11 @@ export class SpeechToTextModule {
|
|
|
7
7
|
|
|
8
8
|
private modelConfig!: SpeechToTextModelConfig;
|
|
9
9
|
|
|
10
|
+
private textDecoder = new TextDecoder('utf-8', {
|
|
11
|
+
fatal: false,
|
|
12
|
+
ignoreBOM: true,
|
|
13
|
+
});
|
|
14
|
+
|
|
10
15
|
public async load(
|
|
11
16
|
model: SpeechToTextModelConfig,
|
|
12
17
|
onDownloadProgressCallback: (progress: number) => void = () => {}
|
|
@@ -83,8 +88,11 @@ export class SpeechToTextModule {
|
|
|
83
88
|
);
|
|
84
89
|
waveform = new Float32Array(waveform);
|
|
85
90
|
}
|
|
86
|
-
|
|
87
|
-
|
|
91
|
+
const transcriptionBytes = await this.nativeModule.transcribe(
|
|
92
|
+
waveform,
|
|
93
|
+
options.language || ''
|
|
94
|
+
);
|
|
95
|
+
return this.textDecoder.decode(new Uint8Array(transcriptionBytes));
|
|
88
96
|
}
|
|
89
97
|
|
|
90
98
|
public async *stream(
|
|
@@ -105,8 +113,13 @@ export class SpeechToTextModule {
|
|
|
105
113
|
(async () => {
|
|
106
114
|
try {
|
|
107
115
|
await this.nativeModule.stream(
|
|
108
|
-
(committed:
|
|
109
|
-
queue.push({
|
|
116
|
+
(committed: number[], nonCommitted: number[], isDone: boolean) => {
|
|
117
|
+
queue.push({
|
|
118
|
+
committed: this.textDecoder.decode(new Uint8Array(committed)),
|
|
119
|
+
nonCommitted: this.textDecoder.decode(
|
|
120
|
+
new Uint8Array(nonCommitted)
|
|
121
|
+
),
|
|
122
|
+
});
|
|
110
123
|
if (isDone) {
|
|
111
124
|
finished = true;
|
|
112
125
|
}
|