react-native-executorch 0.5.11 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/common/rnexecutorch/host_objects/JsiConversions.h +19 -8
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +19 -14
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +5 -2
- package/lib/module/controllers/LLMController.js +1 -1
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +8 -3
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +1 -0
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/controllers/LLMController.ts +1 -1
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +17 -4
|
@@ -66,7 +66,8 @@ inline JSTensorViewIn getValue<JSTensorViewIn>(const jsi::Value &val,
|
|
|
66
66
|
tensorView.sizes.reserve(numShapeDims);
|
|
67
67
|
|
|
68
68
|
for (size_t i = 0; i < numShapeDims; ++i) {
|
|
69
|
-
int32_t dim =
|
|
69
|
+
int32_t dim =
|
|
70
|
+
getValue<int32_t>(shapeArray.getValueAtIndex(runtime, i), runtime);
|
|
70
71
|
tensorView.sizes.push_back(dim);
|
|
71
72
|
}
|
|
72
73
|
|
|
@@ -173,23 +174,24 @@ inline std::vector<T> getArrayAsVector(const jsi::Value &val,
|
|
|
173
174
|
return result;
|
|
174
175
|
}
|
|
175
176
|
|
|
176
|
-
|
|
177
177
|
// Template specializations for std::vector<T> types
|
|
178
178
|
// getValue specialization for std::vector<JSTensorViewIn>: converts the given
// JS value into a vector of tensor views by delegating to getArrayAsVector.
template <>
|
|
179
|
-
inline std::vector<JSTensorViewIn>
|
|
180
|
-
|
|
179
|
+
inline std::vector<JSTensorViewIn>
|
|
180
|
+
getValue<std::vector<JSTensorViewIn>>(const jsi::Value &val,
|
|
181
|
+
jsi::Runtime &runtime) {
|
|
181
182
|
return getArrayAsVector<JSTensorViewIn>(val, runtime);
|
|
182
183
|
}
|
|
183
184
|
|
|
184
185
|
// getValue specialization for std::vector<std::string>: converts the given
// JS value into a vector of strings by delegating to getArrayAsVector.
template <>
|
|
185
|
-
inline std::vector<std::string>
|
|
186
|
-
|
|
186
|
+
inline std::vector<std::string>
|
|
187
|
+
getValue<std::vector<std::string>>(const jsi::Value &val,
|
|
188
|
+
jsi::Runtime &runtime) {
|
|
187
189
|
return getArrayAsVector<std::string>(val, runtime);
|
|
188
190
|
}
|
|
189
191
|
|
|
190
192
|
// getValue specialization for std::vector<int32_t>: converts the given
// JS value into a vector of 32-bit integers by delegating to getArrayAsVector.
template <>
|
|
191
|
-
inline std::vector<int32_t>
|
|
192
|
-
|
|
193
|
+
inline std::vector<int32_t>
|
|
194
|
+
getValue<std::vector<int32_t>>(const jsi::Value &val, jsi::Runtime &runtime) {
|
|
193
195
|
return getArrayAsVector<int32_t>(val, runtime);
|
|
194
196
|
}
|
|
195
197
|
|
|
@@ -280,6 +282,15 @@ inline jsi::Value getJsiValue(const std::vector<int32_t> &vec,
|
|
|
280
282
|
return {runtime, array};
|
|
281
283
|
}
|
|
282
284
|
|
|
285
|
+
// Converts a vector of chars into a JS array holding one element per char,
// in the same order, and returns it as a jsi::Value.
// The JS callers shown in this diff (SpeechToTextModule.transcribe / stream)
// wrap the resulting array in a Uint8Array and decode it as UTF-8 text.
inline jsi::Value getJsiValue(const std::vector<char> &vec,
|
|
286
|
+
jsi::Runtime &runtime) {
|
|
287
|
+
jsi::Array array(runtime, vec.size());
|
|
288
|
+
for (size_t i = 0; i < vec.size(); i++) {
|
|
289
|
+
// NOTE(review): each char is handed to jsi::Value as a number. If char is
// signed on the target, bytes >= 0x80 would presumably arrive in JS as
// negative numbers; the Uint8Array wrapping on the JS side should map them
// back to 0..255 — confirm.
array.setValueAtIndex(runtime, i, jsi::Value(vec[i]));
|
|
290
|
+
}
|
|
291
|
+
return {runtime, array};
|
|
292
|
+
}
|
|
293
|
+
|
|
283
294
|
// Wraps a native int in a jsi::Value for the given runtime.
inline jsi::Value getJsiValue(int val, jsi::Runtime &runtime) {
|
|
284
295
|
return {runtime, val};
|
|
285
296
|
}
|
|
@@ -36,8 +36,8 @@ SpeechToText::decode(std::span<int32_t> tokens,
|
|
|
36
36
|
return this->makeOwningBuffer(decoderOutput);
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
std::
|
|
40
|
-
|
|
39
|
+
std::vector<char> SpeechToText::transcribe(std::span<float> waveform,
|
|
40
|
+
std::string languageOption) const {
|
|
41
41
|
std::vector<Segment> segments =
|
|
42
42
|
this->asr->transcribe(waveform, DecodingOptions(languageOption));
|
|
43
43
|
std::string transcription;
|
|
@@ -55,7 +55,8 @@ std::string SpeechToText::transcribe(std::span<float> waveform,
|
|
|
55
55
|
transcription += word.content;
|
|
56
56
|
}
|
|
57
57
|
}
|
|
58
|
-
|
|
58
|
+
|
|
59
|
+
return {transcription.begin(), transcription.end()};
|
|
59
60
|
}
|
|
60
61
|
|
|
61
62
|
size_t SpeechToText::getMemoryLowerBound() const noexcept {
|
|
@@ -79,16 +80,17 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
|
|
|
79
80
|
throw std::runtime_error("Streaming is already in progress");
|
|
80
81
|
}
|
|
81
82
|
|
|
82
|
-
auto nativeCallback =
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
callback->call(
|
|
88
|
-
|
|
89
|
-
|
|
83
|
+
// Forwards one streaming result to the JS callback. Both text pieces are
// passed as raw char vectors and converted with getJsiValue, so JS receives
// them as arrays of numbers rather than strings, followed by the isDone flag.
auto nativeCallback =
|
|
84
|
+
[this, callback](const std::vector<char> &committedVec,
|
|
85
|
+
const std::vector<char> &nonCommittedVec, bool isDone) {
|
|
86
|
+
// The inner lambda captures the callback, both vectors and the flag by
// value, so it holds its own copies and does not refer back to the
// arguments of this call.
// NOTE(review): presumably invokeAsync runs the lambda later on the JS
// thread, which is why copies are needed — confirm against CallInvoker.
this->callInvoker->invokeAsync([callback, committedVec, nonCommittedVec,
|
|
87
|
+
isDone](jsi::Runtime &rt) {
|
|
88
|
+
callback->call(
|
|
89
|
+
rt, rnexecutorch::jsi_conversion::getJsiValue(committedVec, rt),
|
|
90
|
+
rnexecutorch::jsi_conversion::getJsiValue(nonCommittedVec, rt),
|
|
91
|
+
jsi::Value(isDone));
|
|
90
92
|
});
|
|
91
|
-
|
|
93
|
+
};
|
|
92
94
|
|
|
93
95
|
this->isStreaming = true;
|
|
94
96
|
while (this->isStreaming) {
|
|
@@ -99,12 +101,15 @@ void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,
|
|
|
99
101
|
}
|
|
100
102
|
ProcessResult res =
|
|
101
103
|
this->processor->processIter(DecodingOptions(languageOption));
|
|
102
|
-
|
|
104
|
+
|
|
105
|
+
nativeCallback({res.committed.begin(), res.committed.end()},
|
|
106
|
+
{res.nonCommitted.begin(), res.nonCommitted.end()}, false);
|
|
103
107
|
this->readyToProcess = false;
|
|
104
108
|
}
|
|
105
109
|
|
|
106
110
|
std::string committed = this->processor->finish();
|
|
107
|
-
|
|
111
|
+
|
|
112
|
+
nativeCallback({committed.begin(), committed.end()}, {}, true);
|
|
108
113
|
|
|
109
114
|
this->resetStreamState();
|
|
110
115
|
}
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include "rnexecutorch/models/speech_to_text/stream/OnlineASRProcessor.h"
|
|
4
|
+
#include <span>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
4
7
|
|
|
5
8
|
namespace rnexecutorch {
|
|
6
9
|
|
|
@@ -16,8 +19,8 @@ public:
|
|
|
16
19
|
std::shared_ptr<OwningArrayBuffer> encode(std::span<float> waveform) const;
|
|
17
20
|
std::shared_ptr<OwningArrayBuffer>
|
|
18
21
|
decode(std::span<int32_t> tokens, std::span<float> encoderOutput) const;
|
|
19
|
-
std::
|
|
20
|
-
|
|
22
|
+
std::vector<char> transcribe(std::span<float> waveform,
|
|
23
|
+
std::string languageOption) const;
|
|
21
24
|
|
|
22
25
|
size_t getMemoryLowerBound() const noexcept;
|
|
23
26
|
|
|
@@ -221,7 +221,7 @@ export class LLMController {
|
|
|
221
221
|
throw Error("Tokenizer config doesn't include chat_template");
|
|
222
222
|
}
|
|
223
223
|
const template = new Template(tokenizerConfig.chat_template);
|
|
224
|
-
const specialTokens = Object.fromEntries(Object.
|
|
224
|
+
const specialTokens = Object.fromEntries(Object.values(SPECIAL_TOKENS).filter(key => key in tokenizerConfig).map(key => [key, tokenizerConfig[key]]));
|
|
225
225
|
const result = template.render({
|
|
226
226
|
messages,
|
|
227
227
|
tools,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["ResourceFetcher","ETError","getError","Template","DEFAULT_CHAT_CONFIG","readAsStringAsync","SPECIAL_TOKENS","parseToolCall","Logger","LLMController","chatConfig","_response","_isReady","_isGenerating","_messageHistory","constructor","tokenCallback","responseCallback","messageHistoryCallback","isReadyCallback","isGeneratingCallback","undefined","warn","token","response","messageHistory","isReady","isGenerating","load","modelSource","tokenizerSource","tokenizerConfigSource","onDownloadProgressCallback","initialMessageHistory","tokenizersPromise","fetch","modelPromise","tokenizersResults","modelResult","Promise","all","tokenizerPath","tokenizerConfigPath","modelPath","Error","tokenizerConfig","JSON","parse","nativeModule","global","loadLLM","onToken","data","EOS_TOKEN","indexOf","eos_token","replaceAll","PAD_TOKEN","pad_token","length","e","setTokenCallback","configure","toolsConfig","generationConfig","outputTokenBatchSize","setCountInterval","batchTimeInterval","setTimeInterval","delete","ModelGenerating","unload","forward","input","ModuleNotLoaded","generate","interrupt","getGeneratedTokenCount","messages","tools","role","renderedChat","applyChatTemplate","tools_in_user_message","add_generation_prompt","sendMessage","message","content","messageHistoryWithPrompt","systemPrompt","slice","contextWindowLength","displayToolCalls","toolCalls","toolCall","executeToolCallback","then","toolResponse","deleteMessage","index","newMessageHistory","templateFlags","chat_template","template","specialTokens","Object","fromEntries","
|
|
1
|
+
{"version":3,"names":["ResourceFetcher","ETError","getError","Template","DEFAULT_CHAT_CONFIG","readAsStringAsync","SPECIAL_TOKENS","parseToolCall","Logger","LLMController","chatConfig","_response","_isReady","_isGenerating","_messageHistory","constructor","tokenCallback","responseCallback","messageHistoryCallback","isReadyCallback","isGeneratingCallback","undefined","warn","token","response","messageHistory","isReady","isGenerating","load","modelSource","tokenizerSource","tokenizerConfigSource","onDownloadProgressCallback","initialMessageHistory","tokenizersPromise","fetch","modelPromise","tokenizersResults","modelResult","Promise","all","tokenizerPath","tokenizerConfigPath","modelPath","Error","tokenizerConfig","JSON","parse","nativeModule","global","loadLLM","onToken","data","EOS_TOKEN","indexOf","eos_token","replaceAll","PAD_TOKEN","pad_token","length","e","setTokenCallback","configure","toolsConfig","generationConfig","outputTokenBatchSize","setCountInterval","batchTimeInterval","setTimeInterval","delete","ModelGenerating","unload","forward","input","ModuleNotLoaded","generate","interrupt","getGeneratedTokenCount","messages","tools","role","renderedChat","applyChatTemplate","tools_in_user_message","add_generation_prompt","sendMessage","message","content","messageHistoryWithPrompt","systemPrompt","slice","contextWindowLength","displayToolCalls","toolCalls","toolCall","executeToolCallback","then","toolResponse","deleteMessage","index","newMessageHistory","templateFlags","chat_template","template","specialTokens","Object","fromEntries","values","filter","key","map","result","render"],"sourceRoot":"../../../src","sources":["controllers/LLMController.ts"],"mappings":";;AACA,SAASA,eAAe,QAAQ,0BAA0B;AAC1D,SAASC,OAAO,EAAEC,QAAQ,QAAQ,UAAU;AAC5C,SAASC,QAAQ,QAAQ,oBAAoB;AAC7C,SAASC,mBAAmB,QAAQ,0BAA0B;AAC9D,SAASC,iBAAiB,QAAQ,kBAAkB;AACpD,SAKEC,cAAc,QAET,cAAc;AACrB,SAASC,aAAa,QAAQ,cAAc;AAC5C,SAASC,MAAM,QAAQ,kBAAkB;AAEzC,OAAO,MAAMC,aAAa,CAAC;EAEjBC,UAAU,GAAeN,mBAAmB;EAI5CO,SAAS
,GAAG,EAAE;EACdC,QAAQ,GAAG,KAAK;EAChBC,aAAa,GAAG,KAAK;EACrBC,eAAe,GAAc,EAAE;;EAEvC;;EAOAC,WAAWA,CAAC;IACVC,aAAa;IACbC,gBAAgB;IAChBC,sBAAsB;IACtBC,eAAe;IACfC;EAOF,CAAC,EAAE;IACD,IAAIH,gBAAgB,KAAKI,SAAS,EAAE;MAClCb,MAAM,CAACc,IAAI,CACT,sEACF,CAAC;IACH;IACA,IAAI,CAACN,aAAa,GAAIO,KAAK,IAAK;MAC9BP,aAAa,GAAGO,KAAK,CAAC;IACxB,CAAC;IACD,IAAI,CAACN,gBAAgB,GAAIO,QAAQ,IAAK;MACpC,IAAI,CAACb,SAAS,GAAGa,QAAQ;MACzBP,gBAAgB,GAAGO,QAAQ,CAAC;IAC9B,CAAC;IACD,IAAI,CAACN,sBAAsB,GAAIO,cAAc,IAAK;MAChD,IAAI,CAACX,eAAe,GAAGW,cAAc;MACrCP,sBAAsB,GAAGO,cAAc,CAAC;IAC1C,CAAC;IACD,IAAI,CAACN,eAAe,GAAIO,OAAO,IAAK;MAClC,IAAI,CAACd,QAAQ,GAAGc,OAAO;MACvBP,eAAe,GAAGO,OAAO,CAAC;IAC5B,CAAC;IACD,IAAI,CAACN,oBAAoB,GAAIO,YAAY,IAAK;MAC5C,IAAI,CAACd,aAAa,GAAGc,YAAY;MACjCP,oBAAoB,GAAGO,YAAY,CAAC;IACtC,CAAC;EACH;EAEA,IAAWH,QAAQA,CAAA,EAAG;IACpB,OAAO,IAAI,CAACb,SAAS;EACvB;EACA,IAAWe,OAAOA,CAAA,EAAG;IACnB,OAAO,IAAI,CAACd,QAAQ;EACtB;EACA,IAAWe,YAAYA,CAAA,EAAG;IACxB,OAAO,IAAI,CAACd,aAAa;EAC3B;EACA,IAAWY,cAAcA,CAAA,EAAG;IAC1B,OAAO,IAAI,CAACX,eAAe;EAC7B;EAEA,MAAac,IAAIA,CAAC;IAChBC,WAAW;IACXC,eAAe;IACfC,qBAAqB;IACrBC;EAMF,CAAC,EAAE;IACD;IACA,IAAI,CAACf,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;IAChC,IAAI,CAACD,eAAe,CAAC,KAAK,CAAC;IAE3B,IAAI;MACF,MAAMe,iBAAiB,GAAGlC,eAAe,CAACmC,KAAK,CAC7Cd,SAAS,EACTS,eAAe,EACfC,qBACF,CAAC;MAED,MAAMK,YAAY,GAAGpC,eAAe,CAACmC,KAAK,CACxCH,0BAA0B,EAC1BH,WACF,CAAC;MAED,MAAM,CAACQ,iBAAiB,EAAEC,WAAW,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CACzDN,iBAAiB,EACjBE,YAAY,CACb,CAAC;MAEF,MAAMK,aAAa,GAAGJ,iBAAiB,GAAG,CAAC,CAAC;MAC5C,MAAMK,mBAAmB,GAAGL,iBAAiB,GAAG,CAAC,CAAC;MAClD,MAAMM,SAAS,GAAGL,WAAW,GAAG,CAAC,CAAC;MAElC,IAAI,CAACG,aAAa,IAAI,CAACC,mBAAmB,IAAI,CAACC,SAAS,EAAE;QACxD,MAAM,IAAIC,KAAK,CAAC,uBAAuB,CAAC;MAC1C;MAEA,IAAI,CAACC,eAAe,GAAGC,IAAI,CAACC,KAAK,CAC/B,MAAM1C,iBAAiB,CAAC,SAAS,GAAGqC,mBAAoB,CAC1D,CAAC;MACD,IAAI,CAACM,YAAY,GAAGC,MAAM,CAACC,OAAO,CAACP,SAAS,EAAEF,aAAa,CAAC;MAC5D,IAAI,CAACtB,eAAe,CAAC,IAAI,CAAC;M
AC1B,IAAI,CAACgC,OAAO,GAAIC,IAAY,IAAK;QAC/B,IAAI,CAACA,IAAI,EAAE;UACT;QACF;QAEA,IACE9C,cAAc,CAAC+C,SAAS,IAAI,IAAI,CAACR,eAAe,IAChDO,IAAI,CAACE,OAAO,CAAC,IAAI,CAACT,eAAe,CAACU,SAAS,CAAC,IAAI,CAAC,EACjD;UACAH,IAAI,GAAGA,IAAI,CAACI,UAAU,CAAC,IAAI,CAACX,eAAe,CAACU,SAAS,EAAE,EAAE,CAAC;QAC5D;QACA,IACEjD,cAAc,CAACmD,SAAS,IAAI,IAAI,CAACZ,eAAe,IAChDO,IAAI,CAACE,OAAO,CAAC,IAAI,CAACT,eAAe,CAACa,SAAS,CAAC,IAAI,CAAC,EACjD;UACAN,IAAI,GAAGA,IAAI,CAACI,UAAU,CAAC,IAAI,CAACX,eAAe,CAACa,SAAS,EAAE,EAAE,CAAC;QAC5D;QACA,IAAIN,IAAI,CAACO,MAAM,KAAK,CAAC,EAAE;UACrB;QACF;QAEA,IAAI,CAAC3C,aAAa,CAACoC,IAAI,CAAC;QACxB,IAAI,CAACnC,gBAAgB,CAAC,IAAI,CAACN,SAAS,GAAGyC,IAAI,CAAC;MAC9C,CAAC;IACH,CAAC,CAAC,OAAOQ,CAAC,EAAE;MACV,IAAI,CAACzC,eAAe,CAAC,KAAK,CAAC;MAC3B,MAAM,IAAIyB,KAAK,CAAC1C,QAAQ,CAAC0D,CAAC,CAAC,CAAC;IAC9B;EACF;EAEOC,gBAAgBA,CAAC7C,aAAsC,EAAE;IAC9D,IAAI,CAACA,aAAa,GAAGA,aAAa;EACpC;EAEO8C,SAASA,CAAC;IACfpD,UAAU;IACVqD,WAAW;IACXC;EAKF,CAAC,EAAE;IACD,IAAI,CAACtD,UAAU,GAAG;MAAE,GAAGN,mBAAmB;MAAE,GAAGM;IAAW,CAAC;IAC3D,IAAI,CAACqD,WAAW,GAAGA,WAAW;IAE9B,IAAIC,gBAAgB,EAAEC,oBAAoB,EAAE;MAC1C,IAAI,CAACjB,YAAY,CAACkB,gBAAgB,CAACF,gBAAgB,CAACC,oBAAoB,CAAC;IAC3E;IACA,IAAID,gBAAgB,EAAEG,iBAAiB,EAAE;MACvC,IAAI,CAACnB,YAAY,CAACoB,eAAe,CAACJ,gBAAgB,CAACG,iBAAiB,CAAC;IACvE;;IAEA;IACA,IAAI,CAAClD,gBAAgB,CAAC,EAAE,CAAC;IACzB,IAAI,CAACC,sBAAsB,CAAC,IAAI,CAACR,UAAU,CAACuB,qBAAqB,CAAC;IAClE,IAAI,CAACb,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEOiD,MAAMA,CAAA,EAAG;IACd,IAAI,IAAI,CAACxD,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CACb1C,QAAQ,CAACD,OAAO,CAACqE,eAAe,CAAC,GAC/B,+DACJ,CAAC;IACH;IACA,IAAI,CAACnB,OAAO,GAAG,MAAM,CAAC,CAAC;IACvB,IAAI,CAACH,YAAY,CAACuB,MAAM,CAAC,CAAC;IAC1B,IAAI,CAACpD,eAAe,CAAC,KAAK,CAAC;IAC3B,IAAI,CAACC,oBAAoB,CAAC,KAAK,CAAC;EAClC;EAEA,MAAaoD,OAAOA,CAACC,KAAa,EAAE;IAClC,IAAI,CAAC,IAAI,CAAC7D,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACyE,eAAe,CAAC,CAAC;IACpD;IACA,IAAI,IAAI,CAAC7D,aAAa,EAAE;MACtB,MAAM,IAAI+B,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACqE,eAAe,CAAC,CAAC;IACpD;IACA,IAAI;MACF,IAAI,CAACrD,gBAA
gB,CAAC,EAAE,CAAC;MACzB,IAAI,CAACG,oBAAoB,CAAC,IAAI,CAAC;MAC/B,MAAM,IAAI,CAAC4B,YAAY,CAAC2B,QAAQ,CAACF,KAAK,EAAE,IAAI,CAACtB,OAAO,CAAC;IACvD,CAAC,CAAC,OAAOS,CAAC,EAAE;MACV,MAAM,IAAIhB,KAAK,CAAC1C,QAAQ,CAAC0D,CAAC,CAAC,CAAC;IAC9B,CAAC,SAAS;MACR,IAAI,CAACxC,oBAAoB,CAAC,KAAK,CAAC;IAClC;EACF;EAEOwD,SAASA,CAAA,EAAG;IACjB,IAAI,CAAC5B,YAAY,CAAC4B,SAAS,CAAC,CAAC;EAC/B;EAEOC,sBAAsBA,CAAA,EAAW;IACtC,OAAO,IAAI,CAAC7B,YAAY,CAAC6B,sBAAsB,CAAC,CAAC;EACnD;EAEA,MAAaF,QAAQA,CAACG,QAAmB,EAAEC,KAAiB,EAAE;IAC5D,IAAI,CAAC,IAAI,CAACnE,QAAQ,EAAE;MAClB,MAAM,IAAIgC,KAAK,CAAC1C,QAAQ,CAACD,OAAO,CAACyE,eAAe,CAAC,CAAC;IACpD;IACA,IAAII,QAAQ,CAACnB,MAAM,KAAK,CAAC,EAAE;MACzB,MAAM,IAAIf,KAAK,CAAC,yBAAyB,CAAC;IAC5C;IACA,IAAIkC,QAAQ,CAAC,CAAC,CAAC,IAAIA,QAAQ,CAAC,CAAC,CAAC,CAACE,IAAI,KAAK,QAAQ,EAAE;MAChDxE,MAAM,CAACc,IAAI,CACT,0LACF,CAAC;IACH;IAEA,MAAM2D,YAAoB,GAAG,IAAI,CAACC,iBAAiB,CACjDJ,QAAQ,EACR,IAAI,CAACjC,eAAe,EACpBkC,KAAK;IACL;IACA;MAAEI,qBAAqB,EAAE,KAAK;MAAEC,qBAAqB,EAAE;IAAK,CAC9D,CAAC;IAED,MAAM,IAAI,CAACZ,OAAO,CAACS,YAAY,CAAC;EAClC;EAEA,MAAaI,WAAWA,CAACC,OAAe,EAAE;IACxC,IAAI,CAACpE,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;MAAEyE,OAAO,EAAED,OAAO;MAAEN,IAAI,EAAE;IAAO,CAAC,CACnC,CAAC;IAEF,MAAMQ,wBAAmC,GAAG,CAC1C;MAAED,OAAO,EAAE,IAAI,CAAC7E,UAAU,CAAC+E,YAAY;MAAET,IAAI,EAAE;IAAS,CAAC,EACzD,GAAG,IAAI,CAAClE,eAAe,CAAC4E,KAAK,CAAC,CAAC,IAAI,CAAChF,UAAU,CAACiF,mBAAmB,CAAC,CACpE;IAED,MAAM,IAAI,CAAChB,QAAQ,CAACa,wBAAwB,EAAE,IAAI,CAACzB,WAAW,EAAEgB,KAAK,CAAC;IAEtE,IAAI,CAAC,IAAI,CAAChB,WAAW,IAAI,IAAI,CAACA,WAAW,CAAC6B,gBAAgB,EAAE;MAC1D,IAAI,CAAC1E,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;QAAEyE,OAAO,EAAE,IAAI,CAAC5E,SAAS;QAAEqE,IAAI,EAAE;MAAY,CAAC,CAC/C,CAAC;IACJ;IACA,IAAI,CAAC,IAAI,CAACjB,WAAW,EAAE;MACrB;IACF;IAEA,MAAM8B,SAAS,GAAGtF,aAAa,CAAC,IAAI,CAACI,SAAS,CAAC;IAE/C,KAAK,MAAMmF,QAAQ,IAAID,SAAS,EAAE;MAChC,IAAI,CAAC9B,WAAW,CACbgC,mBAAmB,CAACD,QAAQ,CAAC,CAC7BE,IAAI,CAAEC,YAA2B,IAAK;QACrC,IAAIA,YAAY,EAAE;UAChB,IAAI,CAAC/E,sBAAsB,CAAC,CAC1B,GAAG,IAAI,CAACJ,eAAe,EACvB;YAAEyE,OAAO,EAAEU,YAAY;YAAEj
B,IAAI,EAAE;UAAY,CAAC,CAC7C,CAAC;QACJ;MACF,CAAC,CAAC;IACN;EACF;EAEOkB,aAAaA,CAACC,KAAa,EAAE;IAClC;IACA;IACA,MAAMC,iBAAiB,GAAG,IAAI,CAACtF,eAAe,CAAC4E,KAAK,CAAC,CAAC,EAAES,KAAK,CAAC;IAE9D,IAAI,CAACjF,sBAAsB,CAACkF,iBAAiB,CAAC;EAChD;EAEQlB,iBAAiBA,CACvBJ,QAAmB,EACnBjC,eAAoB,EACpBkC,KAAiB,EACjBsB,aAAsB,EACd;IACR,IAAI,CAACxD,eAAe,CAACyD,aAAa,EAAE;MAClC,MAAM1D,KAAK,CAAC,gDAAgD,CAAC;IAC/D;IACA,MAAM2D,QAAQ,GAAG,IAAIpG,QAAQ,CAAC0C,eAAe,CAACyD,aAAa,CAAC;IAE5D,MAAME,aAAa,GAAGC,MAAM,CAACC,WAAW,CACtCD,MAAM,CAACE,MAAM,CAACrG,cAAc,CAAC,CAC1BsG,MAAM,CAAEC,GAAG,IAAKA,GAAG,IAAIhE,eAAe,CAAC,CACvCiE,GAAG,CAAED,GAAG,IAAK,CAACA,GAAG,EAAEhE,eAAe,CAACgE,GAAG,CAAC,CAAC,CAC7C,CAAC;IAED,MAAME,MAAM,GAAGR,QAAQ,CAACS,MAAM,CAAC;MAC7BlC,QAAQ;MACRC,KAAK;MACL,GAAGsB,aAAa;MAChB,GAAGG;IACL,CAAC,CAAC;IACF,OAAOO,MAAM;EACf;AACF","ignoreList":[]}
|
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
import { Logger } from '../../common/Logger';
|
|
4
4
|
import { ResourceFetcher } from '../../utils/ResourceFetcher';
|
|
5
5
|
export class SpeechToTextModule {
|
|
6
|
+
textDecoder = new TextDecoder('utf-8', {
|
|
7
|
+
fatal: false,
|
|
8
|
+
ignoreBOM: true
|
|
9
|
+
});
|
|
6
10
|
async load(model, onDownloadProgressCallback = () => {}) {
|
|
7
11
|
this.modelConfig = model;
|
|
8
12
|
const tokenizerLoadPromise = ResourceFetcher.fetch(undefined, model.tokenizerSource);
|
|
@@ -39,7 +43,8 @@ export class SpeechToTextModule {
|
|
|
39
43
|
Logger.info('Passing waveform as number[] is deprecated, use Float32Array instead');
|
|
40
44
|
waveform = new Float32Array(waveform);
|
|
41
45
|
}
|
|
42
|
-
|
|
46
|
+
const transcriptionBytes = await this.nativeModule.transcribe(waveform, options.language || '');
|
|
47
|
+
return this.textDecoder.decode(new Uint8Array(transcriptionBytes));
|
|
43
48
|
}
|
|
44
49
|
async *stream(options = {}) {
|
|
45
50
|
this.validateOptions(options);
|
|
@@ -55,8 +60,8 @@ export class SpeechToTextModule {
|
|
|
55
60
|
try {
|
|
56
61
|
await this.nativeModule.stream((committed, nonCommitted, isDone) => {
|
|
57
62
|
queue.push({
|
|
58
|
-
committed,
|
|
59
|
-
nonCommitted
|
|
63
|
+
committed: this.textDecoder.decode(new Uint8Array(committed)),
|
|
64
|
+
nonCommitted: this.textDecoder.decode(new Uint8Array(nonCommitted))
|
|
60
65
|
});
|
|
61
66
|
if (isDone) {
|
|
62
67
|
finished = true;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["Logger","ResourceFetcher","SpeechToTextModule","load","model","onDownloadProgressCallback","modelConfig","tokenizerLoadPromise","fetch","undefined","tokenizerSource","encoderDecoderPromise","encoderSource","decoderSource","tokenizerSources","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","encode","waveform","Array","isArray","info","Float32Array","decode","tokens","encoderOutput","Int32Array","transcribe","options","validateOptions","language","stream","queue","waiter","finished","error","wake","committed","nonCommitted","isDone","push","e","length","shift","r","streamInsert","streamStop","isMultilingual"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AAAA,SAASA,MAAM,QAAQ,qBAAqB;AAE5C,SAASC,eAAe,QAAQ,6BAA6B;AAE7D,OAAO,MAAMC,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"names":["Logger","ResourceFetcher","SpeechToTextModule","textDecoder","TextDecoder","fatal","ignoreBOM","load","model","onDownloadProgressCallback","modelConfig","tokenizerLoadPromise","fetch","undefined","tokenizerSource","encoderDecoderPromise","encoderSource","decoderSource","tokenizerSources","encoderDecoderResults","Promise","all","Error","nativeModule","global","loadSpeechToText","encode","waveform","Array","isArray","info","Float32Array","decode","tokens","encoderOutput","Int32Array","transcribe","options","validateOptions","transcriptionBytes","language","Uint8Array","stream","queue","waiter","finished","error","wake","committed","nonCommitted","isDone","push","e","length","shift","r","streamInsert","streamStop","isMultilingual"],"sourceRoot":"../../../../src","sources":["modules/natural_language_processing/SpeechToTextModule.ts"],"mappings":";;AAAA,SAASA,MAAM,QAAQ,qBAAqB;AAE5C,SAASC,eAAe,QAAQ,6BAA6B;AAE7D,OAAO,MAAMC,kBAAkB,CAAC;EAKtBC,WAAW,GAAG,IAAIC,WAAW,CAAC,OAAO,EAAE;IAC7CC,KAAK,EAAE,KAAK;IACZC,SAAS,EAAE;EACb,CAAC,CAAC;EAEF,MAAaC,IAAIA,CACfC,KAA8B,EAC9BC,0BAAsD,GAAGA,CAAA,KAAM,CAAC,CAAC,EACjE;IACA,IAAI,CAACC,WAAW,GAAGF,KAAK;IAExB,MAAMG,oBAAoB,GAAGV,eAAe,CAACW,KAAK,CAChDC,SAAS,EACTL,KAAK,CAACM,eACR,CAAC;IACD,MAAMC,qBAAqB,GAAGd,eAAe,CAACW,KAAK,CACjDH,0BAA0B,EAC1BD,KAAK,CAACQ,aAAa,EACnBR,KAAK,CAACS,aACR,CAAC;IACD,MAAM,CAACC,gBAAgB,EAAEC,qBAAqB,CAAC,GAAG,MAAMC,OAAO,CAACC,GAAG,CAAC,CAClEV,oBAAoB,EACpBI,qBAAqB,CACtB,CAAC;IACF,MAAMC,aAAa,GAAGG,qBAAqB,GAAG,CAAC,CAAC;IAChD,MAAMF,aAAa,GAAGE,qBAAqB,GAAG,CAAC,CAAC;IAChD,IAAI,CAACH,aAAa,IAAI,CAACC,aAAa,IAAI,CAACC,gBAAgB,EAAE;MACzD,MAAM,IAAII,KAAK,CAAC,uBAAuB,CAAC;IAC1C;IACA,IAAI,CAACC,YAAY,GAAG,MAAMC,MAAM,CAACC,gBAAgB,CAC/CT,aAAa,EACbC,aAAa,EACbC,gBAAgB,CAAC,CAAC,CACpB,CAAC;EACH;EAEA,MAAaQ,MAAMA,CACjBC,QAAiC,EACV;IACvB,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAII,YAAY,CAAC,MAAM,IAAI,CAACR,YAAY,CAACG,M
AAM,CAACC,QAAQ,CAAC,CAAC;EACnE;EAEA,MAAaK,MAAMA,CACjBC,MAA6B,EAC7BC,aAAsC,EACf;IACvB,IAAIN,KAAK,CAACC,OAAO,CAACI,MAAM,CAAC,EAAE;MACzBjC,MAAM,CAAC8B,IAAI,CACT,kEACF,CAAC;MACDG,MAAM,GAAG,IAAIE,UAAU,CAACF,MAAM,CAAC;IACjC;IACA,IAAIL,KAAK,CAACC,OAAO,CAACK,aAAa,CAAC,EAAE;MAChClC,MAAM,CAAC8B,IAAI,CACT,2EACF,CAAC;MACDI,aAAa,GAAG,IAAIH,YAAY,CAACG,aAAa,CAAC;IACjD;IACA,OAAO,IAAIH,YAAY,CACrB,MAAM,IAAI,CAACR,YAAY,CAACS,MAAM,CAACC,MAAM,EAAEC,aAAa,CACtD,CAAC;EACH;EAEA,MAAaE,UAAUA,CACrBT,QAAiC,EACjCU,OAAwB,GAAG,CAAC,CAAC,EACZ;IACjB,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,IAAIT,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,MAAMY,kBAAkB,GAAG,MAAM,IAAI,CAAChB,YAAY,CAACa,UAAU,CAC3DT,QAAQ,EACRU,OAAO,CAACG,QAAQ,IAAI,EACtB,CAAC;IACD,OAAO,IAAI,CAACrC,WAAW,CAAC6B,MAAM,CAAC,IAAIS,UAAU,CAACF,kBAAkB,CAAC,CAAC;EACpE;EAEA,OAAcG,MAAMA,CAClBL,OAAwB,GAAG,CAAC,CAAC,EACgC;IAC7D,IAAI,CAACC,eAAe,CAACD,OAAO,CAAC;IAE7B,MAAMM,KAAoD,GAAG,EAAE;IAC/D,IAAIC,MAA2B,GAAG,IAAI;IACtC,IAAIC,QAAQ,GAAG,KAAK;IACpB,IAAIC,KAAc;IAElB,MAAMC,IAAI,GAAGA,CAAA,KAAM;MACjBH,MAAM,GAAG,CAAC;MACVA,MAAM,GAAG,IAAI;IACf,CAAC;IAED,CAAC,YAAY;MACX,IAAI;QACF,MAAM,IAAI,CAACrB,YAAY,CAACmB,MAAM,CAC5B,CAACM,SAAmB,EAAEC,YAAsB,EAAEC,MAAe,KAAK;UAChEP,KAAK,CAACQ,IAAI,CAAC;YACTH,SAAS,EAAE,IAAI,CAAC7C,WAAW,CAAC6B,MAAM,CAAC,IAAIS,UAAU,CAACO,SAAS,CAAC,CAAC;YAC7DC,YAAY,EAAE,IAAI,CAAC9C,WAAW,CAAC6B,MAAM,CACnC,IAAIS,UAAU,CAACQ,YAAY,CAC7B;UACF,CAAC,CAAC;UACF,IAAIC,MAAM,EAAE;YACVL,QAAQ,GAAG,IAAI;UACjB;UACAE,IAAI,CAAC,CAAC;QACR,CAAC,EACDV,OAAO,CAACG,QAAQ,IAAI,EACtB,CAAC;QACDK,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR,CAAC,CAAC,OAAOK,CAAC,EAAE;QACVN,KAAK,GAAGM,CAAC;QACTP,QAAQ,GAAG,IAAI;QACfE,IAAI,CAAC,CAAC;MACR;IACF,CAAC,EAAE,CAAC;IAEJ,OAAO,IAAI,EAAE;MACX,IAAIJ,KAAK,CAACU,MAAM,GAAG,CAAC,EAAE;QACpB,MAAMV,KAAK,CAACW,KAAK,CAAC,CAAE;QACpB,IAAIT,QAAQ,IAAIF,KAAK,CAACU,MAAM,KAAK,CAAC,EAAE;UAClC;QACF;QACA;MACF;MACA,IAAIP,KAAK,EAAE,MAAMA,KAAK;MACtB,IAAID,QAAQ,EAAE;MACd,MAAM,IAAIzB,OA
AO,CAAQmC,CAAC,IAAMX,MAAM,GAAGW,CAAE,CAAC;IAC9C;EACF;EAEA,MAAaC,YAAYA,CAAC7B,QAAiC,EAAiB;IAC1E,IAAIC,KAAK,CAACC,OAAO,CAACF,QAAQ,CAAC,EAAE;MAC3B3B,MAAM,CAAC8B,IAAI,CACT,sEACF,CAAC;MACDH,QAAQ,GAAG,IAAII,YAAY,CAACJ,QAAQ,CAAC;IACvC;IACA,OAAO,IAAI,CAACJ,YAAY,CAACiC,YAAY,CAAC7B,QAAQ,CAAC;EACjD;EAEA,MAAa8B,UAAUA,CAAA,EAAkB;IACvC,OAAO,IAAI,CAAClC,YAAY,CAACkC,UAAU,CAAC,CAAC;EACvC;EAEQnB,eAAeA,CAACD,OAAwB,EAAE;IAChD,IAAI,CAAC,IAAI,CAAC3B,WAAW,CAACgD,cAAc,IAAIrB,OAAO,CAACG,QAAQ,EAAE;MACxD,MAAM,IAAIlB,KAAK,CAAC,gDAAgD,CAAC;IACnE;IACA,IAAI,IAAI,CAACZ,WAAW,CAACgD,cAAc,IAAI,CAACrB,OAAO,CAACG,QAAQ,EAAE;MACxD,MAAM,IAAIlB,KAAK,CAAC,2CAA2C,CAAC;IAC9D;EACF;AACF","ignoreList":[]}
|
|
@@ -2,6 +2,7 @@ import { DecodingOptions, SpeechToTextModelConfig } from '../../types/stt';
|
|
|
2
2
|
export declare class SpeechToTextModule {
|
|
3
3
|
private nativeModule;
|
|
4
4
|
private modelConfig;
|
|
5
|
+
private textDecoder;
|
|
5
6
|
load(model: SpeechToTextModelConfig, onDownloadProgressCallback?: (progress: number) => void): Promise<void>;
|
|
6
7
|
encode(waveform: Float32Array | number[]): Promise<Float32Array>;
|
|
7
8
|
decode(tokens: Int32Array | number[], encoderOutput: Float32Array | number[]): Promise<Float32Array>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,YAAY,CAAM;IAE1B,OAAO,CAAC,WAAW,CAA2B;
|
|
1
|
+
{"version":3,"file":"SpeechToTextModule.d.ts","sourceRoot":"","sources":["../../../../src/modules/natural_language_processing/SpeechToTextModule.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAC;AAG3E,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,YAAY,CAAM;IAE1B,OAAO,CAAC,WAAW,CAA2B;IAE9C,OAAO,CAAC,WAAW,CAGhB;IAEU,IAAI,CACf,KAAK,EAAE,uBAAuB,EAC9B,0BAA0B,GAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAe;IA6BtD,MAAM,CACjB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAChC,OAAO,CAAC,YAAY,CAAC;IAUX,MAAM,CACjB,MAAM,EAAE,UAAU,GAAG,MAAM,EAAE,EAC7B,aAAa,EAAE,YAAY,GAAG,MAAM,EAAE,GACrC,OAAO,CAAC,YAAY,CAAC;IAkBX,UAAU,CACrB,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,EACjC,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,MAAM,CAAC;IAgBJ,MAAM,CAClB,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAA;KAAE,CAAC;IAqDjD,YAAY,CAAC,QAAQ,EAAE,YAAY,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAU9D,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxC,OAAO,CAAC,eAAe;CAQxB"}
|
package/package.json
CHANGED
|
@@ -313,7 +313,7 @@ export class LLMController {
|
|
|
313
313
|
const template = new Template(tokenizerConfig.chat_template);
|
|
314
314
|
|
|
315
315
|
const specialTokens = Object.fromEntries(
|
|
316
|
-
Object.
|
|
316
|
+
Object.values(SPECIAL_TOKENS)
|
|
317
317
|
.filter((key) => key in tokenizerConfig)
|
|
318
318
|
.map((key) => [key, tokenizerConfig[key]])
|
|
319
319
|
);
|
|
@@ -7,6 +7,11 @@ export class SpeechToTextModule {
|
|
|
7
7
|
|
|
8
8
|
private modelConfig!: SpeechToTextModelConfig;
|
|
9
9
|
|
|
10
|
+
// UTF-8 decoder used by transcribe() and stream() to turn the byte arrays
// returned by the native module into strings.
private textDecoder = new TextDecoder('utf-8', {
|
|
11
|
+
// Per the WHATWG Encoding spec, fatal: false substitutes U+FFFD for
// malformed byte sequences instead of throwing.
fatal: false,
|
|
12
|
+
// Per the WHATWG Encoding spec, ignoreBOM: true leaves a leading byte-order
// mark in the decoded output instead of stripping it.
// NOTE(review): behaviour depends on the TextDecoder implementation
// available in the React Native runtime — confirm.
ignoreBOM: true,
|
|
13
|
+
});
|
|
14
|
+
|
|
10
15
|
public async load(
|
|
11
16
|
model: SpeechToTextModelConfig,
|
|
12
17
|
onDownloadProgressCallback: (progress: number) => void = () => {}
|
|
@@ -83,8 +88,11 @@ export class SpeechToTextModule {
|
|
|
83
88
|
);
|
|
84
89
|
waveform = new Float32Array(waveform);
|
|
85
90
|
}
|
|
86
|
-
|
|
87
|
-
|
|
91
|
+
const transcriptionBytes = await this.nativeModule.transcribe(
|
|
92
|
+
waveform,
|
|
93
|
+
options.language || ''
|
|
94
|
+
);
|
|
95
|
+
return this.textDecoder.decode(new Uint8Array(transcriptionBytes));
|
|
88
96
|
}
|
|
89
97
|
|
|
90
98
|
public async *stream(
|
|
@@ -105,8 +113,13 @@ export class SpeechToTextModule {
|
|
|
105
113
|
(async () => {
|
|
106
114
|
try {
|
|
107
115
|
await this.nativeModule.stream(
|
|
108
|
-
(committed:
|
|
109
|
-
queue.push({
|
|
116
|
+
(committed: number[], nonCommitted: number[], isDone: boolean) => {
|
|
117
|
+
queue.push({
|
|
118
|
+
committed: this.textDecoder.decode(new Uint8Array(committed)),
|
|
119
|
+
nonCommitted: this.textDecoder.decode(
|
|
120
|
+
new Uint8Array(nonCommitted)
|
|
121
|
+
),
|
|
122
|
+
});
|
|
110
123
|
if (isDone) {
|
|
111
124
|
finished = true;
|
|
112
125
|
}
|