react-native-executorch 0.8.2 → 0.8.3
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/common/rnexecutorch/host_objects/ModelHostObject.h +3 -1
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +6 -0
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h +3 -0
- package/common/rnexecutorch/models/llm/LLM.cpp +7 -3
- package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +2 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.h +2 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +6 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +5 -0
- package/package.json +1 -1
|
@@ -375,7 +375,9 @@ public:
|
|
|
375
375
|
// We need to dispatch a thread if we want the function to be
|
|
376
376
|
// asynchronous. In this thread all accesses to jsi::Runtime need to
|
|
377
377
|
// be done via the callInvoker.
|
|
378
|
-
threads::GlobalThreadPool::detach([this,
|
|
378
|
+
threads::GlobalThreadPool::detach([model = this->model,
|
|
379
|
+
callInvoker = this->callInvoker,
|
|
380
|
+
promise,
|
|
379
381
|
argsConverted =
|
|
380
382
|
std::move(argsConverted)]() {
|
|
381
383
|
try {
|
|
@@ -35,8 +35,14 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) {
|
|
|
35
35
|
return {.inputIds = inputIds64, .attentionMask = attentionMask};
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
void TextEmbeddings::unload() noexcept {
|
|
39
|
+
std::scoped_lock lock(inference_mutex_);
|
|
40
|
+
BaseModel::unload();
|
|
41
|
+
}
|
|
42
|
+
|
|
38
43
|
std::shared_ptr<OwningArrayBuffer>
|
|
39
44
|
TextEmbeddings::generate(const std::string input) {
|
|
45
|
+
std::scoped_lock lock(inference_mutex_);
|
|
40
46
|
auto preprocessed = preprocess(input);
|
|
41
47
|
|
|
42
48
|
std::vector<int32_t> tokenIdsShape = {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
|
|
4
|
+
#include <mutex>
|
|
4
5
|
#include <rnexecutorch/TokenizerModule.h>
|
|
5
6
|
#include <rnexecutorch/models/embeddings/BaseEmbeddings.h>
|
|
6
7
|
|
|
@@ -20,8 +21,10 @@ public:
|
|
|
20
21
|
[[nodiscard(
|
|
21
22
|
"Registered non-void function")]] std::shared_ptr<OwningArrayBuffer>
|
|
22
23
|
generate(const std::string input);
|
|
24
|
+
void unload() noexcept;
|
|
23
25
|
|
|
24
26
|
private:
|
|
27
|
+
mutable std::mutex inference_mutex_;
|
|
25
28
|
std::vector<std::vector<int32_t>> inputShapes;
|
|
26
29
|
TokenIdsWithAttentionMask preprocess(const std::string &input);
|
|
27
30
|
std::unique_ptr<TokenizerModule> tokenizer;
|
|
@@ -20,7 +20,7 @@ using executorch::runtime::Error;
|
|
|
20
20
|
LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
|
|
21
21
|
std::vector<std::string> capabilities,
|
|
22
22
|
std::shared_ptr<react::CallInvoker> callInvoker)
|
|
23
|
-
: BaseModel(modelSource, callInvoker, Module::LoadMode::
|
|
23
|
+
: BaseModel(modelSource, callInvoker, Module::LoadMode::Mmap) {
|
|
24
24
|
|
|
25
25
|
if (capabilities.empty()) {
|
|
26
26
|
runner_ =
|
|
@@ -42,8 +42,12 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
|
|
|
42
42
|
throw RnExecutorchError(loadResult, "Failed to load LLM runner");
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
// I am purposefully not adding file size of the model here. The reason is
|
|
46
|
+
// that Hermes would crash the app if we try to alloc too much memory here.
|
|
47
|
+
// Also, given we're using mmap, the true memory consumption of a model is not
|
|
48
|
+
// really equal to the size of the model. The size of the tokenizer file is a
|
|
49
|
+
// hint to the GC that this object might be worth getting rid of.
|
|
50
|
+
memorySizeLowerBound = fs::file_size(fs::path(tokenizerSource));
|
|
47
51
|
}
|
|
48
52
|
|
|
49
53
|
std::string LLM::generate(std::string input,
|
|
@@ -58,6 +58,7 @@ std::shared_ptr<OwningArrayBuffer>
|
|
|
58
58
|
TextToImage::generate(std::string input, int32_t imageSize,
|
|
59
59
|
size_t numInferenceSteps, int32_t seed,
|
|
60
60
|
std::shared_ptr<jsi::Function> callback) {
|
|
61
|
+
std::scoped_lock lock(inference_mutex_);
|
|
61
62
|
setImageSize(imageSize);
|
|
62
63
|
setSeed(seed);
|
|
63
64
|
|
|
@@ -137,6 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept {
|
|
|
137
138
|
}
|
|
138
139
|
|
|
139
140
|
void TextToImage::unload() noexcept {
|
|
141
|
+
std::scoped_lock lock(inference_mutex_);
|
|
140
142
|
encoder->unload();
|
|
141
143
|
unet->unload();
|
|
142
144
|
decoder->unload();
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#pragma once
|
|
2
2
|
|
|
3
3
|
#include <memory>
|
|
4
|
+
#include <mutex>
|
|
4
5
|
#include <string>
|
|
5
6
|
#include <vector>
|
|
6
7
|
|
|
@@ -49,6 +50,7 @@ private:
|
|
|
49
50
|
static constexpr float guidanceScale = 7.5f;
|
|
50
51
|
static constexpr float latentsScale = 0.18215f;
|
|
51
52
|
bool interrupted = false;
|
|
53
|
+
mutable std::mutex inference_mutex_;
|
|
52
54
|
|
|
53
55
|
std::shared_ptr<react::CallInvoker> callInvoker;
|
|
54
56
|
std::unique_ptr<Scheduler> scheduler;
|
|
@@ -54,8 +54,14 @@ VoiceActivityDetection::preprocess(std::span<float> waveform) const {
|
|
|
54
54
|
return frameBuffer;
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
void VoiceActivityDetection::unload() noexcept {
|
|
58
|
+
std::scoped_lock lock(inference_mutex_);
|
|
59
|
+
BaseModel::unload();
|
|
60
|
+
}
|
|
61
|
+
|
|
57
62
|
std::vector<types::Segment>
|
|
58
63
|
VoiceActivityDetection::generate(std::span<float> waveform) const {
|
|
64
|
+
std::scoped_lock lock(inference_mutex_);
|
|
59
65
|
|
|
60
66
|
auto windowedInput = preprocess(waveform);
|
|
61
67
|
auto [chunksNumber, remainder] = std::div(
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
#include <executorch/extension/tensor/tensor.h>
|
|
6
6
|
#include <executorch/extension/tensor/tensor_ptr.h>
|
|
7
7
|
#include <executorch/runtime/core/evalue.h>
|
|
8
|
+
#include <mutex>
|
|
8
9
|
#include <span>
|
|
9
10
|
|
|
10
11
|
#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
|
|
@@ -23,7 +24,11 @@ public:
|
|
|
23
24
|
[[nodiscard("Registered non-void function")]] std::vector<types::Segment>
|
|
24
25
|
generate(std::span<float> waveform) const;
|
|
25
26
|
|
|
27
|
+
void unload() noexcept;
|
|
28
|
+
|
|
26
29
|
private:
|
|
30
|
+
mutable std::mutex inference_mutex_;
|
|
31
|
+
|
|
27
32
|
std::vector<std::array<float, constants::kPaddedWindowSize>>
|
|
28
33
|
preprocess(std::span<float> waveform) const;
|
|
29
34
|
std::vector<types::Segment> postprocess(const std::vector<float> &scores,
|