react-native-executorch 0.8.2 → 0.8.3

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -375,7 +375,9 @@ public:
375
375
  // We need to dispatch a thread if we want the function to be
376
376
  // asynchronous. In this thread all accesses to jsi::Runtime need to
377
377
  // be done via the callInvoker.
378
- threads::GlobalThreadPool::detach([this, promise,
378
+ threads::GlobalThreadPool::detach([model = this->model,
379
+ callInvoker = this->callInvoker,
380
+ promise,
379
381
  argsConverted =
380
382
  std::move(argsConverted)]() {
381
383
  try {
@@ -35,8 +35,14 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) {
35
35
  return {.inputIds = inputIds64, .attentionMask = attentionMask};
36
36
  }
37
37
 
38
+ void TextEmbeddings::unload() noexcept {
39
+ std::scoped_lock lock(inference_mutex_);
40
+ BaseModel::unload();
41
+ }
42
+
38
43
  std::shared_ptr<OwningArrayBuffer>
39
44
  TextEmbeddings::generate(const std::string input) {
45
+ std::scoped_lock lock(inference_mutex_);
40
46
  auto preprocessed = preprocess(input);
41
47
 
42
48
  std::vector<int32_t> tokenIdsShape = {
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
4
+ #include <mutex>
4
5
  #include <rnexecutorch/TokenizerModule.h>
5
6
  #include <rnexecutorch/models/embeddings/BaseEmbeddings.h>
6
7
 
@@ -20,8 +21,10 @@ public:
20
21
  [[nodiscard(
21
22
  "Registered non-void function")]] std::shared_ptr<OwningArrayBuffer>
22
23
  generate(const std::string input);
24
+ void unload() noexcept;
23
25
 
24
26
  private:
27
+ mutable std::mutex inference_mutex_;
25
28
  std::vector<std::vector<int32_t>> inputShapes;
26
29
  TokenIdsWithAttentionMask preprocess(const std::string &input);
27
30
  std::unique_ptr<TokenizerModule> tokenizer;
@@ -20,7 +20,7 @@ using executorch::runtime::Error;
20
20
  LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
21
21
  std::vector<std::string> capabilities,
22
22
  std::shared_ptr<react::CallInvoker> callInvoker)
23
- : BaseModel(modelSource, callInvoker, Module::LoadMode::File) {
23
+ : BaseModel(modelSource, callInvoker, Module::LoadMode::Mmap) {
24
24
 
25
25
  if (capabilities.empty()) {
26
26
  runner_ =
@@ -42,8 +42,12 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
42
42
  throw RnExecutorchError(loadResult, "Failed to load LLM runner");
43
43
  }
44
44
 
45
- memorySizeLowerBound = fs::file_size(fs::path(modelSource)) +
46
- fs::file_size(fs::path(tokenizerSource));
45
+ // I am purposefully not adding file size of the model here. The reason is
46
+ // that Hermes would crash the app if we try to alloc too much memory here.
47
+ // Also, given we're using mmap, the true memory consumption of a model is not
48
+ // really equal to the size of the model. The size of the tokenizer file is a
49
+ // hint to the GC that this object might be worth getting rid of.
50
+ memorySizeLowerBound = fs::file_size(fs::path(tokenizerSource));
47
51
  }
48
52
 
49
53
  std::string LLM::generate(std::string input,
@@ -58,6 +58,7 @@ std::shared_ptr<OwningArrayBuffer>
58
58
  TextToImage::generate(std::string input, int32_t imageSize,
59
59
  size_t numInferenceSteps, int32_t seed,
60
60
  std::shared_ptr<jsi::Function> callback) {
61
+ std::scoped_lock lock(inference_mutex_);
61
62
  setImageSize(imageSize);
62
63
  setSeed(seed);
63
64
 
@@ -137,6 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept {
137
138
  }
138
139
 
139
140
  void TextToImage::unload() noexcept {
141
+ std::scoped_lock lock(inference_mutex_);
140
142
  encoder->unload();
141
143
  unet->unload();
142
144
  decoder->unload();
@@ -1,6 +1,7 @@
1
1
  #pragma once
2
2
 
3
3
  #include <memory>
4
+ #include <mutex>
4
5
  #include <string>
5
6
  #include <vector>
6
7
 
@@ -49,6 +50,7 @@ private:
49
50
  static constexpr float guidanceScale = 7.5f;
50
51
  static constexpr float latentsScale = 0.18215f;
51
52
  bool interrupted = false;
53
+ mutable std::mutex inference_mutex_;
52
54
 
53
55
  std::shared_ptr<react::CallInvoker> callInvoker;
54
56
  std::unique_ptr<Scheduler> scheduler;
@@ -54,8 +54,14 @@ VoiceActivityDetection::preprocess(std::span<float> waveform) const {
54
54
  return frameBuffer;
55
55
  }
56
56
 
57
+ void VoiceActivityDetection::unload() noexcept {
58
+ std::scoped_lock lock(inference_mutex_);
59
+ BaseModel::unload();
60
+ }
61
+
57
62
  std::vector<types::Segment>
58
63
  VoiceActivityDetection::generate(std::span<float> waveform) const {
64
+ std::scoped_lock lock(inference_mutex_);
59
65
 
60
66
  auto windowedInput = preprocess(waveform);
61
67
  auto [chunksNumber, remainder] = std::div(
@@ -5,6 +5,7 @@
5
5
  #include <executorch/extension/tensor/tensor.h>
6
6
  #include <executorch/extension/tensor/tensor_ptr.h>
7
7
  #include <executorch/runtime/core/evalue.h>
8
+ #include <mutex>
8
9
  #include <span>
9
10
 
10
11
  #include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
@@ -23,7 +24,11 @@ public:
23
24
  [[nodiscard("Registered non-void function")]] std::vector<types::Segment>
24
25
  generate(std::span<float> waveform) const;
25
26
 
27
+ void unload() noexcept;
28
+
26
29
  private:
30
+ mutable std::mutex inference_mutex_;
31
+
27
32
  std::vector<std::array<float, constants::kPaddedWindowSize>>
28
33
  preprocess(std::span<float> waveform) const;
29
34
  std::vector<types::Segment> postprocess(const std::vector<float> &scores,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "react-native-executorch",
3
- "version": "0.8.2",
3
+ "version": "0.8.3",
4
4
  "description": "An easy way to run AI models in React Native with ExecuTorch",
5
5
  "source": "./src/index.ts",
6
6
  "main": "./lib/module/index.js",