npm - react-native-executorch - Versions diffs - 0.8.2 → 0.8.3 - Mend

react-native-executorch 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/common/rnexecutorch/host_objects/ModelHostObject.h CHANGED Viewed

@@ -375,7 +375,9 @@ public:
             // We need to dispatch a thread if we want the function to be
             // asynchronous. In this thread all accesses to jsi::Runtime need to
             // be done via the callInvoker.
-            threads::GlobalThreadPool::detach([this, promise,
+            threads::GlobalThreadPool::detach([model = this->model,
+                                               callInvoker = this->callInvoker,
+                                               promise,
                                                argsConverted =
                                                    std::move(argsConverted)]() {
               try {

package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp CHANGED Viewed

@@ -35,8 +35,14 @@ TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) {
   return {.inputIds = inputIds64, .attentionMask = attentionMask};
 }
+void TextEmbeddings::unload() noexcept {
+  std::scoped_lock lock(inference_mutex_);
+  BaseModel::unload();
+}
 std::shared_ptr<OwningArrayBuffer>
 TextEmbeddings::generate(const std::string input) {
+  std::scoped_lock lock(inference_mutex_);
   auto preprocessed = preprocess(input);
   std::vector<int32_t> tokenIdsShape = {

package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h CHANGED Viewed

@@ -1,6 +1,7 @@
 #pragma once
 #include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
+#include <mutex>
 #include <rnexecutorch/TokenizerModule.h>
 #include <rnexecutorch/models/embeddings/BaseEmbeddings.h>
@@ -20,8 +21,10 @@ public:
   [[nodiscard(
       "Registered non-void function")]] std::shared_ptr<OwningArrayBuffer>
   generate(const std::string input);
+  void unload() noexcept;
 private:
+  mutable std::mutex inference_mutex_;
   std::vector<std::vector<int32_t>> inputShapes;
   TokenIdsWithAttentionMask preprocess(const std::string &input);
   std::unique_ptr<TokenizerModule> tokenizer;

package/common/rnexecutorch/models/llm/LLM.cpp CHANGED Viewed

@@ -20,7 +20,7 @@ using executorch::runtime::Error;
 LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
          std::vector<std::string> capabilities,
          std::shared_ptr<react::CallInvoker> callInvoker)
-    : BaseModel(modelSource, callInvoker, Module::LoadMode::File) {
+    : BaseModel(modelSource, callInvoker, Module::LoadMode::Mmap) {
   if (capabilities.empty()) {
     runner_ =
@@ -42,8 +42,12 @@ LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
     throw RnExecutorchError(loadResult, "Failed to load LLM runner");
   }
-  memorySizeLowerBound = fs::file_size(fs::path(modelSource)) +
-                         fs::file_size(fs::path(tokenizerSource));
+  // I am purposefully not adding file size of the model here. The reason is
+  // that Hermes would crash the app if we try to alloc too much memory here.
+  // Also, given we're using mmap, the true memory consumption of a model is not
+  // really equal to the size of the model. The size of the tokenizer file is a
+  // hint to the GC that this object might be worth getting rid of.
+  memorySizeLowerBound = fs::file_size(fs::path(tokenizerSource));
 }
 std::string LLM::generate(std::string input,

package/common/rnexecutorch/models/text_to_image/TextToImage.cpp CHANGED Viewed

@@ -58,6 +58,7 @@ std::shared_ptr<OwningArrayBuffer>
 TextToImage::generate(std::string input, int32_t imageSize,
                       size_t numInferenceSteps, int32_t seed,
                       std::shared_ptr<jsi::Function> callback) {
+  std::scoped_lock lock(inference_mutex_);
   setImageSize(imageSize);
   setSeed(seed);
@@ -137,6 +138,7 @@ size_t TextToImage::getMemoryLowerBound() const noexcept {
 }
 void TextToImage::unload() noexcept {
+  std::scoped_lock lock(inference_mutex_);
   encoder->unload();
   unet->unload();
   decoder->unload();

package/common/rnexecutorch/models/text_to_image/TextToImage.h CHANGED Viewed

@@ -1,6 +1,7 @@
 #pragma once
 #include <memory>
+#include <mutex>
 #include <string>
 #include <vector>
@@ -49,6 +50,7 @@ private:
   static constexpr float guidanceScale = 7.5f;
   static constexpr float latentsScale = 0.18215f;
   bool interrupted = false;
+  mutable std::mutex inference_mutex_;
   std::shared_ptr<react::CallInvoker> callInvoker;
   std::unique_ptr<Scheduler> scheduler;

package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp CHANGED Viewed

@@ -54,8 +54,14 @@ VoiceActivityDetection::preprocess(std::span<float> waveform) const {
   return frameBuffer;
 }
+void VoiceActivityDetection::unload() noexcept {
+  std::scoped_lock lock(inference_mutex_);
+  BaseModel::unload();
+}
 std::vector<types::Segment>
 VoiceActivityDetection::generate(std::span<float> waveform) const {
+  std::scoped_lock lock(inference_mutex_);
   auto windowedInput = preprocess(waveform);
   auto [chunksNumber, remainder] = std::div(

package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h CHANGED Viewed

@@ -5,6 +5,7 @@
 #include <executorch/extension/tensor/tensor.h>
 #include <executorch/extension/tensor/tensor_ptr.h>
 #include <executorch/runtime/core/evalue.h>
+#include <mutex>
 #include <span>
 #include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
@@ -23,7 +24,11 @@ public:
   [[nodiscard("Registered non-void function")]] std::vector<types::Segment>
   generate(std::span<float> waveform) const;
+  void unload() noexcept;
 private:
+  mutable std::mutex inference_mutex_;
   std::vector<std::array<float, constants::kPaddedWindowSize>>
   preprocess(std::span<float> waveform) const;
   std::vector<types::Segment> postprocess(const std::vector<float> &scores,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "react-native-executorch",
-  "version": "0.8.2",
+  "version": "0.8.3",
   "description": "An easy way to run AI models in React Native with ExecuTorch",
   "source": "./src/index.ts",
   "main": "./lib/module/index.js",