npm - react-native-executorch - Versions diffs - 0.5.15 → 0.6.0 - Mend

react-native-executorch 0.5.15 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (277)

package/common/rnexecutorch/models/BaseModel.cpp CHANGED Viewed

@@ -8,13 +8,14 @@ namespace rnexecutorch::models {
 using namespace facebook;
 using namespace executorch::extension;
+using ::executorch::extension::module::Module;
 using ::executorch::runtime::Error;
 BaseModel::BaseModel(const std::string &modelSource,
-                     std::shared_ptr<react::CallInvoker> callInvoker)
+                     std::shared_ptr<react::CallInvoker> callInvoker,
+                     Module::LoadMode loadMode)
     : callInvoker(callInvoker),
-      module_(std::make_unique<Module>(
-          modelSource, Module::LoadMode::MmapUseMlockIgnoreErrors)) {
+      module_(std::make_unique<Module>(modelSource, loadMode)) {
   Error loadError = module_->load();
   if (loadError != Error::Ok) {
     throw std::runtime_error("Failed to load model: Error " +
@@ -29,7 +30,7 @@ BaseModel::BaseModel(const std::string &modelSource,
 }
 std::vector<int32_t> BaseModel::getInputShape(std::string method_name,
-                                              int32_t index) {
+                                              int32_t index) const {
   if (!module_) {
     throw std::runtime_error("Model not loaded: Cannot get input shape");
   }
@@ -55,7 +56,7 @@ std::vector<int32_t> BaseModel::getInputShape(std::string method_name,
 }
 std::vector<std::vector<int32_t>>
-BaseModel::getAllInputShapes(std::string methodName) {
+BaseModel::getAllInputShapes(std::string methodName) const {
   if (!module_) {
     throw std::runtime_error("Model not loaded: Cannot get all input shapes");
   }
@@ -87,7 +88,7 @@ BaseModel::getAllInputShapes(std::string methodName) {
 /// to JS. It is not meant to be used within C++. If you want to call forward
 /// from C++ on a BaseModel, please use BaseModel::forward.
 std::vector<JSTensorViewOut>
-BaseModel::forwardJS(std::vector<JSTensorViewIn> tensorViewVec) {
+BaseModel::forwardJS(std::vector<JSTensorViewIn> tensorViewVec) const {
   if (!module_) {
     throw std::runtime_error("Model not loaded: Cannot perform forward pass");
   }
@@ -126,8 +127,8 @@ BaseModel::forwardJS(std::vector<JSTensorViewIn> tensorViewVec) {
     auto &outputTensor = outputs[i].toTensor();
     std::vector<int32_t> sizes = getTensorShape(outputTensor);
     size_t bufferSize = outputTensor.numel() * outputTensor.element_size();
-    auto buffer = std::make_shared<OwningArrayBuffer>(bufferSize);
-    std::memcpy(buffer->data(), outputTensor.const_data_ptr(), bufferSize);
+    auto buffer = std::make_shared<OwningArrayBuffer>(
+        outputTensor.const_data_ptr(), bufferSize);
     auto jsTensor = JSTensorViewOut(sizes, outputTensor.scalar_type(), buffer);
     output.emplace_back(jsTensor);
   }
@@ -135,7 +136,7 @@ BaseModel::forwardJS(std::vector<JSTensorViewIn> tensorViewVec) {
 }
 Result<executorch::runtime::MethodMeta>
-BaseModel::getMethodMeta(const std::string &methodName) {
+BaseModel::getMethodMeta(const std::string &methodName) const {
   if (!module_) {
     throw std::runtime_error("Model not loaded: Cannot get method meta!");
   }
@@ -160,7 +161,7 @@ BaseModel::forward(const std::vector<EValue> &input_evalues) const {
 Result<std::vector<EValue>>
 BaseModel::execute(const std::string &methodName,
-                   const std::vector<EValue> &input_value) {
+                   const std::vector<EValue> &input_value) const {
   if (!module_) {
     throw std::runtime_error("Model not loaded, cannot run execute.");
   }
@@ -174,7 +175,7 @@ std::size_t BaseModel::getMemoryLowerBound() const noexcept {
 void BaseModel::unload() noexcept { module_.reset(nullptr); }
 std::vector<int32_t>
-BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) {
+BaseModel::getTensorShape(const executorch::aten::Tensor &tensor) const {
   auto sizes = tensor.sizes();
   return std::vector<int32_t>(sizes.begin(), sizes.end());
 }

package/common/rnexecutorch/models/BaseModel.h CHANGED Viewed

@@ -13,26 +13,32 @@
 namespace rnexecutorch {
 namespace models {
 using namespace facebook;
+using executorch::extension::module::Module;
 using executorch::runtime::EValue;
 using executorch::runtime::Result;
 class BaseModel {
 public:
-  BaseModel(const std::string &modelSource,
-            std::shared_ptr<react::CallInvoker> callInvoker);
+  BaseModel(
+      const std::string &modelSource,
+      std::shared_ptr<react::CallInvoker> callInvoker,
+      Module::LoadMode loadMode = Module::LoadMode::MmapUseMlockIgnoreErrors);
   std::size_t getMemoryLowerBound() const noexcept;
   void unload() noexcept;
-  std::vector<int32_t> getInputShape(std::string method_name, int32_t index);
+  std::vector<int32_t> getInputShape(std::string method_name,
+                                     int32_t index) const;
   std::vector<std::vector<int32_t>>
-  getAllInputShapes(std::string methodName = "forward");
+  getAllInputShapes(std::string methodName = "forward") const;
   std::vector<JSTensorViewOut>
-  forwardJS(std::vector<JSTensorViewIn> tensorViewVec);
+  forwardJS(std::vector<JSTensorViewIn> tensorViewVec) const;
   Result<std::vector<EValue>> forward(const EValue &input_value) const;
   Result<std::vector<EValue>>
   forward(const std::vector<EValue> &input_value) const;
-  Result<std::vector<EValue>> execute(const std::string &methodName,
-                                      const std::vector<EValue> &input_value);
+  Result<std::vector<EValue>>
+  execute(const std::string &methodName,
+          const std::vector<EValue> &input_value) const;
   Result<executorch::runtime::MethodMeta>
-  getMethodMeta(const std::string &methodName);
+  getMethodMeta(const std::string &methodName) const;
 protected:
   // If possible, models should not use the JS runtime to keep JSI internals
@@ -42,9 +48,11 @@ protected:
   std::shared_ptr<react::CallInvoker> callInvoker;
   std::unique_ptr<executorch::extension::Module> module_;
-private:
   std::size_t memorySizeLowerBound{0};
-  std::vector<int32_t> getTensorShape(const executorch::aten::Tensor &tensor);
+private:
+  std::vector<int32_t>
+  getTensorShape(const executorch::aten::Tensor &tensor) const;
 };
 } // namespace models

package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp CHANGED Viewed

@@ -11,17 +11,9 @@ BaseEmbeddings::BaseEmbeddings(const std::string &modelSource,
 std::shared_ptr<OwningArrayBuffer>
 BaseEmbeddings::postprocess(const Result<std::vector<EValue>> &forwardResult) {
   auto forwardResultTensor = forwardResult->at(0).toTensor();
-  auto dataPtr = forwardResultTensor.mutable_data_ptr();
-  auto outputNumel = forwardResultTensor.numel();
-  std::span<float> modelOutput(static_cast<float *>(dataPtr), outputNumel);
-  auto createBuffer = [](const auto &data, size_t size) {
-    auto buffer = std::make_shared<OwningArrayBuffer>(size);
-    std::memcpy(buffer->data(), data, size);
-    return buffer;
-  };
-  return createBuffer(modelOutput.data(), modelOutput.size_bytes());
+  auto buffer = std::make_shared<OwningArrayBuffer>(
+      forwardResultTensor.const_data_ptr(), forwardResultTensor.nbytes());
+  return buffer;
 }
 } // namespace rnexecutorch::models::embeddings

package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp CHANGED Viewed

@@ -48,7 +48,6 @@ TextEmbeddings::generate(const std::string input) {
       attnMaskShape, preprocessed.attentionMask.data(), ScalarType::Long);
   auto forwardResult = BaseModel::forward({tokenIds, attnMask});
   if (!forwardResult.ok()) {
     throw std::runtime_error(
         "Function forward in TextEmbeddings failed with error code: " +

package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp CHANGED Viewed

@@ -62,11 +62,9 @@ std::shared_ptr<jsi::Object> ImageSegmentation::postprocess(
   std::vector<std::shared_ptr<OwningArrayBuffer>> resultClasses;
   resultClasses.reserve(numClasses);
   for (std::size_t cl = 0; cl < numClasses; ++cl) {
-    auto classBuffer =
-        std::make_shared<OwningArrayBuffer>(numModelPixels * sizeof(float));
+    auto classBuffer = std::make_shared<OwningArrayBuffer>(
+        &resultData[cl * numModelPixels], numModelPixels * sizeof(float));
     resultClasses.push_back(classBuffer);
-    std::memcpy(classBuffer->data(), &resultData[cl * numModelPixels],
-                numModelPixels * sizeof(float));
   }
   // Apply softmax per each pixel across all classes
@@ -112,18 +110,14 @@ std::shared_ptr<jsi::Object> ImageSegmentation::postprocess(
     cv::Mat argmaxMat(modelImageSize, CV_32SC1, argmax->data());
     cv::resize(argmaxMat, argmaxMat, originalSize, 0, 0,
                cv::InterpolationFlags::INTER_NEAREST);
-    argmax = std::make_shared<OwningArrayBuffer>(originalSize.area() *
-                                                 sizeof(int32_t));
-    std::memcpy(argmax->data(), argmaxMat.data,
-                originalSize.area() * sizeof(int32_t));
+    argmax = std::make_shared<OwningArrayBuffer>(
+        argmaxMat.data, originalSize.area() * sizeof(int32_t));
     for (auto &[label, arrayBuffer] : *buffersToReturn) {
       cv::Mat classMat(modelImageSize, CV_32FC1, arrayBuffer->data());
       cv::resize(classMat, classMat, originalSize);
-      arrayBuffer = std::make_shared<OwningArrayBuffer>(originalSize.area() *
-                                                        sizeof(float));
-      std::memcpy(arrayBuffer->data(), classMat.data,
-                  originalSize.area() * sizeof(float));
+      arrayBuffer = std::make_shared<OwningArrayBuffer>(
+          classMat.data, originalSize.area() * sizeof(float));
     }
   }
   return populateDictionary(argmax, buffersToReturn);

package/common/rnexecutorch/models/llm/LLM.cpp CHANGED Viewed

@@ -1,30 +1,33 @@
 #include "LLM.h"
-#include <atomic>
 #include <executorch/extension/tensor/tensor.h>
 #include <filesystem>
 #include <rnexecutorch/threads/GlobalThreadPool.h>
 namespace rnexecutorch::models::llm {
+namespace llm = ::executorch::extension::llm;
+namespace fs = std::filesystem;
 using namespace facebook;
 using executorch::extension::TensorPtr;
+using executorch::extension::module::Module;
 using executorch::runtime::Error;
 LLM::LLM(const std::string &modelSource, const std::string &tokenizerSource,
          std::shared_ptr<react::CallInvoker> callInvoker)
-    : runner(std::make_unique<example::Runner>(modelSource, tokenizerSource)),
-      callInvoker(callInvoker) {
+    : BaseModel(modelSource, callInvoker, Module::LoadMode::File),
+      runner(
+          std::make_unique<example::Runner>(module_.get(), tokenizerSource)) {
   auto loadResult = runner->load();
   if (loadResult != Error::Ok) {
     throw std::runtime_error("Failed to load LLM runner, error code: " +
                              std::to_string(static_cast<int>(loadResult)));
   }
-  memorySizeLowerBound =
-      std::filesystem::file_size(std::filesystem::path(modelSource)) +
-      std::filesystem::file_size(std::filesystem::path(tokenizerSource));
+  memorySizeLowerBound = fs::file_size(fs::path(modelSource)) +
+                         fs::file_size(fs::path(tokenizerSource));
 }
+// TODO: add a way to manipulate the generation config with params
 void LLM::generate(std::string input, std::shared_ptr<jsi::Function> callback) {
   if (!runner || !runner->is_loaded()) {
     throw std::runtime_error("Runner is not loaded");
@@ -37,7 +40,8 @@ void LLM::generate(std::string input, std::shared_ptr<jsi::Function> callback) {
     });
   };
-  auto error = runner->generate(input, nativeCallback, {}, false);
+  auto config = llm::GenerationConfig{.echo = false, .warming = false};
+  auto error = runner->generate(input, config, nativeCallback, {});
   if (error != executorch::runtime::Error::Ok) {
     throw std::runtime_error("Failed to generate text, error code: " +
                              std::to_string(static_cast<int>(error)));
@@ -76,6 +80,19 @@ void LLM::setTimeInterval(size_t timeInterval) {
   runner->set_time_interval(timeInterval);
 }
+void LLM::setTemperature(float temperature) {
+  if (!runner || !runner->is_loaded()) {
+    throw std::runtime_error("Can't configure a model that's not loaded!");
+  }
+  runner->set_temperature(temperature);
+};
+void LLM::setTopp(float topp) {
+  if (!runner || !runner->is_loaded()) {
+    throw std::runtime_error("Can't configure a model that's not loaded!");
+  }
+  runner->set_topp(topp);
+}
 void LLM::unload() noexcept { runner.reset(nullptr); }
 } // namespace rnexecutorch::models::llm

package/common/rnexecutorch/models/llm/LLM.h CHANGED Viewed

@@ -3,16 +3,16 @@
 #include <memory>
 #include <string>
-#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
 #include <ReactCommon/CallInvoker.h>
 #include <jsi/jsi.h>
+#include <rnexecutorch/models/BaseModel.h>
 #include <runner/runner.h>
 namespace rnexecutorch {
 namespace models::llm {
 using namespace facebook;
-class LLM {
+class LLM : public BaseModel {
 public:
   explicit LLM(const std::string &modelSource,
                const std::string &tokenizerSource,
@@ -24,12 +24,12 @@ public:
   size_t getGeneratedTokenCount() const noexcept;
   size_t getMemoryLowerBound() const noexcept;
   void setCountInterval(size_t countInterval);
+  void setTemperature(float temperature);
+  void setTopp(float topp);
   void setTimeInterval(size_t timeInterval);
 private:
-  size_t memorySizeLowerBound;
   std::unique_ptr<example::Runner> runner;
-  std::shared_ptr<react::CallInvoker> callInvoker;
 };
 } // namespace models::llm

package/common/rnexecutorch/models/ocr/CTCLabelConverter.h CHANGED Viewed

@@ -23,7 +23,7 @@ public:
                                         size_t length);
 private:
-  std::vector<std::string> character;
   int32_t ignoreIdx;
+  std::vector<std::string> character;
 };
 } // namespace rnexecutorch::models::ocr

package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.cpp CHANGED Viewed

@@ -60,16 +60,19 @@ cv::Mat cropImage(types::DetectorBBox box, cv::Mat &image,
   cv::warpAffine(image, rotatedImage, rotationMatrix, image.size(),
                  cv::INTER_LINEAR);
-  cv::Mat rectMat(4, 2, CV_32FC2);
+  constexpr int32_t rows = 4;
+  constexpr int32_t cols = 2;
+  cv::Mat rectMat(rows, cols, CV_32FC2);
 #pragma unroll
-  for (int32_t i = 0; i < rectMat.rows; ++i) {
+  for (int32_t i = 0; i < rows; ++i) {
     rectMat.at<cv::Vec2f>(i, 0) = cv::Vec2f(rectPoints[i].x, rectPoints[i].y);
   }
   cv::transform(rectMat, rectMat, rotationMatrix);
-  std::vector<cv::Point2f> transformedPoints(4);
+  constexpr size_t transformedPointsSize = 4;
+  std::vector<cv::Point2f> transformedPoints(transformedPointsSize);
 #pragma unroll
-  for (std::size_t i = 0; i < transformedPoints.size(); ++i) {
+  for (std::size_t i = 0; i < transformedPointsSize; ++i) {
     cv::Vec2f point = rectMat.at<cv::Vec2f>(i, 0);
     transformedPoints[i] = cv::Point2f(point[0], point[1]);
   }

package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp CHANGED Viewed

@@ -23,17 +23,22 @@ SpeechToText::SpeechToText(const std::string &encoderSource,
       processor(std::make_unique<OnlineASRProcessor>(this->asr.get())),
       isStreaming(false), readyToProcess(false) {}
+void SpeechToText::unload() noexcept {
+  this->encoder->unload();
+  this->decoder->unload();
+}
 std::shared_ptr<OwningArrayBuffer>
 SpeechToText::encode(std::span<float> waveform) const {
   std::vector<float> encoderOutput = this->asr->encode(waveform);
-  return this->makeOwningBuffer(encoderOutput);
+  return std::make_shared<OwningArrayBuffer>(encoderOutput);
 }
 std::shared_ptr<OwningArrayBuffer>
 SpeechToText::decode(std::span<int32_t> tokens,
                      std::span<float> encoderOutput) const {
   std::vector<float> decoderOutput = this->asr->decode(tokens, encoderOutput);
-  return this->makeOwningBuffer(decoderOutput);
+  return std::make_shared<OwningArrayBuffer>(decoderOutput);
 }
 std::vector<char> SpeechToText::transcribe(std::span<float> waveform,
@@ -61,17 +66,7 @@ std::vector<char> SpeechToText::transcribe(std::span<float> waveform,
 size_t SpeechToText::getMemoryLowerBound() const noexcept {
   return this->encoder->getMemoryLowerBound() +
-         this->decoder->getMemoryLowerBound() +
-         this->tokenizer->getMemoryLowerBound();
-}
-std::shared_ptr<OwningArrayBuffer>
-SpeechToText::makeOwningBuffer(std::span<const float> vectorView) const {
-  auto owningArrayBuffer =
-      std::make_shared<OwningArrayBuffer>(vectorView.size_bytes());
-  std::memcpy(owningArrayBuffer->data(), vectorView.data(),
-              vectorView.size_bytes());
-  return owningArrayBuffer;
+         this->decoder->getMemoryLowerBound();
 }
 void SpeechToText::stream(std::shared_ptr<jsi::Function> callback,

package/common/rnexecutorch/models/speech_to_text/SpeechToText.h CHANGED Viewed

@@ -16,6 +16,7 @@ public:
                         const std::string &tokenizerSource,
                         std::shared_ptr<react::CallInvoker> callInvoker);
+  void unload() noexcept;
   std::shared_ptr<OwningArrayBuffer> encode(std::span<float> waveform) const;
   std::shared_ptr<OwningArrayBuffer>
   decode(std::span<int32_t> tokens, std::span<float> encoderOutput) const;
@@ -37,9 +38,6 @@ private:
   std::unique_ptr<TokenizerModule> tokenizer;
   std::unique_ptr<asr::ASR> asr;
-  std::shared_ptr<OwningArrayBuffer>
-  makeOwningBuffer(std::span<const float> vectorView) const;
   // Stream
   std::unique_ptr<stream::OnlineASRProcessor> processor;
   bool isStreaming;

package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp CHANGED Viewed

@@ -4,7 +4,6 @@
 #include "ASR.h"
 #include "executorch/extension/tensor/tensor_ptr.h"
 #include "rnexecutorch/data_processing/Numerical.h"
-#include "rnexecutorch/data_processing/dsp.h"
 #include "rnexecutorch/data_processing/gzip.h"
 namespace rnexecutorch::models::speech_to_text::asr {
@@ -37,8 +36,7 @@ ASR::getInitialSequence(const DecodingOptions &options) const {
   return seq;
 }
-GenerationResult ASR::generate(std::span<const float> waveform,
-                               float temperature,
+GenerationResult ASR::generate(std::span<float> waveform, float temperature,
                                const DecodingOptions &options) const {
   std::vector<float> encoderOutput = this->encode(waveform);
@@ -94,7 +92,7 @@ float ASR::getCompressionRatio(const std::string &text) const {
 }
 std::vector<Segment>
-ASR::generateWithFallback(std::span<const float> waveform,
+ASR::generateWithFallback(std::span<float> waveform,
                           const DecodingOptions &options) const {
   std::vector<float> temperatures = {0.0f, 0.2f, 0.4f, 0.6f, 0.8f, 1.0f};
   std::vector<int32_t> bestTokens;
@@ -209,7 +207,7 @@ ASR::estimateWordLevelTimestampsLinear(std::span<const int32_t> tokens,
   return wordObjs;
 }
-std::vector<Segment> ASR::transcribe(std::span<const float> waveform,
+std::vector<Segment> ASR::transcribe(std::span<float> waveform,
                                      const DecodingOptions &options) const {
   int32_t seek = 0;
   std::vector<Segment> results;
@@ -218,7 +216,7 @@ std::vector<Segment> ASR::transcribe(std::span<const float> waveform,
     int32_t start = seek * ASR::kSamplingRate;
     const auto end = std::min<int32_t>(
         (seek + ASR::kChunkSize) * ASR::kSamplingRate, waveform.size());
-    std::span<const float> chunk = waveform.subspan(start, end - start);
+    auto chunk = waveform.subspan(start, end - start);
     if (std::cmp_less(chunk.size(), ASR::kMinChunkSamples)) {
       break;
@@ -246,19 +244,12 @@ std::vector<Segment> ASR::transcribe(std::span<const float> waveform,
   return results;
 }
-std::vector<float> ASR::encode(std::span<const float> waveform) const {
-  constexpr int32_t fftWindowSize = 512;
-  constexpr int32_t stftHopLength = 160;
-  constexpr int32_t innerDim = 256;
-  std::vector<float> preprocessedData =
-      dsp::stftFromWaveform(waveform, fftWindowSize, stftHopLength);
-  const auto numFrames =
-      static_cast<int32_t>(preprocessedData.size()) / innerDim;
-  std::vector<int32_t> inputShape = {numFrames, innerDim};
+std::vector<float> ASR::encode(std::span<float> waveform) const {
+  auto inputShape = {static_cast<int32_t>(waveform.size())};
   const auto modelInputTensor = executorch::extension::make_tensor_ptr(
-      std::move(inputShape), std::move(preprocessedData));
+      std::move(inputShape), waveform.data(),
+      executorch::runtime::etensor::ScalarType::Float);
   const auto encoderResult = this->encoder->forward(modelInputTensor);
   if (!encoderResult.ok()) {
@@ -268,7 +259,7 @@ std::vector<float> ASR::encode(std::span<const float> waveform) const {
   }
   const auto decoderOutputTensor = encoderResult.get().at(0).toTensor();
-  const int32_t outputNumel = decoderOutputTensor.numel();
+  const auto outputNumel = decoderOutputTensor.numel();
   const float *const dataPtr = decoderOutputTensor.const_data_ptr<float>();
   return {dataPtr, dataPtr + outputNumel};
@@ -277,8 +268,10 @@ std::vector<float> ASR::encode(std::span<const float> waveform) const {
 std::vector<float> ASR::decode(std::span<int32_t> tokens,
                                std::span<float> encoderOutput) const {
   std::vector<int32_t> tokenShape = {1, static_cast<int32_t>(tokens.size())};
+  auto tokensLong = std::vector<int64_t>(tokens.begin(), tokens.end());
   auto tokenTensor = executorch::extension::make_tensor_ptr(
-      std::move(tokenShape), tokens.data(), ScalarType::Int);
+      tokenShape, tokensLong.data(), ScalarType::Long);
   const auto encoderOutputSize = static_cast<int32_t>(encoderOutput.size());
   std::vector<int32_t> encShape = {1, ASR::kNumFrames,

package/common/rnexecutorch/models/speech_to_text/asr/ASR.h CHANGED Viewed

@@ -14,9 +14,9 @@ public:
                const models::BaseModel *decoder,
                const TokenizerModule *tokenizer);
   std::vector<types::Segment>
-  transcribe(std::span<const float> waveform,
+  transcribe(std::span<float> waveform,
              const types::DecodingOptions &options) const;
-  std::vector<float> encode(std::span<const float> waveform) const;
+  std::vector<float> encode(std::span<float> waveform) const;
   std::vector<float> decode(std::span<int32_t> tokens,
                             std::span<float> encoderOutput) const;
@@ -44,11 +44,10 @@ private:
   std::vector<int32_t>
   getInitialSequence(const types::DecodingOptions &options) const;
-  types::GenerationResult generate(std::span<const float> waveform,
-                                   float temperature,
+  types::GenerationResult generate(std::span<float> waveform, float temperature,
                                    const types::DecodingOptions &options) const;
   std::vector<types::Segment>
-  generateWithFallback(std::span<const float> waveform,
+  generateWithFallback(std::span<float> waveform,
                        const types::DecodingOptions &options) const;
   std::vector<types::Segment>
   calculateWordLevelTimestamps(std::span<const int32_t> tokens,

package/common/rnexecutorch/models/text_to_image/Constants.h ADDED Viewed

@@ -0,0 +1,9 @@
+#pragma once
+#include <string_view>
+namespace rnexecutorch::models::text_to_image::constants {
+inline constexpr std::string_view kBosToken = "<|startoftext|>";
+} // namespace rnexecutorch::models::text_to_image::constants

package/common/rnexecutorch/models/text_to_image/Decoder.cpp ADDED Viewed

@@ -0,0 +1,32 @@
+#include "Decoder.h"
+#include <cmath>
+#include <executorch/extension/tensor/tensor_ptr_maker.h>
+namespace rnexecutorch::models::text_to_image {
+using namespace executorch::extension;
+Decoder::Decoder(const std::string &modelSource,
+                 std::shared_ptr<react::CallInvoker> callInvoker)
+    : BaseModel(modelSource, callInvoker) {}
+std::vector<float> Decoder::generate(std::vector<float> &input) const {
+  std::vector<int32_t> inputShape = {1, numChannels, latentImageSize,
+                                     latentImageSize};
+  auto inputTensor =
+      make_tensor_ptr(inputShape, input.data(), ScalarType::Float);
+  auto forwardResult = BaseModel::forward(inputTensor);
+  if (!forwardResult.ok()) {
+    throw std::runtime_error(
+        "Function forward in decoder failed with error code: " +
+        std::to_string(static_cast<uint32_t>(forwardResult.error())));
+  }
+  auto forwardResultTensor = forwardResult->at(0).toTensor();
+  const auto *dataPtr = forwardResultTensor.const_data_ptr<float>();
+  return {dataPtr, dataPtr + forwardResultTensor.numel()};
+}
+} // namespace rnexecutorch::models::text_to_image

package/common/rnexecutorch/models/text_to_image/Decoder.h ADDED Viewed

@@ -0,0 +1,24 @@
+#pragma once
+#include <memory>
+#include <string>
+#include <vector>
+#include <ReactCommon/CallInvoker.h>
+#include <rnexecutorch/models/BaseModel.h>
+namespace rnexecutorch::models::text_to_image {
+class Decoder final : public BaseModel {
+public:
+  explicit Decoder(const std::string &modelSource,
+                   std::shared_ptr<react::CallInvoker> callInvoker);
+  std::vector<float> generate(std::vector<float> &input) const;
+  int32_t latentImageSize;
+private:
+  static constexpr int32_t numChannels = 4;
+};
+} // namespace rnexecutorch::models::text_to_image

package/common/rnexecutorch/models/text_to_image/Encoder.cpp ADDED Viewed

@@ -0,0 +1,44 @@
+#include "Encoder.h"
+#include <cmath>
+#include <random>
+#include <span>
+#include <rnexecutorch/models/text_to_image/Constants.h>
+namespace rnexecutorch::models::text_to_image {
+Encoder::Encoder(const std::string &tokenizerSource,
+                 const std::string &encoderSource,
+                 std::shared_ptr<react::CallInvoker> callInvoker)
+    : callInvoker(callInvoker),
+      encoder(std::make_unique<embeddings::TextEmbeddings>(
+          encoderSource, tokenizerSource, callInvoker)) {}
+std::vector<float> Encoder::generate(std::string input) {
+  std::shared_ptr<OwningArrayBuffer> embeddingsText = encoder->generate(input);
+  std::shared_ptr<OwningArrayBuffer> embeddingsUncond =
+      encoder->generate(std::string(constants::kBosToken));
+  assert(embeddingsText->size() == embeddingsUncond->size());
+  size_t embeddingsSize = embeddingsText->size() / sizeof(float);
+  auto *embeddingsTextPtr = reinterpret_cast<float *>(embeddingsText->data());
+  auto *embeddingsUncondPtr =
+      reinterpret_cast<float *>(embeddingsUncond->data());
+  std::vector<float> embeddingsConcat;
+  embeddingsConcat.reserve(embeddingsSize * 2);
+  embeddingsConcat.insert(embeddingsConcat.end(), embeddingsUncondPtr,
+                          embeddingsUncondPtr + embeddingsSize);
+  embeddingsConcat.insert(embeddingsConcat.end(), embeddingsTextPtr,
+                          embeddingsTextPtr + embeddingsSize);
+  return embeddingsConcat;
+}
+size_t Encoder::getMemoryLowerBound() const noexcept {
+  return encoder->getMemoryLowerBound();
+}
+void Encoder::unload() noexcept { encoder->unload(); }
+} // namespace rnexecutorch::models::text_to_image