npm - react-native-executorch - Versions diffs - 0.5.15 → 0.6.0-nightly-897eae9-20251213 - Mend

react-native-executorch 0.5.15 → 0.6.0-nightly-897eae9-20251213

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (277) hide show

package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp ADDED Viewed

@@ -0,0 +1,160 @@
+#include "VoiceActivityDetection.h"
+#include "rnexecutorch/data_processing/dsp.h"
+#include "rnexecutorch/models/voice_activity_detection/Utils.h"
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <numeric>
+#include <vector>
+namespace rnexecutorch::models::voice_activity_detection {
+using namespace constants;
+namespace ranges = std::ranges;
+using executorch::aten::Tensor;
+using executorch::extension::TensorPtr;
+// Constructs the detector by forwarding both arguments unchanged to the
+// BaseModel constructor; this class sets up no further state of its own here.
+VoiceActivityDetection::VoiceActivityDetection(
+    const std::string &modelSource,
+    std::shared_ptr<react::CallInvoker> callInvoker)
+    : BaseModel(modelSource, callInvoker) {}
+// Splits the waveform into overlapping analysis frames and conditions each
+// one for the model.
+//
+// Frame i covers kWindowSize samples starting at sample i * kHopLength. The
+// samples are copied into a zero-initialised array of kPaddedWindowSize
+// floats, preceded by half of the total padding, and are then mean-removed,
+// pre-emphasised and multiplied by a Hann window. The padding stays zero.
+//
+// waveform: input samples; only read here.
+// Returns one padded frame per hop, or an empty vector when the waveform is
+// shorter than a single window.
+std::vector<std::array<float, kPaddedWindowSize>>
+VoiceActivityDetection::preprocess(std::span<float> waveform) const {
+  // The frame count below is computed in unsigned arithmetic, so a waveform
+  // shorter than one window would wrap around to an enormous value.
+  if (waveform.size() < kWindowSize) {
+    return {};
+  }
+  // The taper comes from dsp::hannWindow, so it is a Hann window.
+  const auto hannWindow = dsp::hannWindow(kWindowSize);
+  // NOTE(review): this yields one frame fewer than the usual
+  // 1 + (size - window) / hop; kept as is, confirm it is intentional.
+  const size_t numFrames = (waveform.size() - kWindowSize) / kHopLength;
+  std::vector<std::array<float, kPaddedWindowSize>> frameBuffer(
+      numFrames, std::array<float, kPaddedWindowSize>{});
+  constexpr size_t totalPadding = kPaddedWindowSize - kWindowSize;
+  constexpr size_t leftPadding = totalPadding / 2;
+  for (size_t i = 0; i < numFrames; i++) {
+    auto windowView = waveform.subspan(i * kHopLength, kWindowSize);
+    ranges::copy(windowView, frameBuffer[i].begin() + leftPadding);
+    // View over the payload only, skipping the leading padding.
+    auto frameView =
+        std::span{frameBuffer[i].data() + leftPadding, kWindowSize};
+    // remove the DC offset of the frame
+    const float sum = std::reduce(frameView.begin(), frameView.end(), 0.0f);
+    const float mean = sum / kWindowSize;
+    ranges::transform(frameView, frameView.begin(),
+                      [mean](float value) { return value - mean; });
+    // apply pre-emphasis filter; walking backwards keeps frameView[j - 1]
+    // unfiltered at the moment it is read
+    for (auto j = frameView.size() - 1; j > 0; --j) {
+      frameView[j] -= kPreemphasisCoeff * frameView[j - 1];
+    }
+    // apply the Hann window to reduce spectral leakage
+    ranges::transform(frameView, hannWindow, frameView.begin(),
+                      std::multiplies{});
+  }
+  return frameBuffer;
+}
+// Runs voice activity detection over a waveform.
+//
+// The waveform is framed by preprocess(), the frames are pushed through the
+// model in chunks of kModelInputMax, the per-frame scores are collected, and
+// postprocess() turns them into segments using kSpeechThreshold.
+//
+// waveform: input samples.
+// Returns the detected segments; empty when the waveform yields no frames.
+// Throws std::runtime_error when a forward pass reports an error.
+std::vector<types::Segment>
+VoiceActivityDetection::generate(std::span<float> waveform) const {
+  auto windowedInput = preprocess(waveform);
+  // No frames means no scores: skip inference instead of feeding the model a
+  // chunk that consists of padding only.
+  if (windowedInput.empty()) {
+    return {};
+  }
+  auto [chunksNumber, remainder] = std::div(
+      static_cast<int>(windowedInput.size()), static_cast<int>(kModelInputMax));
+  // Sized before any padding is appended, so it has one slot per real frame.
+  std::vector<float> scores(windowedInput.size());
+  const auto fullChunks = static_cast<size_t>(chunksNumber);
+  TensorPtr inputTensor;
+  size_t startIdx = 0;
+  for (size_t i = 0; i < fullChunks; i++) {
+    std::span<std::array<float, kPaddedWindowSize>> chunk(
+        windowedInput.data() + kModelInputMax * i, kModelInputMax);
+    inputTensor = executorch::extension::from_blob(
+        chunk.data(), {kModelInputMax, kPaddedWindowSize},
+        executorch::aten::ScalarType::Float);
+    auto forwardResult = BaseModel::forward(inputTensor);
+    if (!forwardResult.ok()) {
+      throw std::runtime_error(
+          "Failed to forward, error: " +
+          std::to_string(static_cast<uint32_t>(forwardResult.error())));
+    }
+    auto tensor = forwardResult->at(0).toTensor();
+    startIdx = utils::getNonSpeechClassProbabilites(
+        tensor, tensor.size(2), tensor.size(1), scores, startIdx);
+  }
+  // Only run a trailing pass when frames are left over. With remainder == 0
+  // the old code still ran the model on kModelInputMin all-zero frames and
+  // then asked the helper for zero scores.
+  // NOTE(review): assumes getNonSpeechClassProbabilites writes nothing when
+  // asked for zero frames - confirm.
+  if (remainder > 0) {
+    auto lastChunkSize = remainder;
+    if (remainder < kModelInputMin) {
+      // Pad with zero frames up to kModelInputMin; only `remainder` scores
+      // are read back below.
+      auto paddingSize = kModelInputMin - remainder;
+      lastChunkSize = kModelInputMin;
+      windowedInput.insert(windowedInput.end(), paddingSize,
+                           std::array<float, kPaddedWindowSize>{});
+    }
+    // data() is taken after the insert above, which may have reallocated.
+    std::span<std::array<float, kPaddedWindowSize>> lastChunk(
+        windowedInput.data() + kModelInputMax * fullChunks, lastChunkSize);
+    inputTensor = executorch::extension::from_blob(
+        lastChunk.data(), {lastChunkSize, kPaddedWindowSize},
+        executorch::aten::ScalarType::Float);
+    auto forwardResult = BaseModel::forward(inputTensor);
+    if (!forwardResult.ok()) {
+      throw std::runtime_error(
+          "Failed to forward, error: " +
+          std::to_string(static_cast<uint32_t>(forwardResult.error())));
+    }
+    auto tensor = forwardResult->at(0).toTensor();
+    startIdx = utils::getNonSpeechClassProbabilites(
+        tensor, tensor.size(2), remainder, scores, startIdx);
+  }
+  return postprocess(scores, kSpeechThreshold);
+}
+// Converts per-frame scores into padded segments.
+//
+// Each entry of `scores` is flipped to `1 - score` before it is compared with
+// `threshold`. While idle, the first frame at or above the threshold becomes a
+// candidate start; the segment opens once a later qualifying frame lies at
+// least kMinSpeechDuration frames after it, and any frame below the threshold
+// discards the candidate. While a segment is open the same rule is applied in
+// reverse with kMinSilenceDuration to find its end. A segment still open when
+// the scores run out ends at scores.size(). Finally every segment is widened
+// by kSpeechPad frames on both sides, clamped to [0, scores.size()], and both
+// bounds are multiplied by kHopLength.
+std::vector<types::Segment>
+VoiceActivityDetection::postprocess(const std::vector<float> &scores,
+                                    float threshold) const {
+  std::vector<types::Segment> segments{};
+  bool inSpeech = false;
+  ssize_t segmentBegin = -1;
+  ssize_t segmentEnd = -1;
+  ssize_t candidateBegin = -1;
+  ssize_t candidateEnd = -1;
+  for (size_t frame = 0; frame < scores.size(); ++frame) {
+    const float speechScore = 1 - scores[frame];
+    if (inSpeech) {
+      if (!(speechScore < threshold)) {
+        // Still speech: drop any pending end candidate.
+        candidateEnd = -1;
+      } else if (candidateEnd == -1) {
+        candidateEnd = frame;
+      } else if (frame - candidateEnd >= kMinSilenceDuration) {
+        inSpeech = false;
+        segmentEnd = candidateEnd;
+        segments.emplace_back(segmentBegin, segmentEnd);
+        candidateEnd = -1;
+      }
+      continue;
+    }
+    if (!(speechScore >= threshold)) {
+      // Not speech: drop any pending start candidate.
+      candidateBegin = -1;
+    } else if (candidateBegin == -1) {
+      candidateBegin = frame;
+    } else if (frame - candidateBegin >= kMinSpeechDuration) {
+      inSpeech = true;
+      segmentBegin = candidateBegin;
+      candidateBegin = -1;
+    }
+  }
+  // Close a segment that was still open at the last frame.
+  if (inSpeech) {
+    segmentEnd = scores.size();
+    segments.emplace_back(segmentBegin, segmentEnd);
+  }
+  for (auto &[start, end] : segments) {
+    // The conditional avoids the wrap-around that std::max(start - kSpeechPad,
+    // 0) would hit for an unsigned start smaller than the pad.
+    const auto paddedStart = start > kSpeechPad ? start - kSpeechPad : 0;
+    start = paddedStart * kHopLength;
+    const auto paddedEnd = std::min(end + kSpeechPad, scores.size());
+    end = paddedEnd * kHopLength;
+  }
+  return segments;
+}
+} // namespace rnexecutorch::models::voice_activity_detection

package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h ADDED Viewed

@@ -0,0 +1,36 @@
+#pragma once
+#include <cstddef>
+#include <executorch/extension/module/module.h>
+#include <executorch/extension/tensor/tensor.h>
+#include <executorch/extension/tensor/tensor_ptr.h>
+#include <executorch/runtime/core/evalue.h>
+#include <span>
+#include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
+#include "rnexecutorch/models/BaseModel.h"
+#include "rnexecutorch/models/voice_activity_detection/Constants.h"
+#include "rnexecutorch/models/voice_activity_detection/Types.h"
+namespace rnexecutorch {
+namespace models::voice_activity_detection {
+using executorch::extension::TensorPtr;
+using executorch::runtime::EValue;
+// Voice activity detection model built on top of BaseModel. generate() is the
+// only public operation; preprocess() and postprocess() are its private
+// helper stages.
+class VoiceActivityDetection : public BaseModel {
+public:
+  // modelSource and callInvoker are handed on to the BaseModel constructor.
+  VoiceActivityDetection(const std::string &modelSource,
+                         std::shared_ptr<react::CallInvoker> callInvoker);
+  // Runs detection on `waveform` and returns the detected segments.
+  [[nodiscard("Registered non-void function")]] std::vector<types::Segment>
+  generate(std::span<float> waveform) const;
+private:
+  // Cuts `waveform` into frames, each stored in an array of
+  // constants::kPaddedWindowSize floats.
+  std::vector<std::array<float, constants::kPaddedWindowSize>>
+  preprocess(std::span<float> waveform) const;
+  // Turns per-frame `scores` into segments, using `threshold` as the cut-off.
+  std::vector<types::Segment> postprocess(const std::vector<float> &scores,
+                                          float threshold) const;
+};
+} // namespace models::voice_activity_detection
+REGISTER_CONSTRUCTOR(models::voice_activity_detection::VoiceActivityDetection,
+                     std::string, std::shared_ptr<react::CallInvoker>);
+} // namespace rnexecutorch

package/common/rnexecutorch/tests/CMakeLists.txt ADDED Viewed

@@ -0,0 +1,30 @@
+# CMAKE_CXX_STANDARD 20 is only recognised by CMake 3.12 and newer.
+cmake_minimum_required(VERSION 3.12)
+project(RNExecutorchTests)
+# C++ standard
+set(CMAKE_CXX_STANDARD 20)
+set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
+# Resolve every path relative to this file, so the build also works when this
+# directory is added from a parent project.
+set(RNEXECUTORCH_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
+# googletest subdirectory
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/../../../../../third-party/googletest ${PROJECT_BINARY_DIR}/googletest)
+# Testing functionalities
+enable_testing()
+# Source files
+set(SOURCE_FILES ${RNEXECUTORCH_DIR}/data_processing/Numerical.cpp)
+# Executables for the tests
+add_executable(NumericalTests NumericalTest.cpp ${SOURCE_FILES})
+add_executable(LogTests LogTest.cpp)
+# Include paths, libraries and CTest registration, scoped to each test target
+foreach(test_target NumericalTests LogTests)
+  target_include_directories(${test_target} PRIVATE
+    ${RNEXECUTORCH_DIR}/data_processing
+    ${RNEXECUTORCH_DIR})
+  target_link_libraries(${test_target} PRIVATE gtest gtest_main)
+  add_test(NAME ${test_target} COMMAND ${test_target})
+endforeach()

package/common/rnexecutorch/tests/NumericalTest.cpp ADDED Viewed

@@ -0,0 +1,110 @@
+#include "../data_processing/Numerical.h"
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <gtest/gtest.h>
+#include <limits>
+#include <span>
+#include <stdexcept>
+#include <vector>
+namespace rnexecutorch::numerical {
+// Helper that checks two float vectors are element-wise equal within `atol`.
+// A size mismatch is reported through ASSERT_EQ and stops the comparison.
+// Each element failure names its index, so a failing test shows which
+// position differs.
+void expect_vectors_eq(const std::vector<float> &vector1,
+                       const std::vector<float> &vector2, float atol = 1.0e-6F) {
+  ASSERT_EQ(vector1.size(), vector2.size());
+  for (size_t i = 0; i < vector1.size(); i++) {
+    EXPECT_NEAR(vector1[i], vector2[i], atol) << "vectors differ at index " << i;
+  }
+}
+// softmax() rewrites its argument in place; the reference values for
+// {1, 2, 3} are precomputed.
+TEST(SoftmaxTests, SoftmaxBasic) {
+  std::vector<float> logits{1.0F, 2.0F, 3.0F};
+  softmax(logits);
+  expect_vectors_eq(logits, {0.09003057F, 0.24472847F, 0.66524095F});
+}
+// Three equal, very large inputs must still come out as one third each.
+TEST(SoftmaxTests, SoftmaxWithBigValues) {
+  std::vector<float> logits(3, 100000.0F);
+  softmax(logits);
+  expect_vectors_eq(logits, std::vector<float>(3, 0.3333333F));
+}
+// An empty input must not throw.
+TEST(SoftmaxTests, SoftmaxOfEmptyVector) {
+  std::vector<float> nothing;
+  EXPECT_NO_THROW(softmax(nothing));
+}
+// normalize() rewrites its argument in place; the expected values are the
+// inputs divided by their Euclidean norm, sqrt(1 + 4 + 9).
+TEST(NormalizeTests, NormalizeBasic) {
+  std::vector<float> input = {1.0F, 2.0F, 3.0F};
+  normalize(input);
+  // std::sqrt has a float overload; std::sqrtf is not declared in namespace
+  // std by every standard library.
+  const auto normOfInput = std::sqrt(14.0F);
+  const std::vector<float> expected = {1.0F / normOfInput, 2.0F / normOfInput,
+                                       3.0F / normOfInput};
+  expect_vectors_eq(input, expected);
+}
+// Three equal tiny components must normalise to 1 / sqrt(3) each.
+TEST(NormalizeTests, NormalizationOfExtremelySmallValues) {
+  constexpr auto epsilon = std::numeric_limits<float>::epsilon();
+  std::vector<float> input(3, epsilon);
+  const auto normOfInput = std::sqrt(3.0F);
+  const std::vector<float> expected(3, 1.0F / normOfInput);
+  normalize(input);
+  expect_vectors_eq(input, expected);
+}
+// A zero vector must not throw.
+TEST(NormalizeTests, NormalizationOfZeroVector) {
+  std::vector<float> zeroVector(3, 0.0F);
+  EXPECT_NO_THROW(normalize(zeroVector));
+}
+// An empty vector must not throw.
+TEST(NormalizeTests, NormalizationOfEmptyVector) {
+  std::vector<float> emptyVector{};
+  EXPECT_NO_THROW(normalize(emptyVector));
+}
+// Six outputs with a three-entry mask {1, 1, 0}: the expected result is the
+// two-element mean {2, 3}.
+TEST(MeanPoolingTests, MeanPoolingBasic) {
+  const std::vector<float> outputValues{1.0F, 2.0F, 3.0F, 4.0F, 5.0F, 6.0F};
+  const std::vector<int64_t> maskValues{1, 1, 0};
+  const std::span<const float> outputView(outputValues);
+  const std::span<const int64_t> maskView(maskValues);
+  const auto pooled = meanPooling(outputView, maskView);
+  expect_vectors_eq(pooled, {2.0F, 3.0F});
+}
+// With an all-zero mask the expected result is all zeros.
+TEST(MeanPoolingTests, MeanPoolingWithZeroAttentionMask) {
+  const std::vector<float> outputValues{1.0F, 2.0F, 3.0F, 4.0F, 5.0F, 6.0F};
+  const std::vector<int64_t> maskValues(3, 0);
+  const std::span<const float> outputView(outputValues);
+  const std::span<const int64_t> maskView(maskValues);
+  const auto pooled = meanPooling(outputView, maskView);
+  expect_vectors_eq(pooled, {0.0F, 0.0F});
+}
+// Four outputs with a three-entry mask must be rejected.
+TEST(MeanPoolingTests, InvalidDimensionSize) {
+  const std::vector<float> outputValues{1.0F, 2.0F, 3.0F, 4.0F};
+  const std::vector<int64_t> maskValues{1, 1, 1};
+  EXPECT_THROW(
+      { meanPooling(outputValues, maskValues); },
+      std::invalid_argument);
+}
+// An empty mask must be rejected.
+TEST(MeanPoolingTests, EmptyAttentionMask) {
+  const std::vector<float> outputValues{1.0F, 2.0F, 3.0F, 4.0F};
+  const std::vector<int64_t> maskValues;
+  EXPECT_THROW(
+      { meanPooling(outputValues, maskValues); },
+      std::invalid_argument);
+}
+} // namespace rnexecutorch::numerical

package/common/rnexecutorch/tests/README.md CHANGED Viewed

@@ -2,19 +2,36 @@
 This guide provides information on how functions are tested, how to install all needed dependencies, and how to run tests.
 ### Used Tools
-To test the native code we use [`googletest`](https://github.com/google/googletest). It is a flexible tool for creating unit tests.
+To test the native code we use [`googletest`](https://github.com/google/googletest). It's a flexible tool for creating unit tests.
 ### Installation
-The easiest way to install `googletest` is following:
-* Clone repo locally and checkout on newest release:
-  `git clone git@github.com:google/googletest.git && cd googletest && git switch --detach v1.17.0`
-* Build library files:
-  * `mkdir build && cd build`
-  * `cmake ..`
-  * `make`
-* Add `/usr/local/include` and `/usr/local/lib` to your path if not already there.
+googletest is already part of the repository, as a git submodule at `react-native-executorch/third-party/googletest`. First, fetch it locally by running the following from the root directory of the project:
+* `git submodule update --init --recursive third-party/googletest`
-### Usage
-To run tests please use:
-* `run_test.sh` if you want to run one specific test, e.g. `run_test.sh LogTest.cpp`.
-* `run_all_tests.sh` if you want to run all tests in the `tests` directory.
+### Build Test Files
+To build the tests, navigate to the tests directory:
+* `cd packages/react-native-executorch/common/rnexecutorch/tests`
+and then run:
+* `mkdir build && cd build`
+* `cmake ..`
+* `make`
+### Run Tests
+To run tests use the following command in `packages/react-native-executorch/common/rnexecutorch/tests/build`:
+* `ctest --verbose`
+Every time you update the source code, you need to recompile the test files using `cmake .. && make`.
+### How to add a new test
+To add a new test, you need to:
+* Place `*.cpp` file with tests using googletest in this directory.
+* In `CMakeLists.txt`, add all executables and link them with googletest, e.g.:
+    ```
+    set(SOURCE_FILES ${CMAKE_SOURCE_DIR}/../data_processing/Numerical.cpp)
+    add_executable(NumericalTests NumericalTest.cpp ${SOURCE_FILES})
+    target_link_libraries(NumericalTests gtest gtest_main)
+    ```
+* Add test execution, e.g.:
+    ```
+    add_test(NAME NumericalTests COMMAND NumericalTests)
+    ```

package/common/rnexecutorch/threads/GlobalThreadPool.h CHANGED Viewed

@@ -4,6 +4,7 @@
 #include <executorch/extension/threadpool/cpuinfo_utils.h>
 #include <memory>
 #include <mutex>
+#include <opencv2/opencv.hpp>
 #include <optional>
 #include <rnexecutorch/Log.h>
 #include <rnexecutorch/threads/HighPerformanceThreadPool.h>
@@ -38,6 +39,9 @@ public:
           numThreads, "threads");
       instance = std::make_unique<HighPerformanceThreadPool>(numThreads.value(),
                                                              config);
+      // Disable OpenCV's internal threading to prevent it from overriding our
+      // thread pool configuration, which would cause degraded performance
+      cv::setNumThreads(0);
     });
   }

package/common/runner/arange_util.cpp ADDED Viewed

@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#include "arange_util.h"
+namespace torch::executor::native {
+// Writes start + i * step into element i of `out` for i in [0, numel),
+// converting each value to the element type selected by
+// ET_SWITCH_REALHBF16_TYPES from out.scalar_type(). Every macro argument is
+// parenthesised so that an expression passed as an argument keeps its own
+// precedence.
+#define ET_ARANGE_IMPL(ctx, start, numel, step, out, op_name)                  \
+  ET_SWITCH_REALHBF16_TYPES((out).scalar_type(), ctx, op_name, CTYPE, [&]() {  \
+    auto out_data = (out).mutable_data_ptr<CTYPE>();                           \
+    for (executorch::aten::SizesType i = 0; i < (numel); ++i) {                \
+      out_data[i] = static_cast<CTYPE>((start) + i * (step));                  \
+    }                                                                          \
+  })
+// Returns the number of elements in the sequence start, start + step, ...
+// that stops before `end`, i.e. ceil((end - start) / step).
+//
+// Two conditions are enforced with ET_CHECK_MSG: `step` must be non-zero and
+// the resulting count must be non-negative.
+executorch::aten::SizesType compute_arange_out_size(double start, double end,
+                                                    double step) {
+  // A zero step would divide by zero below, and converting the resulting
+  // infinity or NaN to an integer type is undefined behaviour.
+  ET_CHECK_MSG(step != 0.0,
+               "step should be non-zero. start (%f), end (%f), step (%f)",
+               start, end, step);
+  executorch::aten::SizesType numel =
+      static_cast<executorch::aten::SizesType>(std::ceil((end - start) / step));
+  ET_CHECK_MSG(numel >= 0,
+               "numel should be non-negative, but got (%" PRId64
+               "). start (%f), end (%f), step (%f)",
+               static_cast<int64_t>(numel), start, end, step);
+  return numel;
+}
+// Fills `out` with start, start + step, ...; the element count comes from
+// compute_arange_out_size(start, end, step).
+void arange_out_impl(KernelRuntimeContext &ctx, double start, double end,
+                     double step, Tensor &out) {
+  (void)ctx;
+  const executorch::aten::SizesType element_count =
+      compute_arange_out_size(start, end, step);
+  ET_ARANGE_IMPL(ctx, start, element_count, step, out, "arange.start_out");
+}
+// Fills `out` with 0, 1, 2, ... for every index below `end`. `end` is used
+// directly as the loop bound here; compute_arange_out_size is not called.
+void arange_out_impl(KernelRuntimeContext &ctx, double end, Tensor &out) {
+  (void)ctx;
+  constexpr double first_value = 0.0;
+  constexpr double unit_step = 1.0;
+  ET_ARANGE_IMPL(ctx, first_value, end, unit_step, out, "arange.out");
+}
+} // namespace torch::executor::native

package/common/runner/arange_util.h ADDED Viewed

@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+#include "kernel_includes.h"
+namespace torch::executor::native {
+// Element count of the sequence described by (start, end, step); defined in
+// arange_util.cpp.
+executorch::aten::SizesType compute_arange_out_size(double start, double end,
+                                                    double step);
+// Shorthand for the sequence that starts at 0 and advances by 1.
+inline executorch::aten::SizesType compute_arange_out_size(double end) {
+  constexpr double first_value = 0.0;
+  constexpr double unit_step = 1.0;
+  return compute_arange_out_size(first_value, end, unit_step);
+}
+// Context-taking overloads; defined in arange_util.cpp.
+void arange_out_impl(KernelRuntimeContext &ctx, double start, double end,
+                     double step, Tensor &out);
+void arange_out_impl(KernelRuntimeContext &ctx, double end, Tensor &out);
+// Convenience overload that supplies a default-constructed context.
+inline void arange_out_impl(double start, double end, double step,
+                            Tensor &out) {
+  KernelRuntimeContext local_ctx;
+  arange_out_impl(local_ctx, start, end, step, out);
+}
+// Convenience overload for start 0 and step 1; it goes through the overload
+// above, which creates the context and calls the (start, end, step) form.
+inline void arange_out_impl(double end, Tensor &out) {
+  arange_out_impl(0.0, end, 1.0, out);
+}
+} // namespace torch::executor::native

package/common/runner/constants.h ADDED Viewed

@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+#pragma once
+// constants for LLM runtime
+namespace executorch::extension::llm {
+// Runtime metadata keys. Each constant is a pointer to a string literal; the
+// type is spelled out and matches what `auto` deduces for the literal.
+inline constexpr const char *kEnableDynamicShape = "enable_dynamic_shape";
+inline constexpr const char *kBosId = "get_bos_id";
+inline constexpr const char *kEosIds = "get_eos_ids";
+inline constexpr const char *kMaxSeqLen = "get_max_seq_len";
+inline constexpr const char *kMaxContextLen = "get_max_context_len";
+inline constexpr const char *kVocabSize = "get_vocab_size";
+inline constexpr const char *kUseKVCache = "use_kv_cache";
+inline constexpr const char *kUseSDPAWithKVCache = "use_sdpa_with_kv_cache";
+// Method names used by convention for multimodal models.
+inline constexpr const char *kVisionEncoderMethod = "vision_encoder";
+inline constexpr const char *kAudioEncoderMethod = "audio_encoder";
+inline constexpr const char *kTokenEmbeddingMethod = "token_embedding";
+inline constexpr const char *kTextModelMethod = "text_decoder";
+} // namespace executorch::extension::llm