react-native-executorch 0.5.15 → 0.6.0-nightly-897eae9-20251213
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -36
- package/android/CMakeLists.txt +13 -25
- package/android/build.gradle +2 -3
- package/android/libs/classes.jar +0 -0
- package/android/src/main/cpp/CMakeLists.txt +2 -1
- package/common/rnexecutorch/RnExecutorchInstaller.cpp +18 -0
- package/common/rnexecutorch/TokenizerModule.cpp +3 -3
- package/common/rnexecutorch/data_processing/Numerical.cpp +31 -23
- package/common/rnexecutorch/data_processing/Numerical.h +6 -1
- package/common/rnexecutorch/data_processing/dsp.cpp +0 -46
- package/common/rnexecutorch/host_objects/JsiConversions.h +16 -0
- package/common/rnexecutorch/host_objects/ModelHostObject.h +26 -11
- package/common/rnexecutorch/jsi/OwningArrayBuffer.h +19 -2
- package/common/rnexecutorch/metaprogramming/TypeConcepts.h +0 -20
- package/common/rnexecutorch/models/BaseModel.cpp +12 -11
- package/common/rnexecutorch/models/BaseModel.h +18 -10
- package/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp +3 -11
- package/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp +0 -1
- package/common/rnexecutorch/models/image_segmentation/ImageSegmentation.cpp +6 -12
- package/common/rnexecutorch/models/llm/LLM.cpp +25 -8
- package/common/rnexecutorch/models/llm/LLM.h +4 -4
- package/common/rnexecutorch/models/ocr/CTCLabelConverter.h +1 -1
- package/common/rnexecutorch/models/ocr/utils/RecognitionHandlerUtils.cpp +7 -4
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.cpp +8 -13
- package/common/rnexecutorch/models/speech_to_text/SpeechToText.h +1 -3
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.cpp +12 -19
- package/common/rnexecutorch/models/speech_to_text/asr/ASR.h +4 -5
- package/common/rnexecutorch/models/text_to_image/Constants.h +9 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.cpp +32 -0
- package/common/rnexecutorch/models/text_to_image/Decoder.h +24 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.cpp +44 -0
- package/common/rnexecutorch/models/text_to_image/Encoder.h +32 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.cpp +152 -0
- package/common/rnexecutorch/models/text_to_image/Scheduler.h +41 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.cpp +141 -0
- package/common/rnexecutorch/models/text_to_image/TextToImage.h +64 -0
- package/common/rnexecutorch/models/text_to_image/UNet.cpp +38 -0
- package/common/rnexecutorch/models/text_to_image/UNet.h +28 -0
- package/common/rnexecutorch/models/voice_activity_detection/Constants.h +27 -0
- package/common/rnexecutorch/models/voice_activity_detection/Types.h +12 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.cpp +15 -0
- package/common/rnexecutorch/models/voice_activity_detection/Utils.h +13 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.cpp +160 -0
- package/common/rnexecutorch/models/voice_activity_detection/VoiceActivityDetection.h +36 -0
- package/common/rnexecutorch/tests/CMakeLists.txt +30 -0
- package/common/rnexecutorch/tests/NumericalTest.cpp +110 -0
- package/common/rnexecutorch/tests/README.md +30 -13
- package/common/rnexecutorch/threads/GlobalThreadPool.h +4 -0
- package/common/runner/arange_util.cpp +44 -0
- package/common/runner/arange_util.h +37 -0
- package/common/runner/constants.h +28 -0
- package/common/runner/io_manager.h +240 -0
- package/common/runner/irunner.h +87 -16
- package/common/runner/kernel_includes.h +23 -0
- package/common/runner/runner.cpp +151 -66
- package/common/runner/runner.h +39 -22
- package/common/runner/sampler.cpp +8 -1
- package/common/runner/sampler.h +4 -2
- package/common/runner/stats.h +1 -4
- package/common/runner/text_decoder_runner.cpp +26 -12
- package/common/runner/text_decoder_runner.h +52 -31
- package/common/runner/text_prefiller.cpp +46 -12
- package/common/runner/text_prefiller.h +38 -4
- package/common/runner/text_token_generator.h +51 -26
- package/common/runner/util.h +53 -8
- package/ios/RnExecutorch.xcodeproj/project.pbxproj +0 -23
- package/lib/module/Error.js +1 -0
- package/lib/module/Error.js.map +1 -1
- package/lib/module/constants/directories.js +1 -1
- package/lib/module/constants/directories.js.map +1 -1
- package/lib/module/constants/modelUrls.js +32 -1
- package/lib/module/constants/modelUrls.js.map +1 -1
- package/lib/module/constants/ocr/models.js +7 -7
- package/lib/module/constants/ocr/models.js.map +1 -1
- package/lib/module/constants/ocr/symbols.js +3 -2
- package/lib/module/constants/ocr/symbols.js.map +1 -1
- package/lib/module/controllers/LLMController.js +10 -1
- package/lib/module/controllers/LLMController.js.map +1 -1
- package/lib/module/controllers/OCRController.js +3 -3
- package/lib/module/controllers/OCRController.js.map +1 -1
- package/lib/module/controllers/VerticalOCRController.js +2 -2
- package/lib/module/controllers/VerticalOCRController.js.map +1 -1
- package/lib/module/hooks/computer_vision/useOCR.js +3 -3
- package/lib/module/hooks/computer_vision/useOCR.js.map +1 -1
- package/lib/module/hooks/{useNonStaticModule.js → computer_vision/useTextToImage.js} +21 -16
- package/lib/module/hooks/computer_vision/useTextToImage.js.map +1 -0
- package/lib/module/hooks/computer_vision/useVerticalOCR.js +3 -3
- package/lib/module/hooks/computer_vision/useVerticalOCR.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useLLM.js +3 -3
- package/lib/module/hooks/natural_language_processing/useLLM.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useTokenizer.js +5 -5
- package/lib/module/hooks/natural_language_processing/useTokenizer.js.map +1 -1
- package/lib/module/hooks/natural_language_processing/useVAD.js +13 -0
- package/lib/module/hooks/natural_language_processing/useVAD.js.map +1 -0
- package/lib/module/index.js +7 -2
- package/lib/module/index.js.map +1 -1
- package/lib/module/modules/computer_vision/OCRModule.js +2 -2
- package/lib/module/modules/computer_vision/OCRModule.js.map +1 -1
- package/lib/module/modules/computer_vision/TextToImageModule.js +48 -0
- package/lib/module/modules/computer_vision/TextToImageModule.js.map +1 -0
- package/lib/module/modules/computer_vision/VerticalOCRModule.js +2 -2
- package/lib/module/modules/computer_vision/VerticalOCRModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js +7 -4
- package/lib/module/modules/natural_language_processing/SpeechToTextModule.js.map +1 -1
- package/lib/module/modules/natural_language_processing/VADModule.js +19 -0
- package/lib/module/modules/natural_language_processing/VADModule.js.map +1 -0
- package/lib/module/types/llm.js.map +1 -1
- package/lib/module/types/vad.js +2 -0
- package/lib/module/types/vad.js.map +1 -0
- package/lib/module/utils/ResourceFetcher.js +2 -1
- package/lib/module/utils/ResourceFetcher.js.map +1 -1
- package/lib/module/utils/ResourceFetcherUtils.js +6 -6
- package/lib/module/utils/ResourceFetcherUtils.js.map +1 -1
- package/lib/typescript/Error.d.ts +1 -0
- package/lib/typescript/Error.d.ts.map +1 -1
- package/lib/typescript/constants/modelUrls.d.ts +23 -0
- package/lib/typescript/constants/modelUrls.d.ts.map +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts +1 -1
- package/lib/typescript/constants/ocr/symbols.d.ts.map +1 -1
- package/lib/typescript/controllers/LLMController.d.ts.map +1 -1
- package/lib/typescript/controllers/OCRController.d.ts +1 -1
- package/lib/typescript/controllers/OCRController.d.ts.map +1 -1
- package/lib/typescript/controllers/VerticalOCRController.d.ts +1 -1
- package/lib/typescript/controllers/VerticalOCRController.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts +1 -1
- package/lib/typescript/hooks/computer_vision/useOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts +22 -0
- package/lib/typescript/hooks/computer_vision/useTextToImage.d.ts.map +1 -0
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts +1 -1
- package/lib/typescript/hooks/computer_vision/useVerticalOCR.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useLLM.d.ts.map +1 -1
- package/lib/typescript/hooks/natural_language_processing/useSpeechToText.d.ts +2 -2
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts +16 -0
- package/lib/typescript/hooks/natural_language_processing/useVAD.d.ts.map +1 -0
- package/lib/typescript/index.d.ts +8 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts +1 -1
- package/lib/typescript/modules/computer_vision/OCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts +16 -0
- package/lib/typescript/modules/computer_vision/TextToImageModule.d.ts.map +1 -0
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts +1 -1
- package/lib/typescript/modules/computer_vision/VerticalOCRModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts +3 -2
- package/lib/typescript/modules/natural_language_processing/SpeechToTextModule.d.ts.map +1 -1
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts +10 -0
- package/lib/typescript/modules/natural_language_processing/VADModule.d.ts.map +1 -0
- package/lib/typescript/types/llm.d.ts +2 -0
- package/lib/typescript/types/llm.d.ts.map +1 -1
- package/lib/typescript/types/vad.d.ts +5 -0
- package/lib/typescript/types/vad.d.ts.map +1 -0
- package/lib/typescript/utils/ResourceFetcher.d.ts +29 -0
- package/lib/typescript/utils/ResourceFetcher.d.ts.map +1 -1
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts +2 -2
- package/lib/typescript/utils/ResourceFetcherUtils.d.ts.map +1 -1
- package/package.json +11 -8
- package/react-native-executorch.podspec +9 -9
- package/src/Error.ts +1 -0
- package/src/constants/directories.ts +1 -1
- package/src/constants/modelUrls.ts +36 -1
- package/src/constants/ocr/models.ts +7 -7
- package/src/constants/ocr/symbols.ts +3 -2
- package/src/controllers/LLMController.ts +12 -1
- package/src/controllers/OCRController.ts +3 -3
- package/src/controllers/VerticalOCRController.ts +2 -2
- package/src/hooks/computer_vision/useOCR.ts +4 -5
- package/src/hooks/computer_vision/useTextToImage.ts +92 -0
- package/src/hooks/computer_vision/useVerticalOCR.ts +4 -5
- package/src/hooks/natural_language_processing/useLLM.ts +3 -4
- package/src/hooks/natural_language_processing/useTokenizer.ts +5 -5
- package/src/hooks/natural_language_processing/useVAD.ts +15 -0
- package/src/index.ts +20 -1
- package/src/modules/computer_vision/OCRModule.ts +2 -2
- package/src/modules/computer_vision/TextToImageModule.ts +93 -0
- package/src/modules/computer_vision/VerticalOCRModule.ts +2 -2
- package/src/modules/natural_language_processing/SpeechToTextModule.ts +8 -4
- package/src/modules/natural_language_processing/VADModule.ts +27 -0
- package/src/types/llm.ts +2 -0
- package/src/types/vad.ts +4 -0
- package/src/utils/ResourceFetcher.ts +2 -1
- package/src/utils/ResourceFetcherUtils.ts +8 -8
- package/third-party/android/libs/cpuinfo/arm64-v8a/libcpuinfo.so +0 -0
- package/third-party/android/libs/executorch/arm64-v8a/libexecutorch.so +0 -0
- package/third-party/android/libs/executorch/x86_64/libexecutorch.so +0 -0
- package/third-party/android/libs/pthreadpool/arm64-v8a/libpthreadpool.so +0 -0
- package/third-party/include/c10/macros/Export.h +0 -78
- package/third-party/include/c10/macros/Macros.h +1 -520
- package/third-party/include/c10/util/BFloat16-inl.h +1 -339
- package/third-party/include/c10/util/BFloat16.h +1 -122
- package/third-party/include/c10/util/Half-inl.h +1 -347
- package/third-party/include/c10/util/Half.h +6 -419
- package/third-party/include/c10/util/TypeSafeSignMath.h +1 -133
- package/third-party/include/c10/util/bit_cast.h +1 -43
- package/third-party/include/c10/util/complex.h +1 -568
- package/third-party/include/c10/util/floating_point_utils.h +1 -33
- package/third-party/include/c10/util/irange.h +1 -1
- package/third-party/include/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/c10/util/safe_numerics.h +97 -0
- package/third-party/include/executorch/ExecuTorchError.h +6 -7
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLM.h +12 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMConfig.h +56 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMError.h +16 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMMultimodalRunner.h +227 -0
- package/third-party/include/executorch/ExecuTorchLLM/ExecuTorchLLMTextRunner.h +97 -0
- package/third-party/include/executorch/ExecuTorchLLM/module.modulemap +4 -0
- package/third-party/include/executorch/ExecuTorchLog.h +1 -0
- package/third-party/include/executorch/ExecuTorchModule.h +177 -4
- package/third-party/include/executorch/ExecuTorchTensor.h +3 -4
- package/third-party/include/executorch/ExecuTorchValue.h +1 -7
- package/third-party/include/executorch/extension/module/module.h +139 -8
- package/third-party/include/executorch/extension/tensor/tensor.h +1 -0
- package/third-party/include/executorch/extension/tensor/tensor_ptr.h +88 -26
- package/third-party/include/executorch/extension/threadpool/threadpool.h +4 -1
- package/third-party/include/executorch/runtime/backend/backend_init_context.h +6 -0
- package/third-party/include/executorch/runtime/backend/interface.h +1 -1
- package/third-party/include/executorch/runtime/core/error.h +76 -49
- package/third-party/include/executorch/runtime/core/exec_aten/util/scalar_type_util.h +18 -4
- package/third-party/include/executorch/runtime/core/memory_allocator.h +12 -2
- package/third-party/include/executorch/runtime/core/named_data_map.h +1 -11
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Export.h +0 -78
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/macros/Macros.h +1 -520
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16-inl.h +1 -339
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/BFloat16.h +1 -122
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half-inl.h +1 -347
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/Half.h +6 -419
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/TypeSafeSignMath.h +1 -133
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/bit_cast.h +1 -43
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/complex.h +1 -568
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/floating_point_utils.h +1 -33
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/irange.h +1 -1
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/llvmMathExtras.h +866 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/c10/util/safe_numerics.h +97 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Export.h +66 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/executorch/runtime/core/portable_type/c10/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/include/executorch/runtime/core/tensor_layout.h +1 -1
- package/third-party/include/executorch/runtime/executor/merged_data_map.h +142 -0
- package/third-party/include/executorch/runtime/executor/method.h +21 -8
- package/third-party/include/executorch/runtime/executor/method_meta.h +20 -2
- package/third-party/include/executorch/runtime/executor/program.h +0 -10
- package/third-party/include/executorch/runtime/kernel/operator_registry.h +1 -1
- package/third-party/include/executorch/runtime/platform/compiler.h +2 -0
- package/third-party/include/executorch/schema/extended_header.h +10 -1
- package/third-party/include/torch/headeronly/macros/Export.h +66 -0
- package/third-party/include/torch/headeronly/macros/Macros.h +553 -0
- package/third-party/include/torch/headeronly/util/BFloat16.h +477 -0
- package/third-party/include/torch/headeronly/util/Half.h +781 -0
- package/third-party/include/torch/headeronly/util/TypeSafeSignMath.h +141 -0
- package/third-party/include/torch/headeronly/util/bit_cast.h +49 -0
- package/third-party/include/torch/headeronly/util/complex.h +593 -0
- package/third-party/include/torch/headeronly/util/floating_point_utils.h +38 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64/ExecutorchLib.framework/Info.plist +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/ExecutorchLib +0 -0
- package/third-party/ios/ExecutorchLib.xcframework/ios-arm64-simulator/ExecutorchLib.framework/Info.plist +0 -0
- package/common/rnexecutorch/tests/run_all_tests.sh +0 -14
- package/common/rnexecutorch/tests/run_test.sh +0 -18
- package/ios/RnExecutorch/utils/Conversions.h +0 -14
- package/ios/RnExecutorch/utils/ETError.h +0 -26
- package/ios/RnExecutorch/utils/ImageProcessor.h +0 -15
- package/ios/RnExecutorch/utils/ImageProcessor.mm +0 -147
- package/ios/RnExecutorch/utils/Numerical.h +0 -3
- package/ios/RnExecutorch/utils/Numerical.mm +0 -18
- package/ios/RnExecutorch/utils/ScalarType.h +0 -14
- package/ios/RnExecutorch/utils/ScalarType.mm +0 -21
- package/lib/module/hooks/useNonStaticModule.js.map +0 -1
- package/lib/typescript/hooks/useNonStaticModule.d.ts +0 -21
- package/lib/typescript/hooks/useNonStaticModule.d.ts.map +0 -1
- package/src/hooks/useNonStaticModule.ts +0 -74
- package/third-party/include/executorch/extension/kernel_util/make_boxed_from_unboxed_functor.h +0 -181
- package/third-party/include/executorch/extension/kernel_util/meta_programming.h +0 -108
- package/third-party/include/executorch/extension/kernel_util/type_list.h +0 -137
- package/third-party/include/executorch/extension/threadpool/threadpool_guard.h +0 -35
|
@@ -10,20 +10,17 @@
|
|
|
10
10
|
|
|
11
11
|
#pragma once
|
|
12
12
|
|
|
13
|
+
#include "io_manager.h"
|
|
13
14
|
#include "sampler.h"
|
|
14
|
-
#include <executorch/extension/module/module.h>
|
|
15
|
-
#include <executorch/extension/tensor/tensor.h>
|
|
16
|
-
#include <executorch/runtime/platform/compiler.h>
|
|
17
|
-
#include <functional>
|
|
18
15
|
|
|
19
16
|
namespace executorch {
|
|
20
17
|
namespace extension {
|
|
21
18
|
namespace llm {
|
|
22
19
|
|
|
23
|
-
class
|
|
20
|
+
class TextDecoderRunner {
|
|
24
21
|
public:
|
|
25
|
-
TextDecoderRunner(Module *module,
|
|
26
|
-
|
|
22
|
+
explicit TextDecoderRunner(Module *module, IOManager *io_manager,
|
|
23
|
+
float temperature = 0.8F, float topp = 0.9F);
|
|
27
24
|
|
|
28
25
|
virtual ~TextDecoderRunner() = default;
|
|
29
26
|
|
|
@@ -35,7 +32,7 @@ public:
|
|
|
35
32
|
* @return The output of the LLM Module. This will be a tensor of logits.
|
|
36
33
|
*/
|
|
37
34
|
virtual ::executorch::runtime::Result<executorch::aten::Tensor>
|
|
38
|
-
step(TensorPtr &input,
|
|
35
|
+
step(TensorPtr &input, int64_t start_pos);
|
|
39
36
|
|
|
40
37
|
/**
|
|
41
38
|
* Load the Module for text decode purpose.
|
|
@@ -53,43 +50,67 @@ public:
|
|
|
53
50
|
return module_->is_method_loaded("forward");
|
|
54
51
|
}
|
|
55
52
|
|
|
53
|
+
virtual void set_temperature(float temperature) noexcept {
|
|
54
|
+
temperature_ = temperature;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
virtual void set_topp(float topp) noexcept { topp_ = topp; }
|
|
58
|
+
|
|
56
59
|
inline void stop() { should_stop_ = true; }
|
|
57
60
|
|
|
58
61
|
/**
|
|
59
62
|
* Sample the next token from the logits tensor.
|
|
60
63
|
* @param logits_tensor The logits tensor.
|
|
64
|
+
* @param temperature The temperature parameter used to control randomness in
|
|
65
|
+
* sampling.
|
|
61
66
|
* @return The next token.
|
|
62
67
|
*/
|
|
63
|
-
inline int32_t
|
|
64
|
-
|
|
68
|
+
inline int32_t logits_to_token(const executorch::aten::Tensor &logits_tensor,
|
|
69
|
+
float temperature = -1.F, float topp = -1.F) {
|
|
65
70
|
int32_t result = 0;
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
71
|
+
|
|
72
|
+
temperature = temperature < 0.F ? temperature_ : temperature;
|
|
73
|
+
topp = topp < 0.F ? topp_ : topp;
|
|
74
|
+
|
|
75
|
+
// Create a minimal context for error handling in ET_SWITCH
|
|
76
|
+
struct {
|
|
77
|
+
[[noreturn]] void fail(torch::executor::Error /* error */) {
|
|
78
|
+
ET_CHECK_MSG(false, "Unsupported dtype in logits_to_token");
|
|
79
|
+
}
|
|
80
|
+
} ctx;
|
|
81
|
+
|
|
82
|
+
ET_SWITCH_FOUR_TYPES(
|
|
83
|
+
Float, Half, BFloat16, UInt16, logits_tensor.scalar_type(), ctx,
|
|
84
|
+
"logits_to_token", CTYPE, [&]() {
|
|
85
|
+
// If the logit_tensor rank is 3, the shape is [batch, seq_length,
|
|
86
|
+
// vocab_size], get the last logits, sample and return. Else the model
|
|
87
|
+
// outputs the last logit, directly sample and return.
|
|
88
|
+
auto *logits = logits_tensor.mutable_data_ptr<CTYPE>();
|
|
89
|
+
ssize_t vocab_size = logits_tensor.size(logits_tensor.dim() - 1);
|
|
90
|
+
if (logits_tensor.dim() == 3) {
|
|
91
|
+
auto num_tokens = logits_tensor.size(1);
|
|
92
|
+
logits += (num_tokens - 1) * vocab_size;
|
|
93
|
+
}
|
|
94
|
+
// @lint-ignore CLANGTIDY facebook-hte-Deprecated
|
|
95
|
+
Sampler sampler(vocab_size, temperature, topp);
|
|
96
|
+
result = sampler.sample(logits);
|
|
97
|
+
});
|
|
84
98
|
return result;
|
|
85
99
|
}
|
|
86
100
|
|
|
87
101
|
protected:
|
|
88
|
-
|
|
102
|
+
/**
|
|
103
|
+
* Note: TextDecoderRunner does not own the Module or IOManager instance. It
|
|
104
|
+
* is expected that the outer class (likely Runner) manages the lifecycle of
|
|
105
|
+
* them. This means that the responsibility for creating, maintaining, and
|
|
106
|
+
* destroying the Module lies outside of TextDecoderRunner. Ensure that the
|
|
107
|
+
* Module remains valid for the duration of TextDecoderRunner's usage.
|
|
108
|
+
*/
|
|
89
109
|
Module *module_;
|
|
90
|
-
|
|
91
|
-
bool use_kv_cache_;
|
|
110
|
+
IOManager *io_manager_;
|
|
92
111
|
bool should_stop_{false};
|
|
112
|
+
float temperature_;
|
|
113
|
+
float topp_;
|
|
93
114
|
};
|
|
94
115
|
|
|
95
116
|
} // namespace llm
|
|
@@ -10,15 +10,18 @@
|
|
|
10
10
|
// LLM.
|
|
11
11
|
|
|
12
12
|
#include "text_prefiller.h"
|
|
13
|
+
#include <algorithm>
|
|
13
14
|
|
|
14
15
|
namespace executorch {
|
|
15
16
|
namespace extension {
|
|
16
17
|
namespace llm {
|
|
17
18
|
|
|
18
19
|
TextPrefiller::TextPrefiller(TextDecoderRunner *text_decoder_runner,
|
|
19
|
-
bool use_kv_cache, bool enable_parallel_prefill
|
|
20
|
+
bool use_kv_cache, bool enable_parallel_prefill,
|
|
21
|
+
int64_t max_seq_len)
|
|
20
22
|
: text_decoder_runner_(text_decoder_runner), use_kv_cache_(use_kv_cache),
|
|
21
|
-
enable_parallel_prefill_(enable_parallel_prefill)
|
|
23
|
+
enable_parallel_prefill_(enable_parallel_prefill),
|
|
24
|
+
max_seq_len_(max_seq_len > 0 ? max_seq_len : 128) {}
|
|
22
25
|
|
|
23
26
|
::executorch::runtime::Result<uint64_t>
|
|
24
27
|
TextPrefiller::prefill(std::vector<uint64_t> &prompt_tokens,
|
|
@@ -27,6 +30,44 @@ TextPrefiller::prefill(std::vector<uint64_t> &prompt_tokens,
|
|
|
27
30
|
if (!text_decoder_runner_->is_method_loaded()) {
|
|
28
31
|
ET_CHECK_OK_OR_RETURN_ERROR(text_decoder_runner_->load());
|
|
29
32
|
}
|
|
33
|
+
|
|
34
|
+
// Check if we need to chunk the prompt tokens
|
|
35
|
+
int32_t num_prompt_tokens = prompt_tokens.size();
|
|
36
|
+
|
|
37
|
+
// If prompt tokens exceed max_seq_len_, we need to chunk them
|
|
38
|
+
if (num_prompt_tokens > max_seq_len_) {
|
|
39
|
+
uint64_t cur_token = 0;
|
|
40
|
+
int num_tokens_to_process = 0;
|
|
41
|
+
|
|
42
|
+
while (num_tokens_to_process < num_prompt_tokens) {
|
|
43
|
+
auto num_tokens_to_prefill_with = std::min<int>(
|
|
44
|
+
num_prompt_tokens - num_tokens_to_process, max_seq_len_);
|
|
45
|
+
|
|
46
|
+
std::vector<uint64_t> prompt_tokens_to_process(
|
|
47
|
+
num_tokens_to_prefill_with);
|
|
48
|
+
std::copy(prompt_tokens.begin() + num_tokens_to_process,
|
|
49
|
+
prompt_tokens.begin() + num_tokens_to_process +
|
|
50
|
+
num_tokens_to_prefill_with,
|
|
51
|
+
prompt_tokens_to_process.begin());
|
|
52
|
+
|
|
53
|
+
// Process this chunk
|
|
54
|
+
auto chunk_result = prefill_chunk(prompt_tokens_to_process, start_pos);
|
|
55
|
+
ET_CHECK_OK_OR_RETURN_ERROR(chunk_result.error());
|
|
56
|
+
cur_token = chunk_result.get();
|
|
57
|
+
|
|
58
|
+
num_tokens_to_process += num_tokens_to_prefill_with;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
return cur_token;
|
|
62
|
+
} else {
|
|
63
|
+
// If prompt tokens don't exceed max_seq_len_, process them directly
|
|
64
|
+
return prefill_chunk(prompt_tokens, start_pos);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
::executorch::runtime::Result<uint64_t>
|
|
69
|
+
TextPrefiller::prefill_chunk(std::vector<uint64_t> &prompt_tokens,
|
|
70
|
+
int64_t &start_pos) {
|
|
30
71
|
// enable_parallel_prefill_ maybe set even when not using kv cache
|
|
31
72
|
// When kv cache is not used, start pos is ignored
|
|
32
73
|
int32_t num_prompt_tokens = prompt_tokens.size();
|
|
@@ -38,10 +79,7 @@ TextPrefiller::prefill(std::vector<uint64_t> &prompt_tokens,
|
|
|
38
79
|
auto tokens = from_blob(prompt_tokens.data(), {1, num_prompt_tokens},
|
|
39
80
|
executorch::aten::ScalarType::Long);
|
|
40
81
|
|
|
41
|
-
auto
|
|
42
|
-
from_blob(&start_pos, {1}, executorch::aten::ScalarType::Long);
|
|
43
|
-
|
|
44
|
-
auto outputs_res = text_decoder_runner_->step(tokens, start_pos_tensor);
|
|
82
|
+
auto outputs_res = text_decoder_runner_->step(tokens, start_pos);
|
|
45
83
|
|
|
46
84
|
ET_CHECK_OK_OR_RETURN_ERROR(outputs_res.error());
|
|
47
85
|
ET_LOG(Info, "Prefill token result numel(): %zu",
|
|
@@ -58,13 +96,10 @@ TextPrefiller::prefill(std::vector<uint64_t> &prompt_tokens,
|
|
|
58
96
|
auto tokens =
|
|
59
97
|
from_blob(&cur_token, {1, 1}, executorch::aten::ScalarType::Long);
|
|
60
98
|
|
|
61
|
-
auto start_pos_tensor =
|
|
62
|
-
from_blob(&start_pos, {1}, executorch::aten::ScalarType::Long);
|
|
63
|
-
|
|
64
99
|
// run the first token and get back logits tensor. Assuming the first token
|
|
65
100
|
// is bos so don't callback.
|
|
66
101
|
auto logits_tensor =
|
|
67
|
-
ET_UNWRAP(text_decoder_runner_->step(tokens,
|
|
102
|
+
ET_UNWRAP(text_decoder_runner_->step(tokens, start_pos));
|
|
68
103
|
|
|
69
104
|
pos += 1; // start the loop from index 1
|
|
70
105
|
start_pos += 1;
|
|
@@ -74,8 +109,7 @@ TextPrefiller::prefill(std::vector<uint64_t> &prompt_tokens,
|
|
|
74
109
|
// NOLINTNEXTLINE(facebook-hte-ParameterUncheckedArrayBounds)
|
|
75
110
|
cur_token = prompt_tokens[pos];
|
|
76
111
|
|
|
77
|
-
logits_tensor =
|
|
78
|
-
ET_UNWRAP(text_decoder_runner_->step(tokens, start_pos_tensor));
|
|
112
|
+
logits_tensor = ET_UNWRAP(text_decoder_runner_->step(tokens, start_pos));
|
|
79
113
|
|
|
80
114
|
pos++;
|
|
81
115
|
start_pos++;
|
|
@@ -17,10 +17,12 @@ namespace executorch {
|
|
|
17
17
|
namespace extension {
|
|
18
18
|
namespace llm {
|
|
19
19
|
|
|
20
|
-
class
|
|
20
|
+
class TextPrefiller {
|
|
21
21
|
public:
|
|
22
|
-
TextPrefiller(TextDecoderRunner *text_decoder_runner, bool
|
|
23
|
-
bool enable_parallel_prefill);
|
|
22
|
+
TextPrefiller(TextDecoderRunner *text_decoder_runner, bool use_kv_cache,
|
|
23
|
+
bool enable_parallel_prefill, int64_t max_seq_len = 128);
|
|
24
|
+
|
|
25
|
+
virtual ~TextPrefiller() = default;
|
|
24
26
|
/**
|
|
25
27
|
* Prefill an LLM Module with the given text input.
|
|
26
28
|
* @param prompt_tokens The text prompt tokens to the LLM Module. Encoded by
|
|
@@ -29,13 +31,45 @@ public:
|
|
|
29
31
|
* Module.
|
|
30
32
|
* @return The next token of the LLM Module after prefill.
|
|
31
33
|
*/
|
|
32
|
-
::executorch::runtime::Result<uint64_t>
|
|
34
|
+
virtual ::executorch::runtime::Result<uint64_t>
|
|
33
35
|
prefill(std::vector<uint64_t> &prompt_tokens, int64_t &start_pos);
|
|
34
36
|
|
|
37
|
+
/**
|
|
38
|
+
* Helper method to prefill a chunk of tokens.
|
|
39
|
+
* @param prompt_tokens The chunk of text prompt tokens to process.
|
|
40
|
+
* @param start_pos The starting position in KV cache of the input in the LLM
|
|
41
|
+
* Module.
|
|
42
|
+
* @return The next token of the LLM Module after prefilling this chunk.
|
|
43
|
+
*/
|
|
44
|
+
virtual ::executorch::runtime::Result<uint64_t>
|
|
45
|
+
prefill_chunk(std::vector<uint64_t> &prompt_tokens, int64_t &start_pos);
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Load the necessary resources for the TextPrefiller.
|
|
49
|
+
* This method should be called before using the prefill methods.
|
|
50
|
+
*/
|
|
51
|
+
::executorch::runtime::Error load() { return text_decoder_runner_->load(); }
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Check if the TextPrefiller has been successfully loaded.
|
|
55
|
+
* @return True if the resources are loaded, false otherwise.
|
|
56
|
+
*/
|
|
57
|
+
bool inline is_loaded() const {
|
|
58
|
+
// Implementation to check if resources are loaded
|
|
59
|
+
return text_decoder_runner_->is_method_loaded();
|
|
60
|
+
}
|
|
61
|
+
|
|
35
62
|
private:
|
|
63
|
+
/**
|
|
64
|
+
* Note: TextPrefiller does not own the TextDecoderRunner instance.
|
|
65
|
+
* The responsibility of managing the lifecycle of TextDecoderRunner
|
|
66
|
+
* lies with the outer class or entity (likely Runner) that creates
|
|
67
|
+
* and passes the TextDecoderRunner instance to TextPrefiller.
|
|
68
|
+
*/
|
|
36
69
|
TextDecoderRunner *text_decoder_runner_;
|
|
37
70
|
bool use_kv_cache_;
|
|
38
71
|
bool enable_parallel_prefill_;
|
|
72
|
+
int64_t max_seq_len_;
|
|
39
73
|
};
|
|
40
74
|
|
|
41
75
|
} // namespace llm
|
|
@@ -11,39 +11,40 @@
|
|
|
11
11
|
|
|
12
12
|
#include "stats.h"
|
|
13
13
|
#include "text_decoder_runner.h"
|
|
14
|
-
#include
|
|
14
|
+
#include "util.h"
|
|
15
15
|
#include <executorch/extension/tensor/tensor.h>
|
|
16
|
-
#include <iostream>
|
|
17
16
|
#include <tokenizers-cpp/tokenizers_cpp.h>
|
|
18
17
|
|
|
19
18
|
namespace executorch {
|
|
20
19
|
namespace extension {
|
|
21
20
|
namespace llm {
|
|
22
21
|
|
|
23
|
-
class
|
|
22
|
+
class TextTokenGenerator {
|
|
24
23
|
public:
|
|
25
|
-
TextTokenGenerator(tokenizers::Tokenizer *tokenizer,
|
|
24
|
+
TextTokenGenerator(::tokenizers::Tokenizer *tokenizer,
|
|
26
25
|
TextDecoderRunner *text_decoder_runner, bool use_kv_cache,
|
|
27
26
|
std::unique_ptr<std::unordered_set<uint64_t>> &&eos_ids,
|
|
28
27
|
Stats *stats)
|
|
29
28
|
: tokenizer_(tokenizer), text_decoder_runner_(text_decoder_runner),
|
|
30
29
|
eos_ids_(std::move(eos_ids)), use_kv_cache_(use_kv_cache),
|
|
31
|
-
|
|
30
|
+
timestamp_(std::chrono::high_resolution_clock::now()), stats_(stats) {}
|
|
31
|
+
|
|
32
|
+
virtual ~TextTokenGenerator() = default;
|
|
32
33
|
|
|
33
34
|
/**
|
|
34
35
|
* Token generation loop.
|
|
35
|
-
* @param tokens
|
|
36
|
-
* prefill.
|
|
37
|
-
* @param start_pos
|
|
36
|
+
* @param tokens The first token generated by prefill, if using kv cache. Else
|
|
37
|
+
* the prompt tokens + the first token generated by prefill.
|
|
38
|
+
* @param start_pos The start position of the new tokens, based on how many
|
|
38
39
|
* prompt tokens is prefilled.
|
|
39
|
-
* @param
|
|
40
|
-
* token from prefill and new tokens.
|
|
40
|
+
* @param max_new_tokens Maximum number of new tokens to generate.
|
|
41
41
|
* @param token_callback what to do after a token is generated.
|
|
42
42
|
* @return how many tokens are generated.
|
|
43
43
|
*/
|
|
44
|
-
inline ::executorch::runtime::Result<int64_t>
|
|
45
|
-
|
|
46
|
-
|
|
44
|
+
inline ::executorch::runtime::Result<int64_t> generate(
|
|
45
|
+
std::vector<uint64_t> tokens, int64_t start_pos, int32_t max_new_tokens,
|
|
46
|
+
float temperature, float topp,
|
|
47
|
+
const std::function<void(const std::string &)> &token_callback = {}) {
|
|
47
48
|
ET_CHECK_MSG(!tokens.empty(),
|
|
48
49
|
"Token generation loop shouldn't take empty tokens");
|
|
49
50
|
int64_t pos = start_pos; // position in the sequence
|
|
@@ -53,7 +54,8 @@ public:
|
|
|
53
54
|
|
|
54
55
|
// Token after prefill
|
|
55
56
|
uint64_t cur_token = tokens.back();
|
|
56
|
-
uint64_t prev_token;
|
|
57
|
+
[[maybe_unused]] uint64_t prev_token;
|
|
58
|
+
|
|
57
59
|
// cache to keep tokens if they were decoded into illegal character
|
|
58
60
|
std::vector<int32_t> token_cache;
|
|
59
61
|
// add first token after prefill to cache here
|
|
@@ -72,16 +74,14 @@ public:
|
|
|
72
74
|
// initialize tensor wrappers
|
|
73
75
|
auto tokens_managed = from_blob(token_data.data(), token_shape,
|
|
74
76
|
executorch::aten::ScalarType::Long);
|
|
75
|
-
auto start_pos_managed =
|
|
76
|
-
from_blob(&pos, {1}, executorch::aten::ScalarType::Long);
|
|
77
77
|
|
|
78
78
|
should_stop_ = false;
|
|
79
79
|
timestamp_ = std::chrono::high_resolution_clock::now();
|
|
80
|
+
|
|
80
81
|
// Generate our tokens
|
|
81
|
-
while (pos <
|
|
82
|
+
while (pos < start_pos + max_new_tokens) {
|
|
82
83
|
// Run the model
|
|
83
|
-
auto logits_res =
|
|
84
|
-
text_decoder_runner_->step(tokens_managed, start_pos_managed);
|
|
84
|
+
auto logits_res = text_decoder_runner_->step(tokens_managed, pos);
|
|
85
85
|
|
|
86
86
|
ET_CHECK_OK_OR_RETURN_ERROR(logits_res.error());
|
|
87
87
|
executorch::aten::Tensor &logits_tensor = logits_res.get();
|
|
@@ -89,7 +89,8 @@ public:
|
|
|
89
89
|
prev_token = cur_token;
|
|
90
90
|
|
|
91
91
|
stats_->on_sampling_begin();
|
|
92
|
-
cur_token = text_decoder_runner_->logits_to_token(logits_tensor
|
|
92
|
+
cur_token = text_decoder_runner_->logits_to_token(logits_tensor,
|
|
93
|
+
temperature, topp);
|
|
93
94
|
stats_->on_sampling_end();
|
|
94
95
|
|
|
95
96
|
pos++;
|
|
@@ -105,8 +106,9 @@ public:
|
|
|
105
106
|
tokens_managed, {1, static_cast<int>(token_data.size())}));
|
|
106
107
|
}
|
|
107
108
|
|
|
108
|
-
// print the tokens as string, decode it with the Tokenizer object
|
|
109
109
|
token_cache.push_back(static_cast<int32_t>(cur_token));
|
|
110
|
+
|
|
111
|
+
// print the token as string, decode it with the Tokenizer object
|
|
110
112
|
const std::string cache_decoded = tokenizer_->Decode(token_cache);
|
|
111
113
|
|
|
112
114
|
const auto timeIntervalElapsed =
|
|
@@ -129,7 +131,7 @@ public:
|
|
|
129
131
|
}
|
|
130
132
|
|
|
131
133
|
// data-dependent terminating condition: we have n_eos_ number of EOS
|
|
132
|
-
if (
|
|
134
|
+
if (eos_ids_->find(cur_token) != eos_ids_->end()) {
|
|
133
135
|
printf("\n");
|
|
134
136
|
ET_LOG(Info, "\nReached to the end of generation");
|
|
135
137
|
break;
|
|
@@ -143,6 +145,23 @@ public:
|
|
|
143
145
|
*/
|
|
144
146
|
inline void stop() { should_stop_ = true; }
|
|
145
147
|
|
|
148
|
+
/**
|
|
149
|
+
* Load the necessary resources for TextTokenGenerator.
|
|
150
|
+
* This method should be called before using the generate() method.
|
|
151
|
+
*/
|
|
152
|
+
::executorch::runtime::Error load() { return text_decoder_runner_->load(); }
|
|
153
|
+
|
|
154
|
+
/**
|
|
155
|
+
* Check if the TextTokenGenerator has been successfully loaded.
|
|
156
|
+
* @return True if the resources are loaded, false otherwise.
|
|
157
|
+
*/
|
|
158
|
+
bool inline is_loaded() const {
|
|
159
|
+
// Implementation to check if resources are loaded
|
|
160
|
+
// return tokenizer_->is_loaded() &&
|
|
161
|
+
// text_decoder_runner_->is_method_loaded();
|
|
162
|
+
return text_decoder_runner_->is_method_loaded();
|
|
163
|
+
}
|
|
164
|
+
|
|
146
165
|
void set_count_interval(size_t count_interval) {
|
|
147
166
|
count_interval_ = count_interval;
|
|
148
167
|
}
|
|
@@ -152,16 +171,22 @@ public:
|
|
|
152
171
|
}
|
|
153
172
|
|
|
154
173
|
private:
|
|
155
|
-
|
|
174
|
+
/**
|
|
175
|
+
* Note: TextTokenGenerator does not own the tokenizer_ and
|
|
176
|
+
* text_decoder_runner_. The lifecycle of these objects should be managed
|
|
177
|
+
* externally, likely in the Runner. This class assumes that the provided
|
|
178
|
+
* pointers remain valid for the duration of its use.
|
|
179
|
+
*/
|
|
180
|
+
::tokenizers::Tokenizer *tokenizer_;
|
|
156
181
|
TextDecoderRunner *text_decoder_runner_;
|
|
157
182
|
std::unique_ptr<std::unordered_set<uint64_t>> eos_ids_;
|
|
158
183
|
bool use_kv_cache_;
|
|
159
|
-
size_t count_interval_{10};
|
|
160
|
-
std::chrono::milliseconds time_interval_{120};
|
|
161
|
-
std::chrono::high_resolution_clock::time_point timestamp_;
|
|
162
184
|
|
|
163
185
|
// state machine
|
|
164
186
|
bool should_stop_ = false;
|
|
187
|
+
size_t count_interval_{10};
|
|
188
|
+
std::chrono::milliseconds time_interval_{120};
|
|
189
|
+
std::chrono::high_resolution_clock::time_point timestamp_;
|
|
165
190
|
|
|
166
191
|
// stats
|
|
167
192
|
Stats *stats_;
|
package/common/runner/util.h
CHANGED
|
@@ -7,7 +7,10 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
#pragma once
|
|
10
|
+
#include "constants.h"
|
|
11
|
+
#include "text_prefiller.h"
|
|
10
12
|
#include <cctype>
|
|
13
|
+
#include <executorch/extension/tensor/tensor.h>
|
|
11
14
|
#include <executorch/runtime/platform/compiler.h>
|
|
12
15
|
#include <stdio.h>
|
|
13
16
|
#include <time.h>
|
|
@@ -41,7 +44,7 @@ namespace executorch {
|
|
|
41
44
|
namespace extension {
|
|
42
45
|
namespace llm {
|
|
43
46
|
|
|
44
|
-
|
|
47
|
+
void inline safe_printf(const char *piece) {
|
|
45
48
|
// piece might be a raw byte token, and we only want to print printable chars
|
|
46
49
|
// or whitespace because some of the other bytes can be various control codes,
|
|
47
50
|
// backspace, etc.
|
|
@@ -63,10 +66,16 @@ ET_EXPERIMENTAL void inline safe_printf(const char *piece) {
|
|
|
63
66
|
// ----------------------------------------------------------------------------
|
|
64
67
|
// utilities: time
|
|
65
68
|
|
|
66
|
-
|
|
69
|
+
long inline time_in_ms() {
|
|
67
70
|
// return time in milliseconds, for benchmarking the model speed
|
|
68
71
|
struct timespec time;
|
|
72
|
+
// The `timespec_get` function is for windows time access. Some AOSP OS does
|
|
73
|
+
// not have timespec_get support.
|
|
74
|
+
#if defined(__ANDROID_API__)
|
|
69
75
|
clock_gettime(CLOCK_REALTIME, &time);
|
|
76
|
+
#else
|
|
77
|
+
timespec_get(&time, TIME_UTC);
|
|
78
|
+
#endif
|
|
70
79
|
return time.tv_sec * 1000 + time.tv_nsec / 1000000;
|
|
71
80
|
}
|
|
72
81
|
|
|
@@ -77,20 +86,56 @@ ET_EXPERIMENTAL long inline time_in_ms() {
|
|
|
77
86
|
// RSS: Resident Set Size, the amount of memory currently in the RAM for this
|
|
78
87
|
// process. These values are approximate, and are only used for logging
|
|
79
88
|
// purposes.
|
|
80
|
-
|
|
89
|
+
size_t inline get_rss_bytes() {
|
|
81
90
|
#if defined(__linux__) || defined(__ANDROID__) || defined(__unix__)
|
|
82
91
|
struct rusage r_usage;
|
|
83
92
|
if (getrusage(RUSAGE_SELF, &r_usage) == 0) {
|
|
84
93
|
return r_usage.ru_maxrss * 1024;
|
|
85
94
|
}
|
|
86
95
|
#endif // __linux__ || __ANDROID__ || __unix__
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
96
|
+
// Unsupported platform like Windows, or getrusage() failed.
|
|
97
|
+
// __APPLE__ and __MACH__ are not supported because r_usage.ru_maxrss does not
|
|
98
|
+
// consistently return kbytes on macOS. On older versions of macOS, it
|
|
99
|
+
// returns bytes, but on newer versions it returns kbytes. Need to figure out
|
|
100
|
+
// when this changed.
|
|
92
101
|
return 0;
|
|
93
102
|
}
|
|
103
|
+
|
|
104
|
+
// Returns the cache position tensor, which can be either a single start_pos
|
|
105
|
+
// (when the method_name [`text_decoder` or `forward`] expects a tensor with
|
|
106
|
+
// size 1 because model will populate the cache position tensor underneath), or
|
|
107
|
+
// a populated tensor for cache position, for the given start_pos and seq_len.
|
|
108
|
+
inline runtime::Result<TensorPtr>
|
|
109
|
+
populate_start_pos_or_cache_position(Module *module, int64_t &start_pos,
|
|
110
|
+
std::vector<int64_t> &cache_positions_vec,
|
|
111
|
+
int seq_len,
|
|
112
|
+
const char *method_name = "forward") {
|
|
113
|
+
// Get expected shape of cache position tensor, which should be the second
|
|
114
|
+
// argument
|
|
115
|
+
auto method_meta = ET_UNWRAP(module->method_meta(method_name));
|
|
116
|
+
auto second_input_info = ET_UNWRAP(method_meta.input_tensor_meta(1));
|
|
117
|
+
auto second_input_sizes = second_input_info.sizes();
|
|
118
|
+
auto numel = second_input_sizes[0];
|
|
119
|
+
|
|
120
|
+
TensorPtr start_pos_tensor;
|
|
121
|
+
if (numel > 1) {
|
|
122
|
+
// `cache_position` goes from start_pos to start_pos +
|
|
123
|
+
// encoder_output.size(1). e.g. if start_pos = 2 and encoder_output.size(1)
|
|
124
|
+
// = 5, cache_position_tensor should be [2, 3, 4, 5, 6].
|
|
125
|
+
cache_positions_vec.resize(seq_len);
|
|
126
|
+
for (int64_t i = 0; i < seq_len; ++i) {
|
|
127
|
+
cache_positions_vec[i] = start_pos + i;
|
|
128
|
+
}
|
|
129
|
+
return ::executorch::extension::from_blob(
|
|
130
|
+
cache_positions_vec.data(), {static_cast<int>(seq_len)},
|
|
131
|
+
executorch::aten::ScalarType::Long);
|
|
132
|
+
} else {
|
|
133
|
+
// Cache position is size 1.
|
|
134
|
+
return ::executorch::extension::from_blob(
|
|
135
|
+
&start_pos, {1}, executorch::aten::ScalarType::Long);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
94
139
|
} // namespace llm
|
|
95
140
|
} // namespace extension
|
|
96
141
|
} // namespace executorch
|
|
@@ -7,7 +7,6 @@
|
|
|
7
7
|
objects = {
|
|
8
8
|
|
|
9
9
|
/* Begin PBXBuildFile section */
|
|
10
|
-
55D6EA8C2D0987D2009BA408 /* ExecutorchLib.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = 55D6EA8B2D0987D2009BA408 /* ExecutorchLib.xcframework */; };
|
|
11
10
|
8C9A9BD02DB0CE800027DD32 /* ImageSegmentation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8C9A9BCA2DB0CE800027DD32 /* ImageSegmentation.cpp */; };
|
|
12
11
|
8C9A9BD12DB0CE800027DD32 /* RuntimeLifecycleMonitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8C9A9BC72DB0CE800027DD32 /* RuntimeLifecycleMonitor.cpp */; };
|
|
13
12
|
8C9A9BD22DB0CE800027DD32 /* JsiHostObject.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 8C9A9BC22DB0CE800027DD32 /* JsiHostObject.cpp */; };
|
|
@@ -43,30 +42,9 @@
|
|
|
43
42
|
8C9A9BCD2DB0CE800027DD32 /* RnExecutorchInstaller.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = RnExecutorchInstaller.h; sourceTree = "<group>"; };
|
|
44
43
|
/* End PBXFileReference section */
|
|
45
44
|
|
|
46
|
-
/* Begin PBXFileSystemSynchronizedGroupBuildPhaseMembershipExceptionSet section */
|
|
47
|
-
550986902CEF541900FECBB8 /* Exceptions for "RnExecutorch" folder in "Copy Files" phase from "RnExecutorch" target */ = {
|
|
48
|
-
isa = PBXFileSystemSynchronizedGroupBuildPhaseMembershipExceptionSet;
|
|
49
|
-
buildPhase = 550986872CEF541900FECBB8 /* CopyFiles */;
|
|
50
|
-
membershipExceptions = (
|
|
51
|
-
LLM.h,
|
|
52
|
-
);
|
|
53
|
-
};
|
|
54
|
-
552754CC2D394AC9006B38A2 /* Exceptions for "RnExecutorch" folder in "Compile Sources" phase from "RnExecutorch" target */ = {
|
|
55
|
-
isa = PBXFileSystemSynchronizedGroupBuildPhaseMembershipExceptionSet;
|
|
56
|
-
buildPhase = 550986852CEF541900FECBB8 /* Sources */;
|
|
57
|
-
membershipExceptions = (
|
|
58
|
-
models/ocr/utils/DetectorUtils.h,
|
|
59
|
-
);
|
|
60
|
-
};
|
|
61
|
-
/* End PBXFileSystemSynchronizedGroupBuildPhaseMembershipExceptionSet section */
|
|
62
|
-
|
|
63
45
|
/* Begin PBXFileSystemSynchronizedRootGroup section */
|
|
64
46
|
5509868B2CEF541900FECBB8 /* RnExecutorch */ = {
|
|
65
47
|
isa = PBXFileSystemSynchronizedRootGroup;
|
|
66
|
-
exceptions = (
|
|
67
|
-
552754CC2D394AC9006B38A2 /* Exceptions for "RnExecutorch" folder in "Compile Sources" phase from "RnExecutorch" target */,
|
|
68
|
-
550986902CEF541900FECBB8 /* Exceptions for "RnExecutorch" folder in "Copy Files" phase from "RnExecutorch" target */,
|
|
69
|
-
);
|
|
70
48
|
path = RnExecutorch;
|
|
71
49
|
sourceTree = "<group>";
|
|
72
50
|
};
|
|
@@ -77,7 +55,6 @@
|
|
|
77
55
|
isa = PBXFrameworksBuildPhase;
|
|
78
56
|
buildActionMask = 2147483647;
|
|
79
57
|
files = (
|
|
80
|
-
55D6EA8C2D0987D2009BA408 /* ExecutorchLib.xcframework in Frameworks */,
|
|
81
58
|
);
|
|
82
59
|
runOnlyForDeploymentPostprocessing = 0;
|
|
83
60
|
};
|
package/lib/module/Error.js
CHANGED
|
@@ -7,6 +7,7 @@ export let ETError = /*#__PURE__*/function (ETError) {
|
|
|
7
7
|
ETError[ETError["FileWriteFailed"] = 103] = "FileWriteFailed";
|
|
8
8
|
ETError[ETError["ModelGenerating"] = 104] = "ModelGenerating";
|
|
9
9
|
ETError[ETError["LanguageNotSupported"] = 105] = "LanguageNotSupported";
|
|
10
|
+
ETError[ETError["InvalidConfig"] = 112] = "InvalidConfig";
|
|
10
11
|
ETError[ETError["InvalidModelSource"] = 255] = "InvalidModelSource";
|
|
11
12
|
// SpeechToText errors
|
|
12
13
|
ETError[ETError["MultilingualConfiguration"] = 160] = "MultilingualConfiguration";
|
package/lib/module/Error.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["ETError","getError","e","UndefinedError","error","errorCode","parseInt","message","Number","isNaN"],"sourceRoot":"../../src","sources":["Error.ts"],"mappings":";;AAAA,WAAYA,OAAO,0BAAPA,OAAO;EACjB;EADUA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;
|
|
1
|
+
{"version":3,"names":["ETError","getError","e","UndefinedError","error","errorCode","parseInt","message","Number","isNaN"],"sourceRoot":"../../src","sources":["Error.ts"],"mappings":";;AAAA,WAAYA,OAAO,0BAAPA,OAAO;EACjB;EADUA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAUjB;EAVUA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAejB;EACA;EACA;EAjBUA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAuBjB;EAvBUA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EA8BjB;EA9BUA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAsCjB;EAtCUA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAPA,OAAO,CAAPA,OAAO;EAAA,OAAPA,OAAO;AAAA;AA4CnB,OAAO,MAAMC,QAAQ,GAAIC,CAA4B,IAAa;EAChE,IAAI,OAAOA,CAAC,KAAK,QAAQ,EAAE;IACzB,OAAOF,OAAO,CAACE,CAAC,CAAC,IAAIF,OAAO,CAACA,OAAO,CAACG,cAAc,CAAC;EACtD;;EAEA;EACA,MAAMC,KAAK,GAAGF,CAAU;EACxB,MAAMG,SAAS,GAAGC,QAAQ,CAACF,KAAK,CAACG,OAAO,EAAE,EAAE,CAAC;EAE7C,IAAIC,MAAM,CAACC,KAAK,CAACJ,SAAS,CAAC,EAAE;IAC3B,OAAOD,KAAK,CAACG,OAAO;EACtB;EAEA,OAAOP,OAAO,CAACK,SAAS,CAAC,IAAIL,OAAO,CAACA,OAAO,CAACG,cAAc,CAAC;AAC9D,CAAC","ignoreList":[]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"names":["documentDirectory","RNEDirectory"],"sourceRoot":"../../../src","sources":["constants/directories.ts"],"mappings":";;AAAA,SAASA,iBAAiB,QAAQ,
|
|
1
|
+
{"version":3,"names":["documentDirectory","RNEDirectory"],"sourceRoot":"../../../src","sources":["constants/directories.ts"],"mappings":";;AAAA,SAASA,iBAAiB,QAAQ,yBAAyB;AAE3D,OAAO,MAAMC,YAAY,GAAG,GAAGD,iBAAiB,0BAA0B","ignoreList":[]}
|