@novastera-oss/llamarn 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/RNLlamaCpp.podspec +3 -2
- package/android/CMakeLists.txt +6 -3
- package/android/src/main/cpp/include/llama.h +140 -38
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/LlamaCppModel.cpp +48 -67
- package/cpp/LlamaCppModel.h +8 -3
- package/cpp/PureCppImpl.cpp +1 -1
- package/cpp/PureCppImpl.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +15 -4
- package/cpp/llama.cpp/Makefile +2 -2
- package/cpp/llama.cpp/README.md +33 -13
- package/cpp/llama.cpp/common/CMakeLists.txt +15 -28
- package/cpp/llama.cpp/common/arg.cpp +38 -12
- package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
- package/cpp/llama.cpp/common/chat-parser.cpp +9 -3
- package/cpp/llama.cpp/common/chat-parser.h +4 -1
- package/cpp/llama.cpp/common/chat.cpp +16 -13
- package/cpp/llama.cpp/common/chat.h +1 -1
- package/cpp/llama.cpp/common/common.cpp +52 -40
- package/cpp/llama.cpp/common/common.h +5 -2
- package/cpp/llama.cpp/common/json-partial.cpp +5 -4
- package/cpp/llama.cpp/common/json-partial.h +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/cpp/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/cpp/llama.cpp/common/speculative.cpp +6 -4
- package/cpp/llama.cpp/convert_hf_to_gguf.py +128 -84
- package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -2
- package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
- package/cpp/llama.cpp/ggml/include/ggml.h +1 -3
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +49 -13
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +10 -5
- package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +93 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2174 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +7 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +33 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1555 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +2 -4
- package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +6 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +25 -16
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +33 -8
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +135 -100
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +908 -3
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
- package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +19 -24
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +21 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +121 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +2 -96
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +164 -46
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +32 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +118 -11
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +26 -29
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -248
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml.c +9 -8
- package/cpp/llama.cpp/ggml/src/ggml.cpp +26 -0
- package/cpp/llama.cpp/ggml/src/gguf.cpp +19 -2
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +57 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +4 -1
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +14 -3
- package/cpp/llama.cpp/include/llama.h +140 -38
- package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
- package/cpp/llama.cpp/src/CMakeLists.txt +4 -1
- package/cpp/llama.cpp/src/llama-arch.cpp +95 -3
- package/cpp/llama.cpp/src/llama-arch.h +7 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +289 -31
- package/cpp/llama.cpp/src/llama-batch.h +47 -17
- package/cpp/llama.cpp/src/llama-chat.cpp +19 -2
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-context.cpp +488 -313
- package/cpp/llama.cpp/src/llama-context.h +38 -17
- package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
- package/cpp/llama.cpp/src/llama-cparams.h +1 -1
- package/cpp/llama.cpp/src/llama-graph.cpp +275 -152
- package/cpp/llama.cpp/src/llama-graph.h +109 -52
- package/cpp/llama.cpp/src/llama-hparams.cpp +6 -2
- package/cpp/llama.cpp/src/llama-hparams.h +8 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +281 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +133 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +1835 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +308 -0
- package/cpp/llama.cpp/src/llama-kv-cells.h +53 -17
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +247 -0
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +143 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +1116 -0
- package/cpp/llama.cpp/src/llama-memory-recurrent.h +188 -0
- package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
- package/cpp/llama.cpp/src/llama-memory.h +89 -4
- package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
- package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/cpp/llama.cpp/src/llama-model.cpp +735 -143
- package/cpp/llama.cpp/src/llama-model.h +4 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +2 -1
- package/cpp/llama.cpp/src/llama-vocab.cpp +39 -25
- package/cpp/llama.cpp/src/llama.cpp +11 -7
- package/cpp/llama.cpp/src/unicode.cpp +5 -0
- package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +10518 -0
- package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +93468 -0
- package/cpp/llama.cpp/{common → vendor}/minja/chat-template.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor}/minja/minja.hpp +1 -1
- package/cpp/llama.cpp/{common → vendor/nlohmann}/json.hpp +3027 -2267
- package/cpp/llama.cpp/vendor/nlohmann/json_fwd.hpp +187 -0
- package/cpp/llama.cpp/vendor/stb/stb_image.h +7988 -0
- package/cpp/rn-completion.cpp +65 -10
- package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
- package/cpp/{rn-utils.hpp → rn-utils.h} +8 -1
- package/ios/include/chat.h +1 -1
- package/ios/include/common/minja/chat-template.hpp +1 -1
- package/ios/include/common/minja/minja.hpp +1 -1
- package/ios/include/common.h +5 -2
- package/ios/include/json-schema-to-grammar.h +4 -4
- package/ios/include/llama.h +140 -38
- package/ios/include/{common → nlohmann}/json.hpp +3027 -2267
- package/ios/libs/llama.xcframework/Info.plist +20 -20
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4617
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3557
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3744 -3559
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4616
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4637
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3556
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4900 -4653
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4871 -4674
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3773 -3587
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -2
- package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -2747
- package/cpp/llama.cpp/src/llama-kv-cache.h +0 -502
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
package/RNLlamaCpp.podspec
CHANGED
|
@@ -39,7 +39,8 @@ Pod::Spec.new do |s|
|
|
|
39
39
|
"cpp/llama.cpp/common/speculative.{h,cpp}",
|
|
40
40
|
"cpp/llama.cpp/common/llguidance.{h,cpp}",
|
|
41
41
|
"cpp/llama.cpp/common/*.hpp",
|
|
42
|
-
"cpp/llama.cpp/
|
|
42
|
+
"cpp/llama.cpp/vendor/minja/*.hpp"
|
|
43
|
+
"cpp/llama.cpp/vendor/nlohmann/*.hpp"
|
|
43
44
|
|
|
44
45
|
# Include all necessary headers for compilation
|
|
45
46
|
s.preserve_paths = "ios/include/**/*.h",
|
|
@@ -51,7 +52,7 @@ Pod::Spec.new do |s|
|
|
|
51
52
|
|
|
52
53
|
# Compiler settings
|
|
53
54
|
s.pod_target_xcconfig = {
|
|
54
|
-
"HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
|
|
55
|
+
"HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/vendor\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
|
|
55
56
|
"OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
|
|
56
57
|
"CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
|
|
57
58
|
"GCC_OPTIMIZATION_LEVEL" => "3", # Maximum optimization
|
package/android/CMakeLists.txt
CHANGED
|
@@ -141,7 +141,8 @@ target_include_directories(common PRIVATE
|
|
|
141
141
|
${LLAMA_CPP_DIR}/ggml/include
|
|
142
142
|
${LLAMA_CPP_DIR}/include
|
|
143
143
|
${LLAMA_CPP_DIR}/common
|
|
144
|
-
${LLAMA_CPP_DIR}/
|
|
144
|
+
${LLAMA_CPP_DIR}/vendor/minja
|
|
145
|
+
${LLAMA_CPP_DIR}/vendor
|
|
145
146
|
${LLAMA_CPP_DIR}/src
|
|
146
147
|
)
|
|
147
148
|
|
|
@@ -150,7 +151,8 @@ target_include_directories(RNLlamaCpp PRIVATE
|
|
|
150
151
|
${LLAMA_CPP_DIR}/ggml/include
|
|
151
152
|
${LLAMA_CPP_DIR}/include
|
|
152
153
|
${LLAMA_CPP_DIR}/common
|
|
153
|
-
${LLAMA_CPP_DIR}/
|
|
154
|
+
${LLAMA_CPP_DIR}/vendor/minja # Add this for chat-template.hpp
|
|
155
|
+
${LLAMA_CPP_DIR}/vendor
|
|
154
156
|
${LLAMA_CPP_DIR}/src
|
|
155
157
|
# Add the generated headers path
|
|
156
158
|
${MODULE_ROOT}/android/generated/jni
|
|
@@ -244,6 +246,7 @@ target_include_directories(RNLlamaCpp INTERFACE
|
|
|
244
246
|
${LLAMA_CPP_DIR}/ggml/include
|
|
245
247
|
${LLAMA_CPP_DIR}/include
|
|
246
248
|
${LLAMA_CPP_DIR}/common
|
|
247
|
-
${LLAMA_CPP_DIR}/
|
|
249
|
+
${LLAMA_CPP_DIR}/vendor/minja
|
|
250
|
+
${LLAMA_CPP_DIR}/vendor
|
|
248
251
|
${LLAMA_CPP_DIR}/src
|
|
249
252
|
)
|
|
@@ -61,7 +61,10 @@ extern "C" {
|
|
|
61
61
|
struct llama_model;
|
|
62
62
|
struct llama_context;
|
|
63
63
|
struct llama_sampler;
|
|
64
|
-
|
|
64
|
+
|
|
65
|
+
typedef struct llama_memory_i * llama_memory_t;
|
|
66
|
+
|
|
67
|
+
struct llama_kv_cache; // DEPRECATED (use llama_memory instead)
|
|
65
68
|
|
|
66
69
|
typedef int32_t llama_pos;
|
|
67
70
|
typedef int32_t llama_token;
|
|
@@ -240,18 +243,21 @@ extern "C" {
|
|
|
240
243
|
|
|
241
244
|
typedef bool (*llama_progress_callback)(float progress, void * user_data);
|
|
242
245
|
|
|
243
|
-
// Input data for llama_decode
|
|
246
|
+
// Input data for llama_encode/llama_decode
|
|
244
247
|
// A llama_batch object can contain input about one or many sequences
|
|
245
248
|
// The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens
|
|
246
249
|
//
|
|
247
250
|
// - token : the token ids of the input (used when embd is NULL)
|
|
248
251
|
// - embd : token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
|
|
249
252
|
// - pos : the positions of the respective token in the sequence
|
|
250
|
-
// (if set to NULL, the token position will be tracked automatically by llama_decode)
|
|
253
|
+
// (if set to NULL, the token position will be tracked automatically by llama_encode/llama_decode)
|
|
251
254
|
// - seq_id : the sequence to which the respective token belongs
|
|
252
255
|
// (if set to NULL, the sequence ID will be assumed to be 0)
|
|
253
256
|
// - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output
|
|
254
|
-
// (if set to NULL
|
|
257
|
+
// (if set to NULL:
|
|
258
|
+
// - if embeddings: all tokens are output
|
|
259
|
+
// - if not: only the last token is output
|
|
260
|
+
// )
|
|
255
261
|
//
|
|
256
262
|
typedef struct llama_batch {
|
|
257
263
|
int32_t n_tokens;
|
|
@@ -261,7 +267,7 @@ extern "C" {
|
|
|
261
267
|
llama_pos * pos;
|
|
262
268
|
int32_t * n_seq_id;
|
|
263
269
|
llama_seq_id ** seq_id;
|
|
264
|
-
int8_t * logits;
|
|
270
|
+
int8_t * logits; // TODO: rename this to "output"
|
|
265
271
|
} llama_batch;
|
|
266
272
|
|
|
267
273
|
enum llama_model_kv_override_type {
|
|
@@ -366,6 +372,8 @@ extern "C" {
|
|
|
366
372
|
bool no_perf; // measure performance timings
|
|
367
373
|
bool op_offload; // offload host tensor operations to device
|
|
368
374
|
bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
|
|
375
|
+
// NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
|
|
376
|
+
// ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
|
|
369
377
|
};
|
|
370
378
|
|
|
371
379
|
// model quantization parameters
|
|
@@ -491,9 +499,11 @@ extern "C" {
|
|
|
491
499
|
DEPRECATED(LLAMA_API int32_t llama_n_vocab (const struct llama_vocab * vocab), "use llama_vocab_n_tokens instead");
|
|
492
500
|
|
|
493
501
|
LLAMA_API const struct llama_model * llama_get_model (const struct llama_context * ctx);
|
|
494
|
-
LLAMA_API
|
|
502
|
+
LLAMA_API llama_memory_t llama_get_memory (const struct llama_context * ctx);
|
|
495
503
|
LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); // TODO: rename to llama_get_pooling_type
|
|
496
504
|
|
|
505
|
+
DEPRECATED(LLAMA_API struct llama_kv_cache * llama_get_kv_self(struct llama_context * ctx), "use llama_get_memory instead");
|
|
506
|
+
|
|
497
507
|
LLAMA_API const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model);
|
|
498
508
|
LLAMA_API enum llama_rope_type llama_model_rope_type(const struct llama_model * model);
|
|
499
509
|
|
|
@@ -502,10 +512,18 @@ extern "C" {
|
|
|
502
512
|
LLAMA_API int32_t llama_model_n_layer (const struct llama_model * model);
|
|
503
513
|
LLAMA_API int32_t llama_model_n_head (const struct llama_model * model);
|
|
504
514
|
LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
|
|
515
|
+
LLAMA_API int32_t llama_model_n_swa (const struct llama_model * model);
|
|
505
516
|
|
|
506
517
|
// Get the model's RoPE frequency scaling factor
|
|
507
518
|
LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
|
|
508
519
|
|
|
520
|
+
// Returns the number of classifier outputs (only valid for classifier models)
|
|
521
|
+
// Undefined behavior for non-classifier models
|
|
522
|
+
LLAMA_API uint32_t llama_model_n_cls_out(const struct llama_model * model);
|
|
523
|
+
|
|
524
|
+
// Returns label of classifier output by index (<n_cls_out). Returns nullptr if no label provided
|
|
525
|
+
LLAMA_API const char * llama_model_cls_label(const struct llama_model * model, uint32_t i);
|
|
526
|
+
|
|
509
527
|
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_vocab * vocab);
|
|
510
528
|
|
|
511
529
|
LLAMA_API int32_t llama_vocab_n_tokens(const struct llama_vocab * vocab);
|
|
@@ -606,7 +624,81 @@ extern "C" {
|
|
|
606
624
|
int32_t il_end);
|
|
607
625
|
|
|
608
626
|
//
|
|
609
|
-
//
|
|
627
|
+
// Memory
|
|
628
|
+
//
|
|
629
|
+
|
|
630
|
+
// Clear the memory contents
|
|
631
|
+
// If data == true, the data buffers will also be cleared together with the metadata
|
|
632
|
+
LLAMA_API void llama_memory_clear(
|
|
633
|
+
llama_memory_t mem,
|
|
634
|
+
bool data);
|
|
635
|
+
|
|
636
|
+
// Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
|
|
637
|
+
// Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
|
|
638
|
+
// seq_id < 0 : match any sequence
|
|
639
|
+
// p0 < 0 : [0, p1]
|
|
640
|
+
// p1 < 0 : [p0, inf)
|
|
641
|
+
LLAMA_API bool llama_memory_seq_rm(
|
|
642
|
+
llama_memory_t mem,
|
|
643
|
+
llama_seq_id seq_id,
|
|
644
|
+
llama_pos p0,
|
|
645
|
+
llama_pos p1);
|
|
646
|
+
|
|
647
|
+
// Copy all tokens that belong to the specified sequence to another sequence
|
|
648
|
+
// p0 < 0 : [0, p1]
|
|
649
|
+
// p1 < 0 : [p0, inf)
|
|
650
|
+
LLAMA_API void llama_memory_seq_cp(
|
|
651
|
+
llama_memory_t mem,
|
|
652
|
+
llama_seq_id seq_id_src,
|
|
653
|
+
llama_seq_id seq_id_dst,
|
|
654
|
+
llama_pos p0,
|
|
655
|
+
llama_pos p1);
|
|
656
|
+
|
|
657
|
+
// Removes all tokens that do not belong to the specified sequence
|
|
658
|
+
LLAMA_API void llama_memory_seq_keep(
|
|
659
|
+
llama_memory_t mem,
|
|
660
|
+
llama_seq_id seq_id);
|
|
661
|
+
|
|
662
|
+
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
|
663
|
+
// p0 < 0 : [0, p1]
|
|
664
|
+
// p1 < 0 : [p0, inf)
|
|
665
|
+
LLAMA_API void llama_memory_seq_add(
|
|
666
|
+
llama_memory_t mem,
|
|
667
|
+
llama_seq_id seq_id,
|
|
668
|
+
llama_pos p0,
|
|
669
|
+
llama_pos p1,
|
|
670
|
+
llama_pos delta);
|
|
671
|
+
|
|
672
|
+
// Integer division of the positions by factor of `d > 1`
|
|
673
|
+
// p0 < 0 : [0, p1]
|
|
674
|
+
// p1 < 0 : [p0, inf)
|
|
675
|
+
LLAMA_API void llama_memory_seq_div(
|
|
676
|
+
llama_memory_t mem,
|
|
677
|
+
llama_seq_id seq_id,
|
|
678
|
+
llama_pos p0,
|
|
679
|
+
llama_pos p1,
|
|
680
|
+
int d);
|
|
681
|
+
|
|
682
|
+
// Returns the smallest position present in the memory for the specified sequence
|
|
683
|
+
// This is typically non-zero only for SWA caches
|
|
684
|
+
// Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the memory
|
|
685
|
+
// Return -1 if the sequence is empty
|
|
686
|
+
LLAMA_API llama_pos llama_memory_seq_pos_min(
|
|
687
|
+
llama_memory_t mem,
|
|
688
|
+
llama_seq_id seq_id);
|
|
689
|
+
|
|
690
|
+
// Returns the largest position present in the memory for the specified sequence
|
|
691
|
+
// Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the memory
|
|
692
|
+
// Return -1 if the sequence is empty
|
|
693
|
+
LLAMA_API llama_pos llama_memory_seq_pos_max(
|
|
694
|
+
llama_memory_t mem,
|
|
695
|
+
llama_seq_id seq_id);
|
|
696
|
+
|
|
697
|
+
// Check if the memory supports shifting
|
|
698
|
+
LLAMA_API bool llama_memory_can_shift(llama_memory_t mem);
|
|
699
|
+
|
|
700
|
+
//
|
|
701
|
+
// KV cache for self-attention (TODO: deprecate in favor of llama_memory)
|
|
610
702
|
//
|
|
611
703
|
|
|
612
704
|
// Returns the number of tokens in the KV cache (slow, use only for debug)
|
|
@@ -619,93 +711,103 @@ extern "C" {
|
|
|
619
711
|
"Use llama_kv_self_seq_pos_max() and llama_kv_self_seq_pos_min() instead (https://github.com/ggml-org/llama.cpp/issues/13793)");
|
|
620
712
|
|
|
621
713
|
// Clear the KV cache - both cell info is erased and KV data is zeroed
|
|
622
|
-
LLAMA_API void llama_kv_self_clear(
|
|
623
|
-
|
|
714
|
+
DEPRECATED(LLAMA_API void llama_kv_self_clear(
|
|
715
|
+
struct llama_context * ctx),
|
|
716
|
+
"Use llama_memory_clear() instead");
|
|
624
717
|
|
|
625
718
|
// Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
|
|
626
719
|
// Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
|
|
627
720
|
// seq_id < 0 : match any sequence
|
|
628
721
|
// p0 < 0 : [0, p1]
|
|
629
722
|
// p1 < 0 : [p0, inf)
|
|
630
|
-
LLAMA_API bool llama_kv_self_seq_rm(
|
|
723
|
+
DEPRECATED(LLAMA_API bool llama_kv_self_seq_rm(
|
|
631
724
|
struct llama_context * ctx,
|
|
632
725
|
llama_seq_id seq_id,
|
|
633
726
|
llama_pos p0,
|
|
634
|
-
llama_pos p1)
|
|
727
|
+
llama_pos p1),
|
|
728
|
+
"Use llama_memory_seq_rm() instead");
|
|
635
729
|
|
|
636
730
|
// Copy all tokens that belong to the specified sequence to another sequence
|
|
637
731
|
// Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
|
|
638
732
|
// p0 < 0 : [0, p1]
|
|
639
733
|
// p1 < 0 : [p0, inf)
|
|
640
|
-
LLAMA_API void llama_kv_self_seq_cp(
|
|
734
|
+
DEPRECATED(LLAMA_API void llama_kv_self_seq_cp(
|
|
641
735
|
struct llama_context * ctx,
|
|
642
736
|
llama_seq_id seq_id_src,
|
|
643
737
|
llama_seq_id seq_id_dst,
|
|
644
738
|
llama_pos p0,
|
|
645
|
-
llama_pos p1)
|
|
739
|
+
llama_pos p1),
|
|
740
|
+
"Use llama_memory_seq_cp() instead");
|
|
646
741
|
|
|
647
742
|
// Removes all tokens that do not belong to the specified sequence
|
|
648
|
-
LLAMA_API void llama_kv_self_seq_keep(
|
|
743
|
+
DEPRECATED(LLAMA_API void llama_kv_self_seq_keep(
|
|
649
744
|
struct llama_context * ctx,
|
|
650
|
-
llama_seq_id seq_id)
|
|
745
|
+
llama_seq_id seq_id),
|
|
746
|
+
"Use llama_memory_seq_keep() instead");
|
|
651
747
|
|
|
652
748
|
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
|
653
749
|
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
|
654
750
|
// - lazily on next llama_decode()
|
|
655
|
-
// - explicitly with llama_kv_self_update()
|
|
656
751
|
// p0 < 0 : [0, p1]
|
|
657
752
|
// p1 < 0 : [p0, inf)
|
|
658
|
-
LLAMA_API void llama_kv_self_seq_add(
|
|
753
|
+
DEPRECATED(LLAMA_API void llama_kv_self_seq_add(
|
|
659
754
|
struct llama_context * ctx,
|
|
660
755
|
llama_seq_id seq_id,
|
|
661
756
|
llama_pos p0,
|
|
662
757
|
llama_pos p1,
|
|
663
|
-
llama_pos delta)
|
|
758
|
+
llama_pos delta),
|
|
759
|
+
"Use llama_memory_seq_add() instead");
|
|
664
760
|
|
|
665
761
|
// Integer division of the positions by factor of `d > 1`
|
|
666
762
|
// If the KV cache is RoPEd, the KV data is updated accordingly:
|
|
667
763
|
// - lazily on next llama_decode()
|
|
668
|
-
// - explicitly with llama_kv_self_update()
|
|
669
764
|
// p0 < 0 : [0, p1]
|
|
670
765
|
// p1 < 0 : [p0, inf)
|
|
671
|
-
|
|
766
|
+
DEPRECATED(void llama_kv_self_seq_div(
|
|
672
767
|
struct llama_context * ctx,
|
|
673
768
|
llama_seq_id seq_id,
|
|
674
769
|
llama_pos p0,
|
|
675
770
|
llama_pos p1,
|
|
676
|
-
int d)
|
|
771
|
+
int d),
|
|
772
|
+
"Use llama_memory_seq_div() instead");
|
|
677
773
|
|
|
678
774
|
// Returns the smallest position present in the KV cache for the specified sequence
|
|
679
775
|
// This is typically non-zero only for SWA caches
|
|
776
|
+
// Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the KV cache
|
|
680
777
|
// Return -1 if the sequence is empty
|
|
681
|
-
LLAMA_API llama_pos llama_kv_self_seq_pos_min(
|
|
778
|
+
DEPRECATED(LLAMA_API llama_pos llama_kv_self_seq_pos_min(
|
|
682
779
|
struct llama_context * ctx,
|
|
683
|
-
llama_seq_id seq_id)
|
|
780
|
+
llama_seq_id seq_id),
|
|
781
|
+
"Use llama_memory_seq_pos_min() instead");
|
|
684
782
|
|
|
685
783
|
// Returns the largest position present in the KV cache for the specified sequence
|
|
784
|
+
// Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the KV cache
|
|
686
785
|
// Return -1 if the sequence is empty
|
|
687
|
-
LLAMA_API llama_pos llama_kv_self_seq_pos_max(
|
|
786
|
+
DEPRECATED(LLAMA_API llama_pos llama_kv_self_seq_pos_max(
|
|
688
787
|
struct llama_context * ctx,
|
|
689
|
-
llama_seq_id seq_id)
|
|
788
|
+
llama_seq_id seq_id),
|
|
789
|
+
"Use llama_memory_seq_pos_max() instead");
|
|
690
790
|
|
|
691
791
|
// Defragment the KV cache
|
|
692
792
|
// This will be applied:
|
|
693
793
|
// - lazily on next llama_decode()
|
|
694
|
-
|
|
695
|
-
|
|
794
|
+
DEPRECATED(LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx),
|
|
795
|
+
"simply remove this call, the context will automatically decide when to do a defragmentation based on 'defrag_thold'");
|
|
696
796
|
|
|
697
797
|
// Check if the context supports KV cache shifting
|
|
698
|
-
LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx)
|
|
798
|
+
DEPRECATED(LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx),
|
|
799
|
+
"use llama_memory_can_shift() instead");
|
|
699
800
|
|
|
700
801
|
// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
|
|
701
|
-
LLAMA_API void llama_kv_self_update(struct llama_context * ctx)
|
|
802
|
+
DEPRECATED(LLAMA_API void llama_kv_self_update(struct llama_context * ctx),
|
|
803
|
+
"simply remove this call, updates are applied lazily on the next llama_decode()");
|
|
702
804
|
|
|
703
805
|
//
|
|
704
806
|
// State / sessions
|
|
705
807
|
//
|
|
706
808
|
|
|
707
809
|
// Returns the *actual* size in bytes of the state
|
|
708
|
-
// (logits, embedding and
|
|
810
|
+
// (logits, embedding and memory)
|
|
709
811
|
// Only use when saving the state, not when restoring it, otherwise the size may be too small.
|
|
710
812
|
LLAMA_API size_t llama_state_get_size(struct llama_context * ctx);
|
|
711
813
|
LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
|
|
@@ -761,12 +863,12 @@ extern "C" {
|
|
|
761
863
|
size_t n_token_count),
|
|
762
864
|
"use llama_state_save_file instead");
|
|
763
865
|
|
|
764
|
-
// Get the exact size needed to copy the
|
|
866
|
+
// Get the exact size needed to copy the state of a single sequence
|
|
765
867
|
LLAMA_API size_t llama_state_seq_get_size(
|
|
766
868
|
struct llama_context * ctx,
|
|
767
869
|
llama_seq_id seq_id);
|
|
768
870
|
|
|
769
|
-
// Copy the
|
|
871
|
+
// Copy the state of a single sequence into the specified buffer
|
|
770
872
|
LLAMA_API size_t llama_state_seq_get_data(
|
|
771
873
|
struct llama_context * ctx,
|
|
772
874
|
uint8_t * dst,
|
|
@@ -832,16 +934,16 @@ extern "C" {
|
|
|
832
934
|
// For encode-decoder contexts, processes the batch using the encoder.
|
|
833
935
|
// Can store the encoder output internally for later use by the decoder's cross-attention layers.
|
|
834
936
|
// 0 - success
|
|
835
|
-
// < 0 - error. the
|
|
937
|
+
// < 0 - error. the memory state is restored to the state before this call
|
|
836
938
|
LLAMA_API int32_t llama_encode(
|
|
837
939
|
struct llama_context * ctx,
|
|
838
940
|
struct llama_batch batch);
|
|
839
941
|
|
|
840
942
|
// Process a batch of tokens.
|
|
841
|
-
// Requires
|
|
943
|
+
// Requires the context to have a memory.
|
|
842
944
|
// For encode-decoder contexts, processes the batch using the decoder.
|
|
843
945
|
// Positive return values does not mean a fatal error, but rather a warning.
|
|
844
|
-
// Upon non-zero return values, the
|
|
946
|
+
// Upon non-zero return values, the memory state is restored to the state before this call
|
|
845
947
|
// 0 - success
|
|
846
948
|
// 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
|
|
847
949
|
// 2 - aborted
|
|
@@ -862,8 +964,8 @@ extern "C" {
|
|
|
862
964
|
// Get the number of threads used for prompt and batch processing (multiple token).
|
|
863
965
|
LLAMA_API int32_t llama_n_threads_batch(struct llama_context * ctx);
|
|
864
966
|
|
|
865
|
-
// Set whether the
|
|
866
|
-
//
|
|
967
|
+
// Set whether the context outputs embeddings or not
|
|
968
|
+
// TODO: rename to avoid confusion with llama_get_embeddings()
|
|
867
969
|
LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
|
|
868
970
|
|
|
869
971
|
// Set whether to use causal attention or not
|
|
@@ -912,7 +1014,7 @@ extern "C" {
|
|
|
912
1014
|
|
|
913
1015
|
// Get the embeddings for a sequence id
|
|
914
1016
|
// Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
|
|
915
|
-
// when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[
|
|
1017
|
+
// when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[n_cls_out] with the rank(s) of the sequence
|
|
916
1018
|
// otherwise: float[n_embd] (1-dimensional)
|
|
917
1019
|
LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
|
|
918
1020
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/cpp/LlamaCppModel.cpp
CHANGED
|
@@ -17,8 +17,8 @@
|
|
|
17
17
|
#include <memory>
|
|
18
18
|
|
|
19
19
|
// Include rn-completion integration
|
|
20
|
-
#include "rn-utils.
|
|
21
|
-
#include "rn-llama.
|
|
20
|
+
#include "rn-utils.h"
|
|
21
|
+
#include "rn-llama.h"
|
|
22
22
|
|
|
23
23
|
// Include llama.cpp headers
|
|
24
24
|
#include "llama.h"
|
|
@@ -242,38 +242,12 @@ CompletionOptions LlamaCppModel::parseCompletionOptions(jsi::Runtime& rt, const
|
|
|
242
242
|
auto paramsVal = fnObj.getProperty(rt, "parameters");
|
|
243
243
|
if (paramsVal.isObject()) {
|
|
244
244
|
try {
|
|
245
|
-
// Convert the JSI object directly to nlohmann::json
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
size_t propCount = propNames.size(rt);
|
|
252
|
-
for (size_t i = 0; i < propCount; i++) {
|
|
253
|
-
jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
|
|
254
|
-
std::string key = propName.utf8(rt);
|
|
255
|
-
auto value = paramsObj.getProperty(rt, propName);
|
|
256
|
-
|
|
257
|
-
if (value.isString()) {
|
|
258
|
-
fnParams[key] = value.asString(rt).utf8(rt);
|
|
259
|
-
} else if (value.isNumber()) {
|
|
260
|
-
fnParams[key] = value.asNumber();
|
|
261
|
-
} else if (value.isBool()) {
|
|
262
|
-
fnParams[key] = value.getBool();
|
|
263
|
-
} else if (value.isNull()) {
|
|
264
|
-
fnParams[key] = nullptr;
|
|
265
|
-
} else if (value.isObject()) {
|
|
266
|
-
if (value.getObject(rt).isArray(rt)) {
|
|
267
|
-
fnParams[key] = json::array();
|
|
268
|
-
} else {
|
|
269
|
-
fnParams[key] = json::object();
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
fnJson["parameters"] = fnParams;
|
|
275
|
-
} catch (const std::exception&) {
|
|
276
|
-
fnJson["parameters"] = json::object();
|
|
245
|
+
// Convert the JSI object directly to nlohmann::json using the new helper
|
|
246
|
+
fnJson["parameters"] = jsiValueToJson(rt, paramsVal);
|
|
247
|
+
} catch (const std::exception& e) {
|
|
248
|
+
// Log error or handle as appropriate
|
|
249
|
+
fprintf(stderr, "Failed to parse tool parameters: %s\n", e.what());
|
|
250
|
+
fnJson["parameters"] = json::object(); // Fallback to empty object
|
|
277
251
|
}
|
|
278
252
|
}
|
|
279
253
|
}
|
|
@@ -336,39 +310,12 @@ CompletionOptions LlamaCppModel::parseCompletionOptions(jsi::Runtime& rt, const
|
|
|
336
310
|
auto paramsVal = fnObj.getProperty(rt, "parameters");
|
|
337
311
|
if (paramsVal.isObject()) {
|
|
338
312
|
try {
|
|
339
|
-
// Convert the JSI object directly to nlohmann::json
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
size_t propCount = propNames.size(rt);
|
|
346
|
-
for (size_t i = 0; i < propCount; i++) {
|
|
347
|
-
jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
|
|
348
|
-
std::string key = propName.utf8(rt);
|
|
349
|
-
auto value = paramsObj.getProperty(rt, propName);
|
|
350
|
-
|
|
351
|
-
if (value.isString()) {
|
|
352
|
-
fnParams[key] = value.asString(rt).utf8(rt);
|
|
353
|
-
} else if (value.isNumber()) {
|
|
354
|
-
fnParams[key] = value.asNumber();
|
|
355
|
-
} else if (value.isBool()) {
|
|
356
|
-
fnParams[key] = value.getBool();
|
|
357
|
-
} else if (value.isNull()) {
|
|
358
|
-
fnParams[key] = nullptr;
|
|
359
|
-
} else if (value.isObject()) {
|
|
360
|
-
// For nested objects, we use a simplified approach
|
|
361
|
-
if (value.getObject(rt).isArray(rt)) {
|
|
362
|
-
fnParams[key] = json::array();
|
|
363
|
-
} else {
|
|
364
|
-
fnParams[key] = json::object();
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
fnJson["parameters"] = fnParams;
|
|
370
|
-
} catch (const std::exception&) {
|
|
371
|
-
fnJson["parameters"] = json::object();
|
|
313
|
+
// Convert the JSI object directly to nlohmann::json using the new helper
|
|
314
|
+
fnJson["parameters"] = jsiValueToJson(rt, paramsVal);
|
|
315
|
+
} catch (const std::exception& e) {
|
|
316
|
+
// Log error or handle as appropriate
|
|
317
|
+
fprintf(stderr, "Failed to parse tool parameters: %s\n", e.what());
|
|
318
|
+
fnJson["parameters"] = json::object(); // Fallback to empty object
|
|
372
319
|
}
|
|
373
320
|
}
|
|
374
321
|
}
|
|
@@ -553,6 +500,40 @@ jsi::Value LlamaCppModel::jsonToJsi(jsi::Runtime& rt, const json& j) {
|
|
|
553
500
|
return jsi::Value::undefined();
|
|
554
501
|
}
|
|
555
502
|
|
|
503
|
+
// Helper to convert JSI Value to nlohmann::json
|
|
504
|
+
json LlamaCppModel::jsiValueToJson(jsi::Runtime& rt, const jsi::Value& val) {
|
|
505
|
+
if (val.isUndefined() || val.isNull()) {
|
|
506
|
+
return nullptr;
|
|
507
|
+
} else if (val.isBool()) {
|
|
508
|
+
return val.getBool();
|
|
509
|
+
} else if (val.isNumber()) {
|
|
510
|
+
return val.getNumber();
|
|
511
|
+
} else if (val.isString()) {
|
|
512
|
+
return val.getString(rt).utf8(rt);
|
|
513
|
+
} else if (val.isObject()) {
|
|
514
|
+
jsi::Object jsiObj = val.getObject(rt);
|
|
515
|
+
if (jsiObj.isArray(rt)) {
|
|
516
|
+
jsi::Array jsiArr = jsiObj.getArray(rt);
|
|
517
|
+
json jsonArr = json::array();
|
|
518
|
+
for (size_t i = 0; i < jsiArr.size(rt); ++i) {
|
|
519
|
+
jsonArr.push_back(jsiValueToJson(rt, jsiArr.getValueAtIndex(rt, i)));
|
|
520
|
+
}
|
|
521
|
+
return jsonArr;
|
|
522
|
+
} else {
|
|
523
|
+
json jsonObj = json::object();
|
|
524
|
+
jsi::Array propNames = jsiObj.getPropertyNames(rt);
|
|
525
|
+
for (size_t i = 0; i < propNames.size(rt); ++i) {
|
|
526
|
+
jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
|
|
527
|
+
std::string key = propName.utf8(rt);
|
|
528
|
+
jsonObj[key] = jsiValueToJson(rt, jsiObj.getProperty(rt, propName));
|
|
529
|
+
}
|
|
530
|
+
return jsonObj;
|
|
531
|
+
}
|
|
532
|
+
}
|
|
533
|
+
// Should not happen for valid JSON-like structures
|
|
534
|
+
return nullptr;
|
|
535
|
+
}
|
|
536
|
+
|
|
556
537
|
// JSI method for completions (synchronous - kept for compatibility)
|
|
557
538
|
jsi::Value LlamaCppModel::completionJsi(jsi::Runtime& rt, const jsi::Value* args, size_t count) {
|
|
558
539
|
if (count < 1 || !args[0].isObject()) {
|
package/cpp/LlamaCppModel.h
CHANGED
|
@@ -17,9 +17,12 @@
|
|
|
17
17
|
#include "chat.h" // For chat format handling and templates
|
|
18
18
|
#include "json-schema-to-grammar.h"
|
|
19
19
|
|
|
20
|
-
// Include rn-utils.
|
|
21
|
-
#include "rn-utils.
|
|
22
|
-
#include "rn-llama.
|
|
20
|
+
// Include rn-utils.h which has the CompletionResult definition
|
|
21
|
+
#include "rn-utils.h"
|
|
22
|
+
#include "rn-llama.h"
|
|
23
|
+
|
|
24
|
+
// Include json.hpp for json handling
|
|
25
|
+
#include "nlohmann/json.hpp"
|
|
23
26
|
|
|
24
27
|
namespace facebook::react {
|
|
25
28
|
|
|
@@ -166,6 +169,8 @@ private:
|
|
|
166
169
|
|
|
167
170
|
// Add CallInvoker for async operations
|
|
168
171
|
std::shared_ptr<CallInvoker> jsInvoker_;
|
|
172
|
+
|
|
173
|
+
static json jsiValueToJson(jsi::Runtime& rt, const jsi::Value& val); // Declaration of new helper
|
|
169
174
|
};
|
|
170
175
|
|
|
171
176
|
} // namespace facebook::react
|
package/cpp/PureCppImpl.cpp
CHANGED
package/cpp/PureCppImpl.h
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
#include <mutex>
|
|
10
10
|
|
|
11
11
|
// Include the header with the full definition of rn_llama_context
|
|
12
|
-
#include "rn-llama.
|
|
12
|
+
#include "rn-llama.h"
|
|
13
13
|
|
|
14
14
|
// Forward declarations for C++ only
|
|
15
15
|
struct llama_model;
|
|
@@ -49,7 +49,7 @@ private:
|
|
|
49
49
|
jsi::Object createModelObject(jsi::Runtime& runtime, struct rn_llama_context* rn_ctx);
|
|
50
50
|
|
|
51
51
|
// Context for the currently loaded model, if any.
|
|
52
|
-
// The actual definition of rn_llama_context should be in "rn-llama.
|
|
52
|
+
// The actual definition of rn_llama_context should be in "rn-llama.h"
|
|
53
53
|
std::unique_ptr<struct rn_llama_context> rn_ctx_;
|
|
54
54
|
|
|
55
55
|
// Mutex for thread safety when accessing rn_ctx_ or other shared resources
|
package/cpp/build-info.cpp
CHANGED