cui-llama.rn 1.4.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -20
- package/README.md +317 -319
- package/android/build.gradle +116 -116
- package/android/gradle.properties +5 -5
- package/android/src/main/AndroidManifest.xml +4 -4
- package/android/src/main/CMakeLists.txt +124 -117
- package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
- package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
- package/android/src/main/jni-utils.h +100 -100
- package/android/src/main/jni.cpp +1263 -1245
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
- package/cpp/README.md +4 -4
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1779
- package/cpp/chat.h +9 -1
- package/cpp/common.cpp +20 -522
- package/cpp/common.h +13 -36
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-common.h +12 -6
- package/cpp/ggml-cpu-aarch64.cpp +1557 -80
- package/cpp/ggml-cpu-impl.h +2 -21
- package/cpp/ggml-cpu-quants.c +904 -405
- package/cpp/ggml-cpu.c +909 -13237
- package/cpp/ggml-impl.h +50 -23
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +597 -523
- package/cpp/ggml-metal.m +798 -580
- package/cpp/ggml.c +92 -3
- package/cpp/ggml.h +30 -6
- package/cpp/gguf.cpp +1 -0
- package/cpp/llama-adapter.cpp +55 -20
- package/cpp/llama-adapter.h +11 -9
- package/cpp/llama-arch.cpp +217 -16
- package/cpp/llama-arch.h +25 -0
- package/cpp/llama-batch.h +2 -2
- package/cpp/llama-chat.cpp +54 -2
- package/cpp/llama-chat.h +3 -0
- package/cpp/llama-context.cpp +2294 -1238
- package/cpp/llama-context.h +214 -77
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +8 -0
- package/cpp/llama-hparams.h +17 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +965 -303
- package/cpp/llama-kv-cache.h +145 -151
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +10 -5
- package/cpp/llama-model-loader.h +5 -3
- package/cpp/llama-model.cpp +9194 -201
- package/cpp/llama-model.h +40 -1
- package/cpp/llama-sampling.cpp +5 -0
- package/cpp/llama-vocab.cpp +36 -5
- package/cpp/llama.cpp +51 -9984
- package/cpp/llama.h +102 -22
- package/cpp/log.cpp +34 -0
- package/cpp/minja/chat-template.hpp +15 -7
- package/cpp/minja/minja.hpp +120 -94
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +873 -882
- package/cpp/rn-llama.h +138 -148
- package/cpp/sampling.cpp +3 -0
- package/cpp/sampling.h +107 -107
- package/cpp/sgemm.cpp +533 -88
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +4 -4
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/unicode-data.cpp +7034 -7034
- package/cpp/unicode-data.h +20 -20
- package/cpp/unicode.cpp +849 -849
- package/cpp/unicode.h +66 -66
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +116 -105
- package/ios/RNLlama.h +7 -7
- package/ios/RNLlama.mm +418 -405
- package/ios/RNLlamaContext.h +57 -57
- package/ios/RNLlamaContext.mm +835 -819
- package/ios/rnllama.xcframework/Info.plist +74 -74
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +203 -203
- package/lib/commonjs/NativeRNLlama.js +1 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js.map +1 -1
- package/lib/commonjs/grammar.js +12 -31
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +47 -47
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/package.json +1 -0
- package/lib/module/NativeRNLlama.js +2 -0
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +2 -0
- package/lib/module/chat.js.map +1 -1
- package/lib/module/grammar.js +14 -31
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +47 -45
- package/lib/module/index.js.map +1 -1
- package/lib/module/package.json +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +6 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +48 -48
- package/package.json +233 -233
- package/src/NativeRNLlama.ts +426 -424
- package/src/chat.ts +44 -44
- package/src/grammar.ts +854 -854
- package/src/index.ts +495 -485
package/cpp/unicode.h
CHANGED
@@ -1,66 +1,66 @@
|
|
1
|
-
#pragma once
|
2
|
-
|
3
|
-
#include <cstdint>
|
4
|
-
#include <string>
|
5
|
-
#include <vector>
|
6
|
-
|
7
|
-
struct unicode_cpt_flags {
|
8
|
-
enum {
|
9
|
-
UNDEFINED = 0x0001,
|
10
|
-
NUMBER = 0x0002, // regex: \p{N}
|
11
|
-
LETTER = 0x0004, // regex: \p{L}
|
12
|
-
SEPARATOR = 0x0008, // regex: \p{Z}
|
13
|
-
ACCENT_MARK = 0x0010, // regex: \p{M}
|
14
|
-
PUNCTUATION = 0x0020, // regex: \p{P}
|
15
|
-
SYMBOL = 0x0040, // regex: \p{S}
|
16
|
-
CONTROL = 0x0080, // regex: \p{C}
|
17
|
-
MASK_CATEGORIES = 0x00FF,
|
18
|
-
};
|
19
|
-
|
20
|
-
// codepoint type
|
21
|
-
uint16_t is_undefined : 1;
|
22
|
-
uint16_t is_number : 1; // regex: \p{N}
|
23
|
-
uint16_t is_letter : 1; // regex: \p{L}
|
24
|
-
uint16_t is_separator : 1; // regex: \p{Z}
|
25
|
-
uint16_t is_accent_mark : 1; // regex: \p{M}
|
26
|
-
uint16_t is_punctuation : 1; // regex: \p{P}
|
27
|
-
uint16_t is_symbol : 1; // regex: \p{S}
|
28
|
-
uint16_t is_control : 1; // regex: \p{C}
|
29
|
-
// helper flags
|
30
|
-
uint16_t is_whitespace : 1; // regex: \s
|
31
|
-
uint16_t is_lowercase : 1;
|
32
|
-
uint16_t is_uppercase : 1;
|
33
|
-
uint16_t is_nfd : 1;
|
34
|
-
|
35
|
-
// decode from uint16
|
36
|
-
inline unicode_cpt_flags(const uint16_t flags = 0) {
|
37
|
-
*reinterpret_cast<uint16_t*>(this) = flags;
|
38
|
-
}
|
39
|
-
|
40
|
-
inline uint16_t as_uint() const {
|
41
|
-
return *reinterpret_cast<const uint16_t*>(this);
|
42
|
-
}
|
43
|
-
|
44
|
-
inline uint16_t category_flag() const {
|
45
|
-
return this->as_uint() & MASK_CATEGORIES;
|
46
|
-
}
|
47
|
-
};
|
48
|
-
|
49
|
-
size_t unicode_len_utf8(char src);
|
50
|
-
|
51
|
-
std::string unicode_cpt_to_utf8 (uint32_t cpt);
|
52
|
-
uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset);
|
53
|
-
|
54
|
-
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
|
55
|
-
|
56
|
-
std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
|
57
|
-
|
58
|
-
unicode_cpt_flags unicode_cpt_flags_from_cpt (uint32_t cpt);
|
59
|
-
unicode_cpt_flags unicode_cpt_flags_from_utf8(const std::string & utf8);
|
60
|
-
|
61
|
-
std::string unicode_byte_to_utf8(uint8_t byte);
|
62
|
-
uint8_t unicode_utf8_to_byte(const std::string & utf8);
|
63
|
-
|
64
|
-
uint32_t unicode_tolower(uint32_t cpt);
|
65
|
-
|
66
|
-
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>
|
6
|
+
|
7
|
+
// Classification flags of a single Unicode codepoint, packed into bit-fields.
//
// The enum constants name the bit values accepted by the constructor and
// produced by as_uint(); MASK_CATEGORIES selects the category bits (low byte).
//
// NOTE: matching an enum constant to its bit-field (e.g. LETTER <-> is_letter)
// relies on the compiler allocating bit-fields starting at the least
// significant bit, which is implementation-defined.
struct unicode_cpt_flags {
    enum {
        UNDEFINED       = 0x0001,
        NUMBER          = 0x0002,  // regex: \p{N}
        LETTER          = 0x0004,  // regex: \p{L}
        SEPARATOR       = 0x0008,  // regex: \p{Z}
        ACCENT_MARK     = 0x0010,  // regex: \p{M}
        PUNCTUATION     = 0x0020,  // regex: \p{P}
        SYMBOL          = 0x0040,  // regex: \p{S}
        CONTROL         = 0x0080,  // regex: \p{C}
        MASK_CATEGORIES = 0x00FF,
    };

    // codepoint type
    uint16_t is_undefined   : 1;
    uint16_t is_number      : 1;  // regex: \p{N}
    uint16_t is_letter      : 1;  // regex: \p{L}
    uint16_t is_separator   : 1;  // regex: \p{Z}
    uint16_t is_accent_mark : 1;  // regex: \p{M}
    uint16_t is_punctuation : 1;  // regex: \p{P}
    uint16_t is_symbol      : 1;  // regex: \p{S}
    uint16_t is_control     : 1;  // regex: \p{C}
    // helper flags
    uint16_t is_whitespace  : 1;  // regex: \s
    uint16_t is_lowercase   : 1;
    uint16_t is_uppercase   : 1;
    uint16_t is_nfd         : 1;

    // decode from uint16: copies the raw 16-bit value over the bit-fields.
    // memcpy is used instead of a uint16_t* cast of `this`, which would access
    // the object through an incompatible type (strict-aliasing violation).
    inline unicode_cpt_flags(const uint16_t flags = 0) {
        static_assert(sizeof(unicode_cpt_flags) >= sizeof(uint16_t),
                      "unicode_cpt_flags must be able to hold a 16-bit flag word");
        std::memcpy(static_cast<void *>(this), &flags, sizeof(flags));
    }

    // encode to uint16: returns the first 16 bits of the object representation
    inline uint16_t as_uint() const {
        uint16_t bits;
        std::memcpy(&bits, static_cast<const void *>(this), sizeof(bits));
        return bits;
    }

    // returns only the category bits (those covered by MASK_CATEGORIES)
    inline uint16_t category_flag() const {
        return this->as_uint() & MASK_CATEGORIES;
    }
};
|
48
|
+
|
49
|
+
size_t unicode_len_utf8(char src);
|
50
|
+
|
51
|
+
std::string unicode_cpt_to_utf8 (uint32_t cpt);
|
52
|
+
uint32_t unicode_cpt_from_utf8(const std::string & utf8, size_t & offset);
|
53
|
+
|
54
|
+
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
|
55
|
+
|
56
|
+
std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
|
57
|
+
|
58
|
+
unicode_cpt_flags unicode_cpt_flags_from_cpt (uint32_t cpt);
|
59
|
+
unicode_cpt_flags unicode_cpt_flags_from_utf8(const std::string & utf8);
|
60
|
+
|
61
|
+
std::string unicode_byte_to_utf8(uint8_t byte);
|
62
|
+
uint8_t unicode_utf8_to_byte(const std::string & utf8);
|
63
|
+
|
64
|
+
uint32_t unicode_tolower(uint32_t cpt);
|
65
|
+
|
66
|
+
std::vector<std::string> unicode_regex_split(const std::string & text, const std::vector<std::string> & regex_exprs);
|
package/cpp/vec.cpp
ADDED
@@ -0,0 +1,258 @@
|
|
1
|
+
#include "vec.h"
|
2
|
+
|
3
|
+
#include <cassert>
|
4
|
+
|
5
|
+
#if defined(_MSC_VER)
|
6
|
+
// disable "possible loss of data" to avoid hundreds of casts
|
7
|
+
// we should just be careful :)
|
8
|
+
#pragma warning(disable: 4244 4267)
|
9
|
+
#endif
|
10
|
+
|
11
|
+
// precomputed gelu table for f16 (128 KB)
// 1 << 16 entries, i.e. one slot per possible 16-bit value.
// NOTE(review): presumably indexed by the raw bit pattern of an f16 input and
// filled in elsewhere - confirm against the code that initializes it.
|
12
|
+
lm_ggml_fp16_t lm_ggml_table_gelu_f16[1 << 16];
|
13
|
+
|
14
|
+
// precomputed quick gelu table for f16 (128 KB)
// Same size as the table above (1 << 16 entries).
// NOTE(review): presumably indexed and initialized the same way - confirm.
|
15
|
+
lm_ggml_fp16_t lm_ggml_table_gelu_quick_f16[1 << 16];
|
16
|
+
|
17
|
+
void lm_ggml_vec_dot_f32(int n, float * LM_GGML_RESTRICT s, size_t bs, const float * LM_GGML_RESTRICT x, size_t bx, const float * LM_GGML_RESTRICT y, size_t by, int nrc) {
|
18
|
+
assert(nrc == 1);
|
19
|
+
LM_GGML_UNUSED(nrc);
|
20
|
+
LM_GGML_UNUSED(bx);
|
21
|
+
LM_GGML_UNUSED(by);
|
22
|
+
LM_GGML_UNUSED(bs);
|
23
|
+
|
24
|
+
#if defined(LM_GGML_SIMD)
|
25
|
+
float sumf = 0.0f;
|
26
|
+
const int np = (n & ~(LM_GGML_F32_STEP - 1));
|
27
|
+
|
28
|
+
LM_GGML_F32_VEC sum[LM_GGML_F32_ARR] = { LM_GGML_F32_VEC_ZERO };
|
29
|
+
|
30
|
+
LM_GGML_F32_VEC ax[LM_GGML_F32_ARR];
|
31
|
+
LM_GGML_F32_VEC ay[LM_GGML_F32_ARR];
|
32
|
+
|
33
|
+
for (int i = 0; i < np; i += LM_GGML_F32_STEP) {
|
34
|
+
for (int j = 0; j < LM_GGML_F32_ARR; j++) {
|
35
|
+
ax[j] = LM_GGML_F32_VEC_LOAD(x + i + j*LM_GGML_F32_EPR);
|
36
|
+
ay[j] = LM_GGML_F32_VEC_LOAD(y + i + j*LM_GGML_F32_EPR);
|
37
|
+
|
38
|
+
sum[j] = LM_GGML_F32_VEC_FMA(sum[j], ax[j], ay[j]);
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
// reduce sum0..sum3 to sum0
|
43
|
+
LM_GGML_F32_VEC_REDUCE(sumf, sum);
|
44
|
+
|
45
|
+
// leftovers
|
46
|
+
for (int i = np; i < n; ++i) {
|
47
|
+
sumf += x[i]*y[i];
|
48
|
+
}
|
49
|
+
#else
|
50
|
+
// scalar
|
51
|
+
lm_ggml_float sumf = 0.0;
|
52
|
+
for (int i = 0; i < n; ++i) {
|
53
|
+
sumf += (lm_ggml_float)(x[i]*y[i]);
|
54
|
+
}
|
55
|
+
#endif
|
56
|
+
|
57
|
+
*s = sumf;
|
58
|
+
}
|
59
|
+
|
60
|
+
void lm_ggml_vec_dot_bf16(int n, float * LM_GGML_RESTRICT s, size_t bs, lm_ggml_bf16_t * LM_GGML_RESTRICT x, size_t bx, lm_ggml_bf16_t * LM_GGML_RESTRICT y, size_t by, int nrc) {
|
61
|
+
assert(nrc == 1);
|
62
|
+
LM_GGML_UNUSED(nrc);
|
63
|
+
LM_GGML_UNUSED(bx);
|
64
|
+
LM_GGML_UNUSED(by);
|
65
|
+
LM_GGML_UNUSED(bs);
|
66
|
+
int i = 0;
|
67
|
+
lm_ggml_float sumf = 0;
|
68
|
+
|
69
|
+
#if defined(__AVX512BF16__)
|
70
|
+
__m512 c1 = _mm512_setzero_ps();
|
71
|
+
__m512 c2 = _mm512_setzero_ps();
|
72
|
+
for (; i + 64 <= n; i += 64) {
|
73
|
+
c1 = _mm512_dpbf16_ps(c1, m512bh(_mm512_loadu_si512((x + i))),
|
74
|
+
m512bh(_mm512_loadu_si512((y + i))));
|
75
|
+
c2 = _mm512_dpbf16_ps(c2, m512bh(_mm512_loadu_si512((x + i + 32))),
|
76
|
+
m512bh(_mm512_loadu_si512((y + i + 32))));
|
77
|
+
}
|
78
|
+
sumf += (lm_ggml_float)_mm512_reduce_add_ps(c1);
|
79
|
+
sumf += (lm_ggml_float)_mm512_reduce_add_ps(c2);
|
80
|
+
|
81
|
+
#elif defined(__AVX512F__)
|
82
|
+
#define LOAD(p) _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(_mm256_loadu_si256((const __m256i *)(p))), 16))
|
83
|
+
__m512 c1 = _mm512_setzero_ps();
|
84
|
+
__m512 c2 = _mm512_setzero_ps();
|
85
|
+
for (; i + 32 <= n; i += 32) {
|
86
|
+
c1 = _mm512_add_ps(_mm512_mul_ps(LOAD(x + i), LOAD(y + i)), c1);
|
87
|
+
c2 = _mm512_add_ps(_mm512_mul_ps(LOAD(x + i + 16), LOAD(y + i + 16)), c2);
|
88
|
+
}
|
89
|
+
sumf += (lm_ggml_float)_mm512_reduce_add_ps(c1);
|
90
|
+
sumf += (lm_ggml_float)_mm512_reduce_add_ps(c2);
|
91
|
+
|
92
|
+
#undef LOAD
|
93
|
+
#elif defined(__AVX2__) || defined(__AVX__)
|
94
|
+
#if defined(__AVX2__)
|
95
|
+
#define LOAD(p) _mm256_castsi256_ps(_mm256_slli_epi32(_mm256_cvtepu16_epi32(_mm_loadu_si128((const __m128i *)(p))), 16))
|
96
|
+
#else
|
97
|
+
#define LOAD(p) _mm256_castsi256_ps(_mm256_insertf128_si256(_mm256_castsi128_si256(_mm_slli_epi32(_mm_cvtepu16_epi32(_mm_loadu_si128((const __m128i *)(p))), 16)), (_mm_slli_epi32(_mm_cvtepu16_epi32(_mm_bsrli_si128(_mm_loadu_si128((const __m128i *)(p)), 8)), 16)), 1))
|
98
|
+
#endif
|
99
|
+
__m256 c1 = _mm256_setzero_ps();
|
100
|
+
__m256 c2 = _mm256_setzero_ps();
|
101
|
+
__m256 c3 = _mm256_setzero_ps();
|
102
|
+
__m256 c4 = _mm256_setzero_ps();
|
103
|
+
for (; i + 32 <= n; i += 32) {
|
104
|
+
c1 = _mm256_add_ps(_mm256_mul_ps(LOAD(x + i), LOAD(y + i)), c1);
|
105
|
+
c2 = _mm256_add_ps(_mm256_mul_ps(LOAD(x + i + 8), LOAD(y + i + 8)), c2);
|
106
|
+
c3 = _mm256_add_ps(_mm256_mul_ps(LOAD(x + i + 16), LOAD(y + i + 16)), c3);
|
107
|
+
c4 = _mm256_add_ps(_mm256_mul_ps(LOAD(x + i + 24), LOAD(y + i + 24)), c4);
|
108
|
+
}
|
109
|
+
__m128 g;
|
110
|
+
c1 = _mm256_add_ps(_mm256_add_ps(c1, c3),
|
111
|
+
_mm256_add_ps(c2, c4));
|
112
|
+
g = _mm_add_ps(_mm256_extractf128_ps(c1, 1),
|
113
|
+
_mm256_castps256_ps128(c1));
|
114
|
+
g = _mm_add_ps(g, _mm_movehl_ps(g, g));
|
115
|
+
g = _mm_add_ss(g, _mm_movehdup_ps(g));
|
116
|
+
sumf += (lm_ggml_float)_mm_cvtss_f32(g);
|
117
|
+
|
118
|
+
#undef LOAD
|
119
|
+
#endif
|
120
|
+
|
121
|
+
for (; i < n; ++i) {
|
122
|
+
sumf += (lm_ggml_float)(LM_GGML_BF16_TO_FP32(x[i]) *
|
123
|
+
LM_GGML_BF16_TO_FP32(y[i]));
|
124
|
+
}
|
125
|
+
*s = sumf;
|
126
|
+
}
|
127
|
+
|
128
|
+
void lm_ggml_vec_dot_f16(int n, float * LM_GGML_RESTRICT s, size_t bs, lm_ggml_fp16_t * LM_GGML_RESTRICT x, size_t bx, lm_ggml_fp16_t * LM_GGML_RESTRICT y, size_t by, int nrc) {
|
129
|
+
assert(nrc == 1);
|
130
|
+
LM_GGML_UNUSED(nrc);
|
131
|
+
LM_GGML_UNUSED(bx);
|
132
|
+
LM_GGML_UNUSED(by);
|
133
|
+
LM_GGML_UNUSED(bs);
|
134
|
+
|
135
|
+
lm_ggml_float sumf = 0.0;
|
136
|
+
|
137
|
+
#if defined(LM_GGML_SIMD)
|
138
|
+
const int np = (n & ~(LM_GGML_F16_STEP - 1));
|
139
|
+
|
140
|
+
LM_GGML_F16_VEC sum[LM_GGML_F16_ARR] = { LM_GGML_F16_VEC_ZERO };
|
141
|
+
|
142
|
+
LM_GGML_F16_VEC ax[LM_GGML_F16_ARR];
|
143
|
+
LM_GGML_F16_VEC ay[LM_GGML_F16_ARR];
|
144
|
+
|
145
|
+
for (int i = 0; i < np; i += LM_GGML_F16_STEP) {
|
146
|
+
for (int j = 0; j < LM_GGML_F16_ARR; j++) {
|
147
|
+
ax[j] = LM_GGML_F16_VEC_LOAD(x + i + j*LM_GGML_F16_EPR, j);
|
148
|
+
ay[j] = LM_GGML_F16_VEC_LOAD(y + i + j*LM_GGML_F16_EPR, j);
|
149
|
+
|
150
|
+
sum[j] = LM_GGML_F16_VEC_FMA(sum[j], ax[j], ay[j]);
|
151
|
+
}
|
152
|
+
}
|
153
|
+
|
154
|
+
// reduce sum0..sum3 to sum0
|
155
|
+
LM_GGML_F16_VEC_REDUCE(sumf, sum);
|
156
|
+
|
157
|
+
// leftovers
|
158
|
+
for (int i = np; i < n; ++i) {
|
159
|
+
sumf += (lm_ggml_float)(LM_GGML_FP16_TO_FP32(x[i])*LM_GGML_FP16_TO_FP32(y[i]));
|
160
|
+
}
|
161
|
+
#else
|
162
|
+
for (int i = 0; i < n; ++i) {
|
163
|
+
sumf += (lm_ggml_float)(LM_GGML_FP16_TO_FP32(x[i])*LM_GGML_FP16_TO_FP32(y[i]));
|
164
|
+
}
|
165
|
+
#endif
|
166
|
+
|
167
|
+
*s = sumf;
|
168
|
+
}
|
169
|
+
|
170
|
+
void lm_ggml_vec_silu_f32(const int n, float * y, const float * x) {
|
171
|
+
int i = 0;
|
172
|
+
#if defined(__AVX512F__) && defined(__AVX512DQ__)
|
173
|
+
for (; i + 15 < n; i += 16) {
|
174
|
+
_mm512_storeu_ps(y + i, lm_ggml_v_silu(_mm512_loadu_ps(x + i)));
|
175
|
+
}
|
176
|
+
#elif defined(__AVX2__) && defined(__FMA__)
|
177
|
+
for (; i + 7 < n; i += 8) {
|
178
|
+
_mm256_storeu_ps(y + i, lm_ggml_v_silu(_mm256_loadu_ps(x + i)));
|
179
|
+
}
|
180
|
+
#elif defined(__SSE2__)
|
181
|
+
for (; i + 3 < n; i += 4) {
|
182
|
+
_mm_storeu_ps(y + i, lm_ggml_v_silu(_mm_loadu_ps(x + i)));
|
183
|
+
}
|
184
|
+
#elif defined(__ARM_NEON) && defined(__aarch64__)
|
185
|
+
for (; i + 3 < n; i += 4) {
|
186
|
+
vst1q_f32(y + i, lm_ggml_v_silu(vld1q_f32(x + i)));
|
187
|
+
}
|
188
|
+
#endif
|
189
|
+
for (; i < n; ++i) {
|
190
|
+
y[i] = lm_ggml_silu_f32(x[i]);
|
191
|
+
}
|
192
|
+
}
|
193
|
+
|
194
|
+
// Writes y[k] = exp(x[k] - max) for k in [0, n) and returns the sum of all y[k].
// The values are not normalized here; the returned sum is what a caller would divide by.
lm_ggml_float lm_ggml_vec_soft_max_f32(const int n, float * y, const float * x, float max) {
    int pos = 0;
    lm_ggml_float total = 0;
#if defined(__AVX512F__) && defined(__AVX512DQ__)
    const __m512 vmax = _mm512_set1_ps(max);
    while (pos + 15 < n) {
        const __m512 e = lm_ggml_v_expf(_mm512_sub_ps(_mm512_loadu_ps(x + pos), vmax));
        _mm512_storeu_ps(y + pos, e);
        total += (lm_ggml_float)_mm512_reduce_add_ps(e);
        pos += 16;
    }
#elif defined(__AVX2__) && defined(__FMA__)
    const __m256 vmax = _mm256_set1_ps(max);
    while (pos + 7 < n) {
        const __m256 e = lm_ggml_v_expf(_mm256_sub_ps(_mm256_loadu_ps(x + pos), vmax));
        _mm256_storeu_ps(y + pos, e);
        // horizontal sum: 256 bits -> 128 bits -> 2 lanes -> 1 lane
        __m128 h = _mm_add_ps(_mm256_extractf128_ps(e, 1),
                              _mm256_castps256_ps128(e));
        h = _mm_add_ps(h, _mm_movehl_ps(h, h));
        h = _mm_add_ss(h, _mm_movehdup_ps(h));
        total += (lm_ggml_float)_mm_cvtss_f32(h);
        pos += 8;
    }
#elif defined(__SSE2__)
    const __m128 vmax = _mm_set1_ps(max);
    while (pos + 3 < n) {
        __m128 e = lm_ggml_v_expf(_mm_sub_ps(_mm_loadu_ps(x + pos), vmax));
        _mm_storeu_ps(y + pos, e);
        // horizontal sum of the 4 lanes into lane 0
#if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
        e = _mm_add_ps(e, _mm_movehl_ps(e, e));
        e = _mm_add_ss(e, _mm_movehdup_ps(e));
#else
        // same reduction using only shuffles that do not need _mm_movehdup_ps
        __m128 swapped = _mm_shuffle_ps(e, e, _MM_SHUFFLE(2, 3, 0, 1));
        e = _mm_add_ps(e, swapped);
        swapped = _mm_movehl_ps(swapped, e);
        e = _mm_add_ss(e, swapped);
#endif
        total += (lm_ggml_float)_mm_cvtss_f32(e);
        pos += 4;
    }
#elif defined(__ARM_NEON) && defined(__aarch64__)
    const float32x4_t vmax = vdupq_n_f32(max);
    while (pos + 3 < n) {
        const float32x4_t e = lm_ggml_v_expf(vsubq_f32(vld1q_f32(x + pos), vmax));
        vst1q_f32(y + pos, e);
        total += (lm_ggml_float)vaddvq_f32(e);
        pos += 4;
    }
#endif
    // scalar tail
    while (pos < n) {
        const float e = expf(x[pos] - max);
        total += (lm_ggml_float)e;
        y[pos] = e;
        ++pos;
    }
    return total;
}
|
246
|
+
|
247
|
+
// Writes the shifted logits y[i] = x[i] - max for i in [0, n) and returns
// log(sum_i exp(x[i] - max)).
//
// With soft_max_sum = sum_j exp(logit_j - max):
//   log(soft_max_i) = log(exp(logit_i - max) / soft_max_sum)
//                   = (logit_i - max) - log(soft_max_sum)
// so subtracting the returned value from every y[i] yields the log-softmax.
lm_ggml_float lm_ggml_vec_log_soft_max_f32(const int n, float * y, const float * x, float max) {
    lm_ggml_float sum = 0;
    for (int i = 0; i < n; ++i) {
        const float val = x[i] - max;
        y[i] = val;
        sum += (lm_ggml_float)expf(val);
    }
    // logf operates on float, so sum is narrowed before the logarithm is taken
    return (lm_ggml_float)logf(sum);
}
|