cui-llama.rn 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/ggml-cpu/ops.cpp:

@@ -8,19 +8,6 @@
 
 #include <float.h>
 
-#if defined(_MSC_VER)
-// disable "possible loss of data" to avoid hundreds of casts
-// we should just be careful :)
-#pragma warning(disable: 4244 4267)
-
-// disable POSIX deprecation warnings
-// these functions are never going away, anyway
-#pragma warning(disable: 4996)
-
-// unreachable code because of multiple instances of code after LM_GGML_ABORT
-#pragma warning(disable: 4702)
-#endif
-
 // lm_ggml_compute_forward_dup
 
 static void lm_ggml_compute_forward_dup_same_cont(
@@ -2704,6 +2691,109 @@ static void lm_ggml_compute_forward_gelu(
     }
 }
 
+// lm_ggml_compute_forward_gelu_erf
+
+static void lm_ggml_compute_forward_gelu_erf_f32(
+        const lm_ggml_compute_params * params,
+        lm_ggml_tensor * dst) {
+
+    const lm_ggml_tensor * src0 = dst->src[0];
+
+    assert(lm_ggml_is_contiguous_1(src0));
+    assert(lm_ggml_is_contiguous_1(dst));
+    assert(lm_ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src0->ne[0];
+    const int nr = lm_ggml_nrows(src0);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        lm_ggml_vec_gelu_erf_f32(nc,
+                (float *) ((char *) dst->data  + i1*( dst->nb[1])),
+                (float *) ((char *) src0->data + i1*(src0->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            LM_GGML_UNUSED(x);
+            assert(!isnan(x));
+            assert(!isinf(x));
+        }
+#endif
+    }
+}
+
+static void lm_ggml_compute_forward_gelu_erf_f16(
+        const lm_ggml_compute_params * params,
+        lm_ggml_tensor * dst) {
+
+    const lm_ggml_tensor * src0 = dst->src[0];
+
+    assert(lm_ggml_is_contiguous_1(src0));
+    assert(lm_ggml_is_contiguous_1(dst));
+    assert(lm_ggml_are_same_shape(src0, dst));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int nc = src0->ne[0];
+    const int nr = lm_ggml_nrows(src0);
+
+    // rows per thread
+    const int dr = (nr + nth - 1)/nth;
+
+    // row range for this thread
+    const int ir0 = dr*ith;
+    const int ir1 = MIN(ir0 + dr, nr);
+
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        lm_ggml_vec_gelu_erf_f16(nc,
+                (lm_ggml_fp16_t *) ((char *) dst->data  + i1*( dst->nb[1])),
+                (lm_ggml_fp16_t *) ((char *) src0->data + i1*(src0->nb[1])));
+
+#ifndef NDEBUG
+        for (int k = 0; k < nc; k++) {
+            const lm_ggml_fp16_t x = ((lm_ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k];
+            const float v = LM_GGML_FP16_TO_FP32(x);
+            LM_GGML_UNUSED(v);
+            assert(!isnan(v));
+            assert(!isinf(v));
+        }
+#endif
+    }
+}
+
+static void lm_ggml_compute_forward_gelu_erf(
+        const lm_ggml_compute_params * params,
+        lm_ggml_tensor * dst) {
+
+    const lm_ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case LM_GGML_TYPE_F32:
+            {
+                lm_ggml_compute_forward_gelu_erf_f32(params, dst);
+            } break;
+        case LM_GGML_TYPE_F16:
+            {
+                lm_ggml_compute_forward_gelu_erf_f16(params, dst);
+            } break;
+        default:
+            {
+                LM_GGML_ABORT("fatal error");
+            }
+    }
+}
+
 // lm_ggml_compute_forward_gelu_quick
 
 static void lm_ggml_compute_forward_gelu_quick_f32(
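Note: the new `GELU_ERF` path computes the exact GELU rather than a tanh approximation. A minimal scalar sketch of the per-element function that `lm_ggml_vec_gelu_erf_f32`/`_f16` are presumed to apply (reference only, not the package's vectorized implementation):

```c
#include <math.h>

// Assumed per-element form: GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
static float gelu_erf_ref(float x) {
    return 0.5f * x * (1.0f + erff(x / sqrtf(2.0f)));
}

// The row partitioning in the hunk gives each of nth threads one contiguous chunk:
// nr = 10 rows, nth = 4 -> dr = 3, thread ranges [0,3), [3,6), [6,9), [9,10).
```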
@@ -4222,7 +4312,7 @@ static void lm_ggml_compute_forward_get_rows_f16(
 
                 LM_GGML_ASSERT(i01 >= 0 && i01 < ne01);
 
-                lm_ggml_fp16_to_fp32_row(
+                lm_ggml_cpu_fp16_to_fp32(
                         (const lm_ggml_fp16_t*) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
                                      (float *) ((char *)  dst->data + i10*nb1  + i11*nb2  + i12*nb3), nc);
             }
@@ -4263,7 +4353,7 @@ static void lm_ggml_compute_forward_get_rows_bf16(
 
                 LM_GGML_ASSERT(i01 >= 0 && i01 < ne01);
 
-                lm_ggml_bf16_to_fp32_row(
+                lm_ggml_cpu_bf16_to_fp32(
                         (const lm_ggml_bf16_t *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
                                       (float *) ((char *)  dst->data + i10*nb1  + i11*nb2  + i12*nb3), nc);
             }
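Note: both `get_rows` hunks swap the generic row converters for CPU-backend helpers. Assuming `lm_ggml_cpu_fp16_to_fp32(x, y, n)` converts `n` contiguous half-precision values to float (with SIMD dispatch inside the CPU backend), a scalar equivalent would be:

```c
#include <stdint.h>

// Scalar equivalent (assumption) of lm_ggml_cpu_fp16_to_fp32(x, y, n).
// lm_ggml_fp16_t and LM_GGML_FP16_TO_FP32 come from the package's ggml headers.
static void fp16_row_to_fp32_ref(const lm_ggml_fp16_t * x, float * y, int64_t n) {
    for (int64_t i = 0; i < n; ++i) {
        y[i] = LM_GGML_FP16_TO_FP32(x[i]);
    }
}
```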
@@ -6064,6 +6154,178 @@ void lm_ggml_compute_forward_conv_transpose_2d(
     }
 }
 
+// lm_ggml_compute_forward_conv_2d_dw
+
+struct lm_ggml_conv_2d_dw_params {
+    int64_t channels;
+    int64_t batch;
+    int64_t src_w;
+    int64_t src_h;
+    int64_t dst_w;
+    int64_t dst_h;
+    int64_t knl_w;
+    int64_t knl_h;
+    int stride_x;
+    int stride_y;
+    int pad_x;
+    int pad_y;
+    int dilation_x;
+    int dilation_y;
+};
+
+static void lm_ggml_compute_forward_conv_2d_dw_cwhn(
+        const lm_ggml_compute_params * params,
+        const lm_ggml_tensor * src,
+        const lm_ggml_tensor * kernel,
+        lm_ggml_tensor * dst,
+        const lm_ggml_conv_2d_dw_params & p) {
+
+    const int64_t c = p.channels;
+    const float * knl_data = (const float *)kernel->data;
+
+    const int64_t rows_total = p.dst_h * p.batch;
+    const int64_t rows_per_thread = (rows_total + params->nth - 1) / params->nth;
+    const int64_t row_start = params->ith * rows_per_thread;
+    const int64_t row_end = MIN(row_start + rows_per_thread, rows_total);
+
+#ifdef LM_GGML_SIMD
+    const int64_t pkg_size = LM_GGML_F32_EPR;
+    const int64_t pkg_count = c / pkg_size;
+    const int64_t c_pkg_end = pkg_count * pkg_size;
+#else
+    const int64_t c_pkg_end = 0;
+#endif
+
+    for (int64_t row = row_start; row < row_end; ++row) {
+        const int64_t dst_y = row % p.dst_h;
+        const float * src_data = (const float *)src->data + (row / p.dst_h) * p.src_w * p.src_h * c;
+        for (int64_t dst_x = 0; dst_x < p.dst_w; ++dst_x) {
+            float * dst_data = (float *)dst->data + (row * p.dst_w + dst_x) * c;
+            const int64_t src_y_base = dst_y * p.stride_y - p.pad_y;
+            const int64_t src_x_base = dst_x * p.stride_x - p.pad_x;
+
+#ifdef LM_GGML_SIMD
+            // Vectorized loop
+            for (int64_t c_i = 0; c_i < c_pkg_end; c_i += pkg_size) {
+                LM_GGML_F32_VEC sum = LM_GGML_F32_VEC_ZERO;
+                for (int64_t knl_y = 0; knl_y < p.knl_h; ++knl_y) {
+                    const int64_t src_y = src_y_base + knl_y * p.dilation_y;
+                    if (src_y < 0 || src_y >= p.src_h) {
+                        continue;
+                    }
+                    for (int64_t knl_x = 0; knl_x < p.knl_w; ++knl_x) {
+                        const int64_t src_x = src_x_base + knl_x * p.dilation_x;
+                        if (src_x < 0 || src_x >= p.src_w) {
+                            continue;
+                        }
+                        LM_GGML_F32_VEC k = LM_GGML_F32_VEC_LOAD(knl_data + (knl_y * p.knl_w + knl_x) * c + c_i);
+                        LM_GGML_F32_VEC s = LM_GGML_F32_VEC_LOAD(src_data + (src_y * p.src_w + src_x) * c + c_i);
+                        sum = LM_GGML_F32_VEC_FMA(sum, k, s);
+                    }
+                }
+                LM_GGML_F32_VEC_STORE(dst_data + c_i, sum);
+            }
+#endif
+            // Scalar loop
+            for (int64_t c_i = c_pkg_end; c_i < c; ++c_i) {
+                float sum = 0.0f;
+                for (int64_t knl_y = 0; knl_y < p.knl_h; ++knl_y) {
+                    const int64_t src_y = src_y_base + knl_y * p.dilation_y;
+                    if (src_y < 0 || src_y >= p.src_h) {
+                        continue;
+                    }
+                    for (int64_t knl_x = 0; knl_x < p.knl_w; ++knl_x) {
+                        const int64_t src_x = src_x_base + knl_x * p.dilation_x;
+                        if (src_x < 0 || src_x >= p.src_w) {
+                            continue;
+                        }
+                        sum += knl_data[(knl_y * p.knl_w + knl_x) * c + c_i]
+                             * src_data[(src_y * p.src_w + src_x) * c + c_i];
+                    }
+                }
+                dst_data[c_i] = sum;
+            }
+        }
+    }
+}
+
+static void lm_ggml_compute_forward_conv_2d_dw_whcn(
+        const lm_ggml_compute_params * params,
+        const lm_ggml_tensor * src,
+        const lm_ggml_tensor * kernel,
+        lm_ggml_tensor * dst,
+        const lm_ggml_conv_2d_dw_params & p) {
+
+    const int64_t n = p.channels * p.batch;
+    const int64_t per_thread = (n + params->nth - 1) / params->nth;
+    const int64_t start = params->ith * per_thread;
+    const int64_t end = MIN(start + per_thread, n);
+
+    for (int64_t i = start; i < end; ++i) {
+        const float * knl_data = (const float *)kernel->data + (i % p.channels) * p.knl_w * p.knl_h;
+        const float * src_data = (const float *)src->data + i * p.src_w * p.src_h;
+        float * dst_data = (float *)dst->data + i * p.dst_w * p.dst_h;
+
+        for (int64_t dst_y = 0; dst_y < p.dst_h; ++dst_y) {
+            for (int64_t dst_x = 0; dst_x < p.dst_w; ++dst_x) {
+
+                float sum = 0.0f;
+                for (int64_t knl_y = 0; knl_y < p.knl_h; ++knl_y) {
+                    const int64_t src_y = dst_y * p.stride_y + knl_y * p.dilation_y - p.pad_y;
+                    if (src_y < 0 || src_y >= p.src_h) {
+                        continue;
+                    }
+                    for (int64_t knl_x = 0; knl_x < p.knl_w; ++knl_x) {
+                        const int64_t src_x = dst_x * p.stride_x + knl_x * p.dilation_x - p.pad_x;
+                        if (src_x < 0 || src_x >= p.src_w) {
+                            continue;
+                        }
+                        sum += knl_data[knl_y * p.knl_w + knl_x]
+                             * src_data[src_y * p.src_w + src_x];
+                    }
+                }
+                dst_data[dst_y * p.dst_w + dst_x] = sum;
+            }
+        }
+    }
+}
+
+void lm_ggml_compute_forward_conv_2d_dw(
+        const lm_ggml_compute_params * params,
+        lm_ggml_tensor * dst) {
+
+    const lm_ggml_tensor * kernel = dst->src[0];
+    const lm_ggml_tensor * src = dst->src[1];
+    lm_ggml_conv_2d_dw_params p;
+    p.channels = src->ne[2];
+    p.batch = src->ne[3];
+    p.src_w = src->ne[0];
+    p.src_h = src->ne[1];
+    p.dst_w = dst->ne[0];
+    p.dst_h = dst->ne[1];
+    p.knl_w = kernel->ne[0];
+    p.knl_h = kernel->ne[1];
+    p.stride_x = dst->op_params[0];
+    p.stride_y = dst->op_params[1];
+    p.pad_x = dst->op_params[2];
+    p.pad_y = dst->op_params[3];
+    p.dilation_x = dst->op_params[4];
+    p.dilation_y = dst->op_params[5];
+
+    LM_GGML_ASSERT(kernel->ne[3] == p.channels);
+    LM_GGML_ASSERT(dst->ne[3] == p.batch);
+
+    if (lm_ggml_is_contiguous(src)) {
+        lm_ggml_compute_forward_conv_2d_dw_whcn(params, src, kernel, dst, p);
+    } else if (lm_ggml_is_contiguous_channels(src)) {
+        // kernel should also have channels most contiguous in memory
+        LM_GGML_ASSERT(kernel->nb[0] >= kernel->nb[2] && kernel->nb[1] >= kernel->nb[0]);
+        lm_ggml_compute_forward_conv_2d_dw_cwhn(params, src, kernel, dst, p);
+    } else {
+        LM_GGML_ABORT("non-contiguous memory layout not supported");
+    }
+}
+
 // lm_ggml_compute_forward_pool_1d_sk_p0
 
 static void lm_ggml_compute_forward_pool_1d_sk_p0(
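Note: the depthwise conv handles two layouts: `whcn` (ggml's default contiguous layout, parallelized over channel × batch) and `cwhn` (channels innermost, parallelized over output rows and vectorized across channels). The kernel trusts `dst->ne` for the output extents; assuming the graph builder sizes them with the standard strided/padded/dilated convolution formula, a sketch:

```c
#include <stdint.h>

// Expected destination extent per spatial axis (assumption; illustration only).
static int64_t conv_out_size(int64_t src, int64_t knl, int stride, int pad, int dilation) {
    return (src + 2*pad - dilation*(knl - 1) - 1) / stride + 1;
}
// Example: src_w = 32, knl_w = 3, stride = 1, pad = 1, dilation = 1 -> 32 ("same" size).
```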
@@ -6351,24 +6613,72 @@ static void lm_ggml_compute_forward_upscale_f32(
     const float sf2 = (float)ne2/src0->ne[2];
     const float sf3 = (float)ne3/src0->ne[3];
 
-    // TODO: optimize
+    const lm_ggml_scale_mode mode = (lm_ggml_scale_mode) lm_ggml_get_op_params_i32(dst, 0);
 
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        const int64_t i03 = i3 / sf3;
-        for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
-            const int64_t i02 = i2 / sf2;
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                const int64_t i01 = i1 / sf1;
-                for (int64_t i0 = 0; i0 < ne0; i0++) {
-                    const int64_t i00 = i0 / sf0;
+    if (mode == LM_GGML_SCALE_MODE_NEAREST) {
+        for (int64_t i3 = 0; i3 < ne3; i3++) {
+            const int64_t i03 = i3 / sf3;
+            for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+                const int64_t i02 = i2 / sf2;
+                for (int64_t i1 = 0; i1 < ne1; i1++) {
+                    const int64_t i01 = i1 / sf1;
+                    for (int64_t i0 = 0; i0 < ne0; i0++) {
+                        const int64_t i00 = i0 / sf0;
 
-                    const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-                    float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+                        const float * x = (float *)((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+                        float * y = (float *)((char *) dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
 
-                    *y = *x;
+                        *y = *x;
+                    }
                 }
             }
         }
+    } else if (mode == LM_GGML_SCALE_MODE_BILINEAR) {
+        // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True
+        const float pixel_offset = 0.5f;
+
+        for (int64_t i3 = 0; i3 < ne3; i3++) {
+            const int64_t i03 = i3 / sf3;
+            for (int64_t i2 = ith; i2 < ne2; i2 += nth) {
+                const int64_t i02 = i2 / sf2;
+                for (int64_t i1 = 0; i1 < ne1; i1++) {
+                    const float y = ((float)i1 + pixel_offset) / sf1 - pixel_offset;
+                    int64_t y0 = (int64_t)floorf(y);
+                    int64_t y1 = y0 + 1;
+
+                    y0 = std::max(int64_t(0), std::min(y0, ne01 - 1));
+                    y1 = std::max(int64_t(0), std::min(y1, ne01 - 1));
+
+                    float dy = y - (float)y0;
+                    dy = std::max(0.0f, std::min(dy, 1.0f));
+
+                    for (int64_t i0 = 0; i0 < ne0; i0++) {
+                        const float x = ((float)i0 + pixel_offset) / sf0 - pixel_offset;
+                        int64_t x0 = (int64_t)floorf(x);
+                        int64_t x1 = x0 + 1;
+
+                        x0 = std::max(int64_t(0), std::min(x0, ne00 - 1));
+                        x1 = std::max(int64_t(0), std::min(x1, ne00 - 1));
+
+                        float dx = x - (float)x0;
+                        dx = std::max(0.0f, std::min(dx, 1.0f));
+
+                        // fetch the four surrounding pixel values and interpolate
+                        const float a = *(const float *)((const char *)src0->data + x0*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+                        const float b = *(const float *)((const char *)src0->data + x1*nb00 + y0*nb01 + i02*nb02 + i03*nb03);
+                        const float c = *(const float *)((const char *)src0->data + x0*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+                        const float d = *(const float *)((const char *)src0->data + x1*nb00 + y1*nb01 + i02*nb02 + i03*nb03);
+
+                        const float val = a*(1 - dx)*(1 - dy) + b*dx*(1 - dy) + c*(1 - dx)*dy + d*dx*dy;
+
+                        float * y_dst = (float *)((char *)dst->data + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3);
+                        *y_dst = val;
+                    }
+                }
+            }
+        }
+    } else {
+        LM_GGML_ABORT("unsupported upscale mode");
     }
 }
 
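Note: the bilinear branch uses half-pixel centers (`pixel_offset = 0.5`), matching PyTorch's `align_corners=False`. A self-contained 1-D sketch of the coordinate mapping and lerp performed per axis (hypothetical helper, not part of the package):

```c
#include <math.h>
#include <stdint.h>

static int64_t clamp_i64(int64_t v, int64_t lo, int64_t hi) {
    return v < lo ? lo : (v > hi ? hi : v);
}

// Map dst index i_dst back to a fractional src coordinate, clamp the two
// neighbors, and blend. sf is the upscale factor (ne_dst / ne_src).
static float bilinear_1d(const float * src, int64_t ne_src, int64_t i_dst, float sf) {
    const float pixel_offset = 0.5f;  // 0 would match align_corners=True
    const float x = ((float)i_dst + pixel_offset) / sf - pixel_offset;
    const int64_t x0 = clamp_i64((int64_t)floorf(x), 0, ne_src - 1);
    const int64_t x1 = clamp_i64(x0 + 1, 0, ne_src - 1);
    float dx = x - (float)x0;
    dx = dx < 0.0f ? 0.0f : (dx > 1.0f ? 1.0f : dx);
    return src[x0]*(1.0f - dx) + src[x1]*dx;
}
// Example: upscaling width 4 -> 8 (sf = 2), i_dst = 3 maps to x = 1.25,
// so the output blends src[1] and src[2] with weights 0.75 and 0.25.
```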
@@ -7542,6 +7852,10 @@ void lm_ggml_compute_forward_unary(
             {
                 lm_ggml_compute_forward_gelu(params, dst);
             } break;
+        case LM_GGML_UNARY_OP_GELU_ERF:
+            {
+                lm_ggml_compute_forward_gelu_erf(params, dst);
+            } break;
         case LM_GGML_UNARY_OP_GELU_QUICK:
             {
                 lm_ggml_compute_forward_gelu_quick(params, dst);
@@ -8268,152 +8582,6 @@ void lm_ggml_compute_forward_rwkv_wkv7(
     }
 }
 
-// lm_ggml_compute_forward_map_unary
-
-static void lm_ggml_compute_forward_map_unary_f32(
-        const lm_ggml_compute_params * params,
-        lm_ggml_tensor * dst,
-        const lm_ggml_unary_op_f32_t fun) {
-
-    const lm_ggml_tensor * src0 = dst->src[0];
-
-    if (params->ith != 0) {
-        return;
-    }
-
-    assert(lm_ggml_is_contiguous_1(src0));
-    assert(lm_ggml_is_contiguous_1(dst));
-    assert(lm_ggml_are_same_shape(src0, dst));
-
-    const int n  = lm_ggml_nrows(src0);
-    const int nc = src0->ne[0];
-
-    for (int i = 0; i < n; i++) {
-        fun(nc,
-                (float *) ((char *) dst->data  + i*( dst->nb[1])),
-                (float *) ((char *) src0->data + i*(src0->nb[1])));
-    }
-}
-
-void lm_ggml_compute_forward_map_unary(
-        const lm_ggml_compute_params * params,
-        lm_ggml_tensor * dst,
-        const lm_ggml_unary_op_f32_t fun) {
-
-    const lm_ggml_tensor * src0 = dst->src[0];
-
-    switch (src0->type) {
-        case LM_GGML_TYPE_F32:
-            {
-                lm_ggml_compute_forward_map_unary_f32(params, dst, fun);
-            } break;
-        default:
-            {
-                LM_GGML_ABORT("fatal error");
-            }
-    }
-}
-
-// lm_ggml_compute_forward_map_binary
-
-static void lm_ggml_compute_forward_map_binary_f32(
-        const lm_ggml_compute_params * params,
-        lm_ggml_tensor * dst,
-        const lm_ggml_binary_op_f32_t fun) {
-
-    const lm_ggml_tensor * src0 = dst->src[0];
-    const lm_ggml_tensor * src1 = dst->src[1];
-
-    if (params->ith != 0) {
-        return;
-    }
-
-    assert(lm_ggml_is_contiguous_1(src0));
-    assert(lm_ggml_is_contiguous_1(src1));
-    assert(lm_ggml_is_contiguous_1(dst));
-    assert(lm_ggml_are_same_shape(src0, src1) && lm_ggml_are_same_shape(src0, dst));
-
-    const int n  = lm_ggml_nrows(src0);
-    const int nc = src0->ne[0];
-
-    for (int i = 0; i < n; i++) {
-        fun(nc,
-                (float *) ((char *) dst->data  + i*( dst->nb[1])),
-                (float *) ((char *) src0->data + i*(src0->nb[1])),
-                (float *) ((char *) src1->data + i*(src1->nb[1])));
-    }
-}
-
-void lm_ggml_compute_forward_map_binary(
-        const lm_ggml_compute_params * params,
-        lm_ggml_tensor * dst,
-        const lm_ggml_binary_op_f32_t fun) {
-
-    const lm_ggml_tensor * src0 = dst->src[0];
-
-    switch (src0->type) {
-        case LM_GGML_TYPE_F32:
-            {
-                lm_ggml_compute_forward_map_binary_f32(params, dst, fun);
-            } break;
-        default:
-            {
-                LM_GGML_ABORT("fatal error");
-            }
-    }
-}
-
-// lm_ggml_compute_forward_map_custom1
-
-void lm_ggml_compute_forward_map_custom1_f32(
-        const lm_ggml_compute_params * params,
-        lm_ggml_tensor * dst,
-        const lm_ggml_custom1_op_f32_t fun) {
-
-    const lm_ggml_tensor * a = dst->src[0];
-
-    if (params->ith != 0) {
-        return;
-    }
-
-    fun(dst, a);
-}
-
-// lm_ggml_compute_forward_map_custom2
-
-void lm_ggml_compute_forward_map_custom2_f32(
-        const lm_ggml_compute_params * params,
-        lm_ggml_tensor * dst,
-        const lm_ggml_custom2_op_f32_t fun) {
-
-    const lm_ggml_tensor * a = dst->src[0];
-    const lm_ggml_tensor * b = dst->src[1];
-
-    if (params->ith != 0) {
-        return;
-    }
-
-    fun(dst, a, b);
-}
-
-// lm_ggml_compute_forward_map_custom3
-
-void lm_ggml_compute_forward_map_custom3_f32(
-        const lm_ggml_compute_params * params,
-        lm_ggml_tensor * dst,
-        const lm_ggml_custom3_op_f32_t fun) {
-
-    const lm_ggml_tensor * a = dst->src[0];
-    const lm_ggml_tensor * b = dst->src[1];
-    const lm_ggml_tensor * c = dst->src[1];
-
-    if (params->ith != 0) {
-        return;
-    }
-
-    fun(dst, a, b, c);
-}
-
 // lm_ggml_compute_forward_map_custom1
 
 void lm_ggml_compute_forward_map_custom1(
@@ -8459,6 +8627,18 @@ void lm_ggml_compute_forward_map_custom3(
     p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
 }
 
+// lm_ggml_compute_forward_custom
+
+void lm_ggml_compute_forward_custom(
+        const struct lm_ggml_compute_params * params,
+              struct lm_ggml_tensor * dst) {
+
+    struct lm_ggml_custom_op_params p;
+    memcpy(&p, dst->op_params, sizeof(p));
+
+    p.fun(dst, params->ith, params->nth, p.userdata);
+}
+
 // lm_ggml_compute_forward_cross_entropy_loss
 
 static void lm_ggml_compute_forward_cross_entropy_loss_f32(
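Note: `lm_ggml_compute_forward_custom` reads its parameters back out of the tensor's fixed-size `op_params` storage. Only the `fun` and `userdata` fields are observable in the hunk above; a sketch of the producer side of that pattern, with an assumed struct layout:

```c
#include <string.h>

struct lm_ggml_tensor;  // opaque here; defined in the package's ggml.h

// Assumed layout: only .fun and .userdata are visible in the diff.
struct custom_op_params_sketch {
    void (*fun)(struct lm_ggml_tensor * dst, int ith, int nth, void * userdata);
    void * userdata;
};

// Producer side: the POD struct is copied byte-for-byte into the tensor's
// op_params area when the op is created, and memcpy'd back out at compute time.
static void pack_custom_params(char * op_params /* dst->op_params */,
                               const struct custom_op_params_sketch * p) {
    memcpy(op_params, p, sizeof(*p));  // op_params must hold at least sizeof(*p) bytes
}
```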
package/cpp/ggml-cpu/ops.h:

@@ -65,6 +65,7 @@ void lm_ggml_compute_forward_conv_transpose_1d(const struct lm_ggml_compute_para
 void lm_ggml_compute_forward_im2col(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_im2col_back_f32(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_conv_transpose_2d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
+void lm_ggml_compute_forward_conv_2d_dw(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_pool_1d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_pool_2d(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_pool_2d_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
@@ -96,29 +97,10 @@ void lm_ggml_compute_forward_add_rel_pos(const struct lm_ggml_compute_params * p
 void lm_ggml_compute_forward_rwkv_wkv6(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_rwkv_wkv7(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_gla(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
-void lm_ggml_compute_forward_map_unary(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_unary_op_f32_t fun);
-void lm_ggml_compute_forward_map_binary(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_binary_op_f32_t fun);
-void lm_ggml_compute_forward_map_custom1_f32(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_custom1_op_f32_t fun);
-void lm_ggml_compute_forward_map_custom2_f32(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_custom2_op_f32_t fun);
-void lm_ggml_compute_forward_map_custom3_f32(
-        const struct lm_ggml_compute_params * params,
-        struct lm_ggml_tensor * dst,
-        const lm_ggml_custom3_op_f32_t fun);
 void lm_ggml_compute_forward_map_custom1(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_map_custom2(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_map_custom3(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
+void lm_ggml_compute_forward_custom(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_cross_entropy_loss(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_cross_entropy_loss_back(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);
 void lm_ggml_compute_forward_opt_step_adamw(const struct lm_ggml_compute_params * params, struct lm_ggml_tensor * dst);