cui-llama.rn 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
@@ -38,7 +38,7 @@ extern "C" {
|
|
38
38
|
LM_GGML_API lm_ggml_backend_buffer_t lm_ggml_backend_buft_alloc_buffer (lm_ggml_backend_buffer_type_t buft, size_t size);
|
39
39
|
LM_GGML_API size_t lm_ggml_backend_buft_get_alignment (lm_ggml_backend_buffer_type_t buft);
|
40
40
|
LM_GGML_API size_t lm_ggml_backend_buft_get_max_size (lm_ggml_backend_buffer_type_t buft);
|
41
|
-
LM_GGML_API size_t lm_ggml_backend_buft_get_alloc_size(lm_ggml_backend_buffer_type_t buft, struct lm_ggml_tensor * tensor);
|
41
|
+
LM_GGML_API size_t lm_ggml_backend_buft_get_alloc_size(lm_ggml_backend_buffer_type_t buft, const struct lm_ggml_tensor * tensor);
|
42
42
|
LM_GGML_API bool lm_ggml_backend_buft_is_host (lm_ggml_backend_buffer_type_t buft);
|
43
43
|
LM_GGML_API lm_ggml_backend_dev_t lm_ggml_backend_buft_get_device (lm_ggml_backend_buffer_type_t buft);
|
44
44
|
|
@@ -59,7 +59,7 @@ extern "C" {
|
|
59
59
|
LM_GGML_API enum lm_ggml_status lm_ggml_backend_buffer_init_tensor (lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor);
|
60
60
|
LM_GGML_API size_t lm_ggml_backend_buffer_get_alignment (lm_ggml_backend_buffer_t buffer);
|
61
61
|
LM_GGML_API size_t lm_ggml_backend_buffer_get_max_size (lm_ggml_backend_buffer_t buffer);
|
62
|
-
LM_GGML_API size_t lm_ggml_backend_buffer_get_alloc_size(lm_ggml_backend_buffer_t buffer, struct lm_ggml_tensor * tensor);
|
62
|
+
LM_GGML_API size_t lm_ggml_backend_buffer_get_alloc_size(lm_ggml_backend_buffer_t buffer, const struct lm_ggml_tensor * tensor);
|
63
63
|
LM_GGML_API void lm_ggml_backend_buffer_clear (lm_ggml_backend_buffer_t buffer, uint8_t value);
|
64
64
|
LM_GGML_API bool lm_ggml_backend_buffer_is_host (lm_ggml_backend_buffer_t buffer);
|
65
65
|
LM_GGML_API void lm_ggml_backend_buffer_set_usage (lm_ggml_backend_buffer_t buffer, enum lm_ggml_backend_buffer_usage usage);
|
@@ -248,7 +248,7 @@ extern "C" {
|
|
248
248
|
// preferrably to run on the same backend as the buffer
|
249
249
|
lm_ggml_backend_buffer_set_usage(buf_weights, LM_GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
250
250
|
|
251
|
-
sched = lm_ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, LM_GGML_DEFAULT_GRAPH_SIZE, false);
|
251
|
+
sched = lm_ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, LM_GGML_DEFAULT_GRAPH_SIZE, false, true);
|
252
252
|
|
253
253
|
// initialize buffers from a max size graph (optional)
|
254
254
|
reserve_graph = build_graph(sched, max_batch_size);
|
@@ -289,7 +289,7 @@ extern "C" {
|
|
289
289
|
typedef bool (*lm_ggml_backend_sched_eval_callback)(struct lm_ggml_tensor * t, bool ask, void * user_data);
|
290
290
|
|
291
291
|
// Initialize a backend scheduler, backends with low index are given priority over backends with high index
|
292
|
-
LM_GGML_API lm_ggml_backend_sched_t lm_ggml_backend_sched_new(lm_ggml_backend_t * backends, lm_ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
292
|
+
LM_GGML_API lm_ggml_backend_sched_t lm_ggml_backend_sched_new(lm_ggml_backend_t * backends, lm_ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel, bool op_offload);
|
293
293
|
LM_GGML_API void lm_ggml_backend_sched_free(lm_ggml_backend_sched_t sched);
|
294
294
|
|
295
295
|
// Initialize backend buffers from a measure graph
|
@@ -24,7 +24,7 @@ typedef std::unique_ptr<lm_gguf_context, lm_gguf_context_deleter> lm_gguf_contex
|
|
24
24
|
|
25
25
|
struct lm_ggml_gallocr_deleter { void operator()(lm_ggml_gallocr_t galloc) { lm_ggml_gallocr_free(galloc); } };
|
26
26
|
|
27
|
-
typedef std::unique_ptr<
|
27
|
+
typedef std::unique_ptr<lm_ggml_gallocr, lm_ggml_gallocr_deleter> lm_ggml_gallocr_ptr;
|
28
28
|
|
29
29
|
// ggml-backend
|
30
30
|
|
@@ -133,6 +133,11 @@ extern "C" {
|
|
133
133
|
|
134
134
|
LM_GGML_BACKEND_API lm_ggml_backend_reg_t lm_ggml_backend_cpu_reg(void);
|
135
135
|
|
136
|
+
LM_GGML_BACKEND_API void lm_ggml_cpu_fp32_to_fp16(const float *, lm_ggml_fp16_t *, int64_t);
|
137
|
+
LM_GGML_BACKEND_API void lm_ggml_cpu_fp16_to_fp32(const lm_ggml_fp16_t *, float *, int64_t);
|
138
|
+
LM_GGML_BACKEND_API void lm_ggml_cpu_fp32_to_bf16(const float *, lm_ggml_bf16_t *, int64_t);
|
139
|
+
LM_GGML_BACKEND_API void lm_ggml_cpu_bf16_to_fp32(const lm_ggml_bf16_t *, float *, int64_t);
|
140
|
+
|
136
141
|
#ifdef __cplusplus
|
137
142
|
}
|
138
143
|
#endif
|
@@ -16,6 +16,14 @@
|
|
16
16
|
#include <arm_sve.h>
|
17
17
|
#endif // __ARM_FEATURE_SVE
|
18
18
|
|
19
|
+
#if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__)
|
20
|
+
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
21
|
+
//
|
22
|
+
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
23
|
+
//
|
24
|
+
#include <arm_neon.h>
|
25
|
+
#endif
|
26
|
+
|
19
27
|
#if defined(__F16C__)
|
20
28
|
#include <immintrin.h>
|
21
29
|
#endif
|
@@ -140,8 +148,14 @@ struct lm_ggml_map_custom2_op_params {
|
|
140
148
|
|
141
149
|
struct lm_ggml_map_custom3_op_params {
|
142
150
|
lm_ggml_custom3_op_t fun;
|
143
|
-
int
|
144
|
-
void
|
151
|
+
int n_tasks;
|
152
|
+
void * userdata;
|
153
|
+
};
|
154
|
+
|
155
|
+
struct lm_ggml_custom_op_params {
|
156
|
+
lm_ggml_custom_op_t fun;
|
157
|
+
int n_tasks;
|
158
|
+
void * userdata;
|
145
159
|
};
|
146
160
|
|
147
161
|
// bitset
|
@@ -311,13 +325,6 @@ LM_GGML_API void lm_ggml_aligned_free(void * ptr, size_t size);
|
|
311
325
|
// for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843
|
312
326
|
//
|
313
327
|
#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__)
|
314
|
-
|
315
|
-
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
|
316
|
-
//
|
317
|
-
// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/
|
318
|
-
//
|
319
|
-
#include <arm_neon.h>
|
320
|
-
|
321
328
|
#define LM_GGML_COMPUTE_FP16_TO_FP32(x) lm_ggml_compute_fp16_to_fp32(x)
|
322
329
|
#define LM_GGML_COMPUTE_FP32_TO_FP16(x) lm_ggml_compute_fp32_to_fp16(x)
|
323
330
|
|
@@ -207,6 +207,10 @@ typedef struct {
|
|
207
207
|
float attn_factor;
|
208
208
|
float beta_fast;
|
209
209
|
float beta_slow;
|
210
|
+
int32_t sect_0;
|
211
|
+
int32_t sect_1;
|
212
|
+
int32_t sect_2;
|
213
|
+
int32_t sect_3;
|
210
214
|
} lm_ggml_metal_kargs_rope;
|
211
215
|
|
212
216
|
typedef struct {
|
@@ -299,21 +303,42 @@ typedef struct {
|
|
299
303
|
} lm_ggml_metal_kargs_mul_mv_ext;
|
300
304
|
|
301
305
|
typedef struct {
|
302
|
-
int32_t
|
303
|
-
int32_t
|
304
|
-
uint64_t
|
306
|
+
int32_t ne10;
|
307
|
+
int32_t ne11; // n_expert_used (bcast)
|
308
|
+
uint64_t nb11;
|
309
|
+
uint64_t nb12;
|
310
|
+
int32_t neh11; // n_tokens
|
311
|
+
uint64_t nbh11;
|
312
|
+
int32_t ne20; // n_expert_used
|
313
|
+
uint64_t nb21;
|
314
|
+
} lm_ggml_metal_kargs_mul_mm_id_map0;
|
315
|
+
|
316
|
+
typedef struct {
|
317
|
+
int32_t ne20; // n_expert_used
|
318
|
+
int32_t neh0;
|
319
|
+
int32_t neh1;
|
320
|
+
uint64_t nbh1;
|
321
|
+
uint64_t nbh2;
|
322
|
+
int32_t ne0;
|
323
|
+
uint64_t nb1;
|
324
|
+
uint64_t nb2;
|
325
|
+
} lm_ggml_metal_kargs_mul_mm_id_map1;
|
326
|
+
|
327
|
+
typedef struct {
|
305
328
|
int32_t ne00;
|
306
329
|
int32_t ne02;
|
307
330
|
uint64_t nb01;
|
308
331
|
uint64_t nb02;
|
309
|
-
|
310
|
-
int32_t
|
311
|
-
|
312
|
-
uint64_t
|
313
|
-
uint64_t
|
314
|
-
uint64_t
|
315
|
-
int32_t
|
316
|
-
int32_t
|
332
|
+
uint64_t nb03;
|
333
|
+
int32_t neh12;
|
334
|
+
uint64_t nbh10;
|
335
|
+
uint64_t nbh11;
|
336
|
+
uint64_t nbh12;
|
337
|
+
uint64_t nbh13;
|
338
|
+
int32_t neh0;
|
339
|
+
int32_t neh1;
|
340
|
+
int16_t r2;
|
341
|
+
int16_t r3;
|
317
342
|
} lm_ggml_metal_kargs_mul_mm_id;
|
318
343
|
|
319
344
|
typedef struct {
|
@@ -37,13 +37,16 @@ extern "C" {
|
|
37
37
|
// ====== Dataset ======
|
38
38
|
|
39
39
|
LM_GGML_API lm_ggml_opt_dataset_t lm_ggml_opt_dataset_init(
|
40
|
-
|
41
|
-
|
42
|
-
int64_t
|
43
|
-
int64_t
|
40
|
+
enum lm_ggml_type type_data, // the type for the internal data tensor
|
41
|
+
enum lm_ggml_type type_label, // the type for the internal labels tensor
|
42
|
+
int64_t ne_datapoint, // number of elements per datapoint
|
43
|
+
int64_t ne_label, // number of elements per label
|
44
|
+
int64_t ndata, // total number of datapoints/labels
|
45
|
+
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
|
44
46
|
LM_GGML_API void lm_ggml_opt_dataset_free(lm_ggml_opt_dataset_t dataset);
|
45
47
|
|
46
48
|
// get underlying tensors that store the data
|
49
|
+
LM_GGML_API int64_t lm_ggml_opt_dataset_ndata (lm_ggml_opt_dataset_t dataset);
|
47
50
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_dataset_data (lm_ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
|
48
51
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_dataset_labels(lm_ggml_opt_dataset_t dataset); // shape = [nd_label, ndata]
|
49
52
|
|
@@ -56,13 +59,19 @@ extern "C" {
|
|
56
59
|
struct lm_ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
|
57
60
|
struct lm_ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
|
58
61
|
int64_t ibatch);
|
62
|
+
LM_GGML_API void lm_ggml_opt_dataset_get_batch_host(
|
63
|
+
lm_ggml_opt_dataset_t dataset,
|
64
|
+
void * data_batch,
|
65
|
+
size_t nb_data_batch,
|
66
|
+
void * labels_batch,
|
67
|
+
int64_t ibatch);
|
59
68
|
|
60
69
|
// ====== Model / Context ======
|
61
70
|
|
62
71
|
enum lm_ggml_opt_build_type {
|
63
|
-
LM_GGML_OPT_BUILD_TYPE_FORWARD,
|
64
|
-
LM_GGML_OPT_BUILD_TYPE_GRAD,
|
65
|
-
LM_GGML_OPT_BUILD_TYPE_OPT,
|
72
|
+
LM_GGML_OPT_BUILD_TYPE_FORWARD = 10,
|
73
|
+
LM_GGML_OPT_BUILD_TYPE_GRAD = 20,
|
74
|
+
LM_GGML_OPT_BUILD_TYPE_OPT = 30,
|
66
75
|
};
|
67
76
|
|
68
77
|
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
|
@@ -81,20 +90,22 @@ extern "C" {
|
|
81
90
|
// userdata can be used to pass arbitrary data
|
82
91
|
typedef struct lm_ggml_opt_optimizer_params (*lm_ggml_opt_get_optimizer_params)(void * userdata);
|
83
92
|
|
84
|
-
// returns the default optimizer params (constant)
|
93
|
+
// returns the default optimizer params (constant, hard-coded values)
|
85
94
|
// userdata is not used
|
86
95
|
LM_GGML_API struct lm_ggml_opt_optimizer_params lm_ggml_opt_get_default_optimizer_params(void * userdata);
|
87
96
|
|
97
|
+
// casts userdata to lm_ggml_opt_optimizer_params and returns it
|
98
|
+
LM_GGML_API struct lm_ggml_opt_optimizer_params lm_ggml_opt_get_constant_optimizer_params(void * userdata);
|
99
|
+
|
88
100
|
// parameters for initializing a new optimization context
|
89
101
|
struct lm_ggml_opt_params {
|
90
102
|
lm_ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
|
91
103
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
struct lm_ggml_tensor
|
97
|
-
struct lm_ggml_tensor * outputs;
|
104
|
+
// by default the forward graph needs to be reconstructed for each eval
|
105
|
+
// if ctx_compute, inputs, and outputs are set the graphs are instead allocated statically
|
106
|
+
struct lm_ggml_context * ctx_compute;
|
107
|
+
struct lm_ggml_tensor * inputs;
|
108
|
+
struct lm_ggml_tensor * outputs;
|
98
109
|
|
99
110
|
enum lm_ggml_opt_loss_type loss_type;
|
100
111
|
enum lm_ggml_opt_build_type build_type;
|
@@ -107,12 +118,9 @@ extern "C" {
|
|
107
118
|
|
108
119
|
// get parameters for an optimization context with defaults set where possible
|
109
120
|
// parameters for which no sensible defaults exist are supplied as arguments to this function
|
110
|
-
LM_GGML_API lm_ggml_opt_params lm_ggml_opt_default_params(
|
111
|
-
lm_ggml_backend_sched_t
|
112
|
-
|
113
|
-
struct lm_ggml_tensor * inputs,
|
114
|
-
struct lm_ggml_tensor * outputs,
|
115
|
-
enum lm_ggml_opt_loss_type loss_type);
|
121
|
+
LM_GGML_API struct lm_ggml_opt_params lm_ggml_opt_default_params(
|
122
|
+
lm_ggml_backend_sched_t backend_sched,
|
123
|
+
enum lm_ggml_opt_loss_type loss_type);
|
116
124
|
|
117
125
|
LM_GGML_API lm_ggml_opt_context_t lm_ggml_opt_init(struct lm_ggml_opt_params params);
|
118
126
|
LM_GGML_API void lm_ggml_opt_free(lm_ggml_opt_context_t opt_ctx);
|
@@ -120,7 +128,10 @@ extern "C" {
|
|
120
128
|
// set gradients to zero, initialize loss, and optionally reset the optimizer
|
121
129
|
LM_GGML_API void lm_ggml_opt_reset(lm_ggml_opt_context_t opt_ctx, bool optimizer);
|
122
130
|
|
131
|
+
LM_GGML_API bool lm_ggml_opt_static_graphs(lm_ggml_opt_context_t opt_ctx); // whether the graphs are allocated statically
|
132
|
+
|
123
133
|
// get underlying tensors that store data
|
134
|
+
// if not using static graphs these pointers become invalid with the next call to lm_ggml_opt_alloc
|
124
135
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_inputs( lm_ggml_opt_context_t opt_ctx); // forward graph input tensor
|
125
136
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_outputs( lm_ggml_opt_context_t opt_ctx); // forward graph output tensor
|
126
137
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_labels( lm_ggml_opt_context_t opt_ctx); // labels to compare outputs against
|
@@ -128,11 +139,12 @@ extern "C" {
|
|
128
139
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_pred( lm_ggml_opt_context_t opt_ctx); // predictions made by outputs
|
129
140
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_ncorrect(lm_ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
|
130
141
|
|
142
|
+
// get the gradient accumulator for a node from the forward graph
|
131
143
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_opt_grad_acc(lm_ggml_opt_context_t opt_ctx, struct lm_ggml_tensor * node);
|
132
144
|
|
133
145
|
// ====== Optimization Result ======
|
134
146
|
|
135
|
-
LM_GGML_API lm_ggml_opt_result_t lm_ggml_opt_result_init();
|
147
|
+
LM_GGML_API lm_ggml_opt_result_t lm_ggml_opt_result_init(void);
|
136
148
|
LM_GGML_API void lm_ggml_opt_result_free(lm_ggml_opt_result_t result);
|
137
149
|
LM_GGML_API void lm_ggml_opt_result_reset(lm_ggml_opt_result_t result);
|
138
150
|
|
@@ -144,11 +156,20 @@ extern "C" {
|
|
144
156
|
|
145
157
|
// ====== Computation ======
|
146
158
|
|
147
|
-
//
|
148
|
-
LM_GGML_API void
|
159
|
+
// if not using static graphs, this function must be called prior to lm_ggml_opt_alloc
|
160
|
+
LM_GGML_API void lm_ggml_opt_prepare_alloc(
|
161
|
+
lm_ggml_opt_context_t opt_ctx,
|
162
|
+
struct lm_ggml_context * ctx_compute,
|
163
|
+
struct lm_ggml_cgraph * gf,
|
164
|
+
struct lm_ggml_tensor * inputs,
|
165
|
+
struct lm_ggml_tensor * outputs);
|
166
|
+
|
167
|
+
// allocate the next graph for evaluation, either forward or forward + backward
|
168
|
+
// must be called exactly once prior to calling lm_ggml_opt_eval
|
169
|
+
LM_GGML_API void lm_ggml_opt_alloc(lm_ggml_opt_context_t opt_ctx, bool backward);
|
149
170
|
|
150
|
-
// do forward pass, increment result if not NULL, do backward pass
|
151
|
-
LM_GGML_API void
|
171
|
+
// do forward pass, increment result if not NULL, do backward pass if allocated
|
172
|
+
LM_GGML_API void lm_ggml_opt_eval(lm_ggml_opt_context_t opt_ctx, lm_ggml_opt_result_t result);
|
152
173
|
|
153
174
|
// ############################################################################
|
154
175
|
// ## The high-level functions start here. They do not depend on any private ##
|
@@ -200,9 +221,9 @@ extern "C" {
|
|
200
221
|
// fit model defined by inputs and outputs to dataset
|
201
222
|
LM_GGML_API void lm_ggml_opt_fit(
|
202
223
|
lm_ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
|
203
|
-
lm_ggml_context
|
204
|
-
lm_ggml_tensor
|
205
|
-
lm_ggml_tensor
|
224
|
+
struct lm_ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
|
225
|
+
struct lm_ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
|
226
|
+
struct lm_ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
|
206
227
|
lm_ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
|
207
228
|
enum lm_ggml_opt_loss_type loss_type, // loss to minimize
|
208
229
|
lm_ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
|
@@ -394,8 +394,8 @@ extern "C" {
|
|
394
394
|
|
395
395
|
// precision
|
396
396
|
enum lm_ggml_prec {
|
397
|
-
LM_GGML_PREC_DEFAULT,
|
398
|
-
LM_GGML_PREC_F32,
|
397
|
+
LM_GGML_PREC_DEFAULT = 0, // stored as lm_ggml_tensor.op_params, 0 by default
|
398
|
+
LM_GGML_PREC_F32 = 10,
|
399
399
|
};
|
400
400
|
|
401
401
|
// model file types
|
@@ -482,6 +482,7 @@ extern "C" {
|
|
482
482
|
LM_GGML_OP_CONV_TRANSPOSE_1D,
|
483
483
|
LM_GGML_OP_IM2COL,
|
484
484
|
LM_GGML_OP_IM2COL_BACK,
|
485
|
+
LM_GGML_OP_CONV_2D_DW,
|
485
486
|
LM_GGML_OP_CONV_TRANSPOSE_2D,
|
486
487
|
LM_GGML_OP_POOL_1D,
|
487
488
|
LM_GGML_OP_POOL_2D,
|
@@ -508,17 +509,12 @@ extern "C" {
|
|
508
509
|
|
509
510
|
LM_GGML_OP_UNARY,
|
510
511
|
|
511
|
-
LM_GGML_OP_MAP_UNARY,
|
512
|
-
LM_GGML_OP_MAP_BINARY,
|
513
|
-
|
514
|
-
LM_GGML_OP_MAP_CUSTOM1_F32,
|
515
|
-
LM_GGML_OP_MAP_CUSTOM2_F32,
|
516
|
-
LM_GGML_OP_MAP_CUSTOM3_F32,
|
517
|
-
|
518
512
|
LM_GGML_OP_MAP_CUSTOM1,
|
519
513
|
LM_GGML_OP_MAP_CUSTOM2,
|
520
514
|
LM_GGML_OP_MAP_CUSTOM3,
|
521
515
|
|
516
|
+
LM_GGML_OP_CUSTOM,
|
517
|
+
|
522
518
|
LM_GGML_OP_CROSS_ENTROPY_LOSS,
|
523
519
|
LM_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
524
520
|
LM_GGML_OP_OPT_STEP_ADAMW,
|
@@ -541,6 +537,7 @@ extern "C" {
|
|
541
537
|
LM_GGML_UNARY_OP_HARDSWISH,
|
542
538
|
LM_GGML_UNARY_OP_HARDSIGMOID,
|
543
539
|
LM_GGML_UNARY_OP_EXP,
|
540
|
+
LM_GGML_UNARY_OP_GELU_ERF,
|
544
541
|
|
545
542
|
LM_GGML_UNARY_OP_COUNT,
|
546
543
|
};
|
@@ -678,11 +675,18 @@ extern "C" {
|
|
678
675
|
LM_GGML_API bool lm_ggml_is_3d (const struct lm_ggml_tensor * tensor);
|
679
676
|
LM_GGML_API int lm_ggml_n_dims (const struct lm_ggml_tensor * tensor); // returns 1 for scalars
|
680
677
|
|
678
|
+
// returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
|
681
679
|
LM_GGML_API bool lm_ggml_is_contiguous (const struct lm_ggml_tensor * tensor);
|
682
680
|
LM_GGML_API bool lm_ggml_is_contiguous_0(const struct lm_ggml_tensor * tensor); // same as lm_ggml_is_contiguous()
|
683
681
|
LM_GGML_API bool lm_ggml_is_contiguous_1(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 1
|
684
682
|
LM_GGML_API bool lm_ggml_is_contiguous_2(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 2
|
685
683
|
|
684
|
+
// returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
|
685
|
+
LM_GGML_API bool lm_ggml_is_contiguously_allocated(const struct lm_ggml_tensor * tensor);
|
686
|
+
|
687
|
+
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
688
|
+
LM_GGML_API bool lm_ggml_is_contiguous_channels(const struct lm_ggml_tensor * tensor);
|
689
|
+
|
686
690
|
LM_GGML_API bool lm_ggml_are_same_shape (const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
|
687
691
|
LM_GGML_API bool lm_ggml_are_same_stride(const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
|
688
692
|
|
@@ -766,7 +770,7 @@ extern "C" {
|
|
766
770
|
// Tensor flags
|
767
771
|
LM_GGML_API void lm_ggml_set_input(struct lm_ggml_tensor * tensor);
|
768
772
|
LM_GGML_API void lm_ggml_set_output(struct lm_ggml_tensor * tensor);
|
769
|
-
LM_GGML_API void lm_ggml_set_param(struct
|
773
|
+
LM_GGML_API void lm_ggml_set_param(struct lm_ggml_tensor * tensor);
|
770
774
|
LM_GGML_API void lm_ggml_set_loss(struct lm_ggml_tensor * tensor);
|
771
775
|
|
772
776
|
//
|
@@ -936,7 +940,7 @@ extern "C" {
|
|
936
940
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
|
937
941
|
struct lm_ggml_context * ctx,
|
938
942
|
struct lm_ggml_tensor * a,
|
939
|
-
struct lm_ggml_tensor * b);
|
943
|
+
struct lm_ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
|
940
944
|
|
941
945
|
// concat a and b along dim
|
942
946
|
// used in stable-diffusion
|
@@ -1022,6 +1026,16 @@ extern "C" {
|
|
1022
1026
|
struct lm_ggml_context * ctx,
|
1023
1027
|
struct lm_ggml_tensor * a);
|
1024
1028
|
|
1029
|
+
// GELU using erf (error function) when possible
|
1030
|
+
// some backends may fall back to an approximation based on the Abramowitz and Stegun formula
|
1031
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf(
|
1032
|
+
struct lm_ggml_context * ctx,
|
1033
|
+
struct lm_ggml_tensor * a);
|
1034
|
+
|
1035
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf_inplace(
|
1036
|
+
struct lm_ggml_context * ctx,
|
1037
|
+
struct lm_ggml_tensor * a);
|
1038
|
+
|
1025
1039
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_quick(
|
1026
1040
|
struct lm_ggml_context * ctx,
|
1027
1041
|
struct lm_ggml_tensor * a);
|
@@ -1666,7 +1680,7 @@ extern "C" {
|
|
1666
1680
|
struct lm_ggml_tensor * a,
|
1667
1681
|
struct lm_ggml_tensor * b);
|
1668
1682
|
|
1669
|
-
// depthwise
|
1683
|
+
// depthwise (via im2col and mul_mat)
|
1670
1684
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
|
1671
1685
|
struct lm_ggml_context * ctx,
|
1672
1686
|
struct lm_ggml_tensor * a, // convolution kernel
|
@@ -1678,6 +1692,22 @@ extern "C" {
|
|
1678
1692
|
int d0, // dilation dimension 0
|
1679
1693
|
int d1); // dilation dimension 1
|
1680
1694
|
|
1695
|
+
// Depthwise 2D convolution
|
1696
|
+
// may be faster than lm_ggml_conv_2d_dw, but not available in all backends
|
1697
|
+
// a: KW KH 1 C convolution kernel
|
1698
|
+
// b: W H C N input data
|
1699
|
+
// res: W_out H_out C N
|
1700
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw_direct(
|
1701
|
+
struct lm_ggml_context * ctx,
|
1702
|
+
struct lm_ggml_tensor * a,
|
1703
|
+
struct lm_ggml_tensor * b,
|
1704
|
+
int stride0,
|
1705
|
+
int stride1,
|
1706
|
+
int pad0,
|
1707
|
+
int pad1,
|
1708
|
+
int dilation0,
|
1709
|
+
int dilation1);
|
1710
|
+
|
1681
1711
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
|
1682
1712
|
struct lm_ggml_context * ctx,
|
1683
1713
|
struct lm_ggml_tensor * a,
|
@@ -1723,24 +1753,29 @@ extern "C" {
|
|
1723
1753
|
float p0,
|
1724
1754
|
float p1);
|
1725
1755
|
|
1726
|
-
|
1756
|
+
enum lm_ggml_scale_mode {
|
1757
|
+
LM_GGML_SCALE_MODE_NEAREST = 0,
|
1758
|
+
LM_GGML_SCALE_MODE_BILINEAR = 1,
|
1759
|
+
};
|
1760
|
+
|
1761
|
+
// interpolate
|
1727
1762
|
// multiplies ne0 and ne1 by scale factor
|
1728
|
-
// used in stable-diffusion
|
1729
1763
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale(
|
1730
1764
|
struct lm_ggml_context * ctx,
|
1731
1765
|
struct lm_ggml_tensor * a,
|
1732
|
-
int scale_factor
|
1766
|
+
int scale_factor,
|
1767
|
+
enum lm_ggml_scale_mode mode);
|
1733
1768
|
|
1734
|
-
//
|
1735
|
-
//
|
1736
|
-
// used in tortoise.cpp
|
1769
|
+
// interpolate
|
1770
|
+
// interpolate scale to specified dimensions
|
1737
1771
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale_ext(
|
1738
1772
|
struct lm_ggml_context * ctx,
|
1739
1773
|
struct lm_ggml_tensor * a,
|
1740
1774
|
int ne0,
|
1741
1775
|
int ne1,
|
1742
1776
|
int ne2,
|
1743
|
-
int ne3
|
1777
|
+
int ne3,
|
1778
|
+
enum lm_ggml_scale_mode mode);
|
1744
1779
|
|
1745
1780
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
1746
1781
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_pad(
|
@@ -1917,83 +1952,6 @@ extern "C" {
|
|
1917
1952
|
|
1918
1953
|
// custom operators
|
1919
1954
|
|
1920
|
-
typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
|
1921
|
-
typedef void (*lm_ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1922
|
-
|
1923
|
-
typedef void (*lm_ggml_custom1_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
|
1924
|
-
typedef void (*lm_ggml_custom2_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
|
1925
|
-
typedef void (*lm_ggml_custom3_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
|
1926
|
-
|
1927
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_f32(
|
1928
|
-
struct lm_ggml_context * ctx,
|
1929
|
-
struct lm_ggml_tensor * a,
|
1930
|
-
lm_ggml_unary_op_f32_t fun),
|
1931
|
-
"use lm_ggml_map_custom1 instead");
|
1932
|
-
|
1933
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_inplace_f32(
|
1934
|
-
struct lm_ggml_context * ctx,
|
1935
|
-
struct lm_ggml_tensor * a,
|
1936
|
-
lm_ggml_unary_op_f32_t fun),
|
1937
|
-
"use lm_ggml_map_custom1_inplace instead");
|
1938
|
-
|
1939
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_f32(
|
1940
|
-
struct lm_ggml_context * ctx,
|
1941
|
-
struct lm_ggml_tensor * a,
|
1942
|
-
struct lm_ggml_tensor * b,
|
1943
|
-
lm_ggml_binary_op_f32_t fun),
|
1944
|
-
"use lm_ggml_map_custom2 instead");
|
1945
|
-
|
1946
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_inplace_f32(
|
1947
|
-
struct lm_ggml_context * ctx,
|
1948
|
-
struct lm_ggml_tensor * a,
|
1949
|
-
struct lm_ggml_tensor * b,
|
1950
|
-
lm_ggml_binary_op_f32_t fun),
|
1951
|
-
"use lm_ggml_map_custom2_inplace instead");
|
1952
|
-
|
1953
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_f32(
|
1954
|
-
struct lm_ggml_context * ctx,
|
1955
|
-
struct lm_ggml_tensor * a,
|
1956
|
-
lm_ggml_custom1_op_f32_t fun),
|
1957
|
-
"use lm_ggml_map_custom1 instead");
|
1958
|
-
|
1959
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_inplace_f32(
|
1960
|
-
struct lm_ggml_context * ctx,
|
1961
|
-
struct lm_ggml_tensor * a,
|
1962
|
-
lm_ggml_custom1_op_f32_t fun),
|
1963
|
-
"use lm_ggml_map_custom1_inplace instead");
|
1964
|
-
|
1965
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_f32(
|
1966
|
-
struct lm_ggml_context * ctx,
|
1967
|
-
struct lm_ggml_tensor * a,
|
1968
|
-
struct lm_ggml_tensor * b,
|
1969
|
-
lm_ggml_custom2_op_f32_t fun),
|
1970
|
-
"use lm_ggml_map_custom2 instead");
|
1971
|
-
|
1972
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_inplace_f32(
|
1973
|
-
struct lm_ggml_context * ctx,
|
1974
|
-
struct lm_ggml_tensor * a,
|
1975
|
-
struct lm_ggml_tensor * b,
|
1976
|
-
lm_ggml_custom2_op_f32_t fun),
|
1977
|
-
"use lm_ggml_map_custom2_inplace instead");
|
1978
|
-
|
1979
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_f32(
|
1980
|
-
struct lm_ggml_context * ctx,
|
1981
|
-
struct lm_ggml_tensor * a,
|
1982
|
-
struct lm_ggml_tensor * b,
|
1983
|
-
struct lm_ggml_tensor * c,
|
1984
|
-
lm_ggml_custom3_op_f32_t fun),
|
1985
|
-
"use lm_ggml_map_custom3 instead");
|
1986
|
-
|
1987
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_inplace_f32(
|
1988
|
-
struct lm_ggml_context * ctx,
|
1989
|
-
struct lm_ggml_tensor * a,
|
1990
|
-
struct lm_ggml_tensor * b,
|
1991
|
-
struct lm_ggml_tensor * c,
|
1992
|
-
lm_ggml_custom3_op_f32_t fun),
|
1993
|
-
"use lm_ggml_map_custom3_inplace instead");
|
1994
|
-
|
1995
|
-
// custom operators v2
|
1996
|
-
|
1997
1955
|
typedef void (*lm_ggml_custom1_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, int ith, int nth, void * userdata);
|
1998
1956
|
typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
|
1999
1957
|
typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
|
@@ -2049,6 +2007,30 @@ extern "C" {
|
|
2049
2007
|
int n_tasks,
|
2050
2008
|
void * userdata);
|
2051
2009
|
|
2010
|
+
typedef void (*lm_ggml_custom_op_t)(struct lm_ggml_tensor * dst , int ith, int nth, void * userdata);
|
2011
|
+
|
2012
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_4d(
|
2013
|
+
struct lm_ggml_context * ctx,
|
2014
|
+
enum lm_ggml_type type,
|
2015
|
+
int64_t ne0,
|
2016
|
+
int64_t ne1,
|
2017
|
+
int64_t ne2,
|
2018
|
+
int64_t ne3,
|
2019
|
+
struct lm_ggml_tensor ** args,
|
2020
|
+
int n_args,
|
2021
|
+
lm_ggml_custom_op_t fun,
|
2022
|
+
int n_tasks,
|
2023
|
+
void * userdata);
|
2024
|
+
|
2025
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_inplace(
|
2026
|
+
struct lm_ggml_context * ctx,
|
2027
|
+
struct lm_ggml_tensor * a,
|
2028
|
+
struct lm_ggml_tensor ** args,
|
2029
|
+
int n_args,
|
2030
|
+
lm_ggml_custom_op_t fun,
|
2031
|
+
int n_tasks,
|
2032
|
+
void * userdata);
|
2033
|
+
|
2052
2034
|
// loss function
|
2053
2035
|
|
2054
2036
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_cross_entropy_loss(
|
@@ -2079,15 +2061,14 @@ extern "C" {
|
|
2079
2061
|
|
2080
2062
|
LM_GGML_API void lm_ggml_build_forward_expand(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
|
2081
2063
|
LM_GGML_API void lm_ggml_build_backward_expand(
|
2082
|
-
struct lm_ggml_context *
|
2083
|
-
struct
|
2084
|
-
struct
|
2085
|
-
bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
|
2064
|
+
struct lm_ggml_context * ctx, // context for gradient computation
|
2065
|
+
struct lm_ggml_cgraph * cgraph,
|
2066
|
+
struct lm_ggml_tensor ** grad_accs);
|
2086
2067
|
|
2087
2068
|
// graph allocation in a context
|
2088
2069
|
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
|
2089
2070
|
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
|
2090
|
-
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
|
2071
|
+
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, bool force_grads);
|
2091
2072
|
LM_GGML_API void lm_ggml_graph_cpy (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
|
2092
2073
|
LM_GGML_API void lm_ggml_graph_reset (struct lm_ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
|
2093
2074
|
LM_GGML_API void lm_ggml_graph_clear (struct lm_ggml_cgraph * cgraph);
|