cui-llama.rn 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
@@ -394,8 +394,8 @@ extern "C" {
|
|
394
394
|
|
395
395
|
// precision
|
396
396
|
enum lm_ggml_prec {
|
397
|
-
LM_GGML_PREC_DEFAULT,
|
398
|
-
LM_GGML_PREC_F32,
|
397
|
+
LM_GGML_PREC_DEFAULT = 0, // stored as lm_ggml_tensor.op_params, 0 by default
|
398
|
+
LM_GGML_PREC_F32 = 10,
|
399
399
|
};
|
400
400
|
|
401
401
|
// model file types
|
@@ -482,6 +482,7 @@ extern "C" {
|
|
482
482
|
LM_GGML_OP_CONV_TRANSPOSE_1D,
|
483
483
|
LM_GGML_OP_IM2COL,
|
484
484
|
LM_GGML_OP_IM2COL_BACK,
|
485
|
+
LM_GGML_OP_CONV_2D_DW,
|
485
486
|
LM_GGML_OP_CONV_TRANSPOSE_2D,
|
486
487
|
LM_GGML_OP_POOL_1D,
|
487
488
|
LM_GGML_OP_POOL_2D,
|
@@ -508,17 +509,12 @@ extern "C" {
|
|
508
509
|
|
509
510
|
LM_GGML_OP_UNARY,
|
510
511
|
|
511
|
-
LM_GGML_OP_MAP_UNARY,
|
512
|
-
LM_GGML_OP_MAP_BINARY,
|
513
|
-
|
514
|
-
LM_GGML_OP_MAP_CUSTOM1_F32,
|
515
|
-
LM_GGML_OP_MAP_CUSTOM2_F32,
|
516
|
-
LM_GGML_OP_MAP_CUSTOM3_F32,
|
517
|
-
|
518
512
|
LM_GGML_OP_MAP_CUSTOM1,
|
519
513
|
LM_GGML_OP_MAP_CUSTOM2,
|
520
514
|
LM_GGML_OP_MAP_CUSTOM3,
|
521
515
|
|
516
|
+
LM_GGML_OP_CUSTOM,
|
517
|
+
|
522
518
|
LM_GGML_OP_CROSS_ENTROPY_LOSS,
|
523
519
|
LM_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
524
520
|
LM_GGML_OP_OPT_STEP_ADAMW,
|
@@ -541,6 +537,7 @@ extern "C" {
|
|
541
537
|
LM_GGML_UNARY_OP_HARDSWISH,
|
542
538
|
LM_GGML_UNARY_OP_HARDSIGMOID,
|
543
539
|
LM_GGML_UNARY_OP_EXP,
|
540
|
+
LM_GGML_UNARY_OP_GELU_ERF,
|
544
541
|
|
545
542
|
LM_GGML_UNARY_OP_COUNT,
|
546
543
|
};
|
@@ -678,11 +675,18 @@ extern "C" {
|
|
678
675
|
LM_GGML_API bool lm_ggml_is_3d (const struct lm_ggml_tensor * tensor);
|
679
676
|
LM_GGML_API int lm_ggml_n_dims (const struct lm_ggml_tensor * tensor); // returns 1 for scalars
|
680
677
|
|
678
|
+
// returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
|
681
679
|
LM_GGML_API bool lm_ggml_is_contiguous (const struct lm_ggml_tensor * tensor);
|
682
680
|
LM_GGML_API bool lm_ggml_is_contiguous_0(const struct lm_ggml_tensor * tensor); // same as lm_ggml_is_contiguous()
|
683
681
|
LM_GGML_API bool lm_ggml_is_contiguous_1(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 1
|
684
682
|
LM_GGML_API bool lm_ggml_is_contiguous_2(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 2
|
685
683
|
|
684
|
+
// returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
|
685
|
+
LM_GGML_API bool lm_ggml_is_contiguously_allocated(const struct lm_ggml_tensor * tensor);
|
686
|
+
|
687
|
+
// true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
|
688
|
+
LM_GGML_API bool lm_ggml_is_contiguous_channels(const struct lm_ggml_tensor * tensor);
|
689
|
+
|
686
690
|
LM_GGML_API bool lm_ggml_are_same_shape (const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
|
687
691
|
LM_GGML_API bool lm_ggml_are_same_stride(const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
|
688
692
|
|
@@ -766,7 +770,7 @@ extern "C" {
|
|
766
770
|
// Tensor flags
|
767
771
|
LM_GGML_API void lm_ggml_set_input(struct lm_ggml_tensor * tensor);
|
768
772
|
LM_GGML_API void lm_ggml_set_output(struct lm_ggml_tensor * tensor);
|
769
|
-
LM_GGML_API void lm_ggml_set_param(struct lm_ggml_context * ctx, struct lm_ggml_tensor * tensor);
|
773
|
+
LM_GGML_API void lm_ggml_set_param(struct lm_ggml_tensor * tensor);
|
770
774
|
LM_GGML_API void lm_ggml_set_loss(struct lm_ggml_tensor * tensor);
|
771
775
|
|
772
776
|
//
|
@@ -936,7 +940,7 @@ extern "C" {
|
|
936
940
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
|
937
941
|
struct lm_ggml_context * ctx,
|
938
942
|
struct lm_ggml_tensor * a,
|
939
|
-
struct lm_ggml_tensor * b);
|
943
|
+
struct lm_ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
|
940
944
|
|
941
945
|
// concat a and b along dim
|
942
946
|
// used in stable-diffusion
|
@@ -1022,6 +1026,16 @@ extern "C" {
|
|
1022
1026
|
struct lm_ggml_context * ctx,
|
1023
1027
|
struct lm_ggml_tensor * a);
|
1024
1028
|
|
1029
|
+
// GELU using erf (error function) when possible
|
1030
|
+
// some backends may fallback to approximation based on Abramowitz and Stegun formula
|
1031
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf(
|
1032
|
+
struct lm_ggml_context * ctx,
|
1033
|
+
struct lm_ggml_tensor * a);
|
1034
|
+
|
1035
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf_inplace(
|
1036
|
+
struct lm_ggml_context * ctx,
|
1037
|
+
struct lm_ggml_tensor * a);
|
1038
|
+
|
1025
1039
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_quick(
|
1026
1040
|
struct lm_ggml_context * ctx,
|
1027
1041
|
struct lm_ggml_tensor * a);
|
@@ -1666,7 +1680,7 @@ extern "C" {
|
|
1666
1680
|
struct lm_ggml_tensor * a,
|
1667
1681
|
struct lm_ggml_tensor * b);
|
1668
1682
|
|
1669
|
-
// depthwise
|
1683
|
+
// depthwise (via im2col and mul_mat)
|
1670
1684
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
|
1671
1685
|
struct lm_ggml_context * ctx,
|
1672
1686
|
struct lm_ggml_tensor * a, // convolution kernel
|
@@ -1678,6 +1692,22 @@ extern "C" {
|
|
1678
1692
|
int d0, // dilation dimension 0
|
1679
1693
|
int d1); // dilation dimension 1
|
1680
1694
|
|
1695
|
+
// Depthwise 2D convolution
|
1696
|
+
// may be faster than lm_ggml_conv_2d_dw, but not available in all backends
|
1697
|
+
// a: KW KH 1 C convolution kernel
|
1698
|
+
// b: W H C N input data
|
1699
|
+
// res: W_out H_out C N
|
1700
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw_direct(
|
1701
|
+
struct lm_ggml_context * ctx,
|
1702
|
+
struct lm_ggml_tensor * a,
|
1703
|
+
struct lm_ggml_tensor * b,
|
1704
|
+
int stride0,
|
1705
|
+
int stride1,
|
1706
|
+
int pad0,
|
1707
|
+
int pad1,
|
1708
|
+
int dilation0,
|
1709
|
+
int dilation1);
|
1710
|
+
|
1681
1711
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
|
1682
1712
|
struct lm_ggml_context * ctx,
|
1683
1713
|
struct lm_ggml_tensor * a,
|
@@ -1723,24 +1753,29 @@ extern "C" {
|
|
1723
1753
|
float p0,
|
1724
1754
|
float p1);
|
1725
1755
|
|
1726
|
-
|
1756
|
+
enum lm_ggml_scale_mode {
|
1757
|
+
LM_GGML_SCALE_MODE_NEAREST = 0,
|
1758
|
+
LM_GGML_SCALE_MODE_BILINEAR = 1,
|
1759
|
+
};
|
1760
|
+
|
1761
|
+
// interpolate
|
1727
1762
|
// multiplies ne0 and ne1 by scale factor
|
1728
|
-
// used in stable-diffusion
|
1729
1763
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale(
|
1730
1764
|
struct lm_ggml_context * ctx,
|
1731
1765
|
struct lm_ggml_tensor * a,
|
1732
|
-
int scale_factor);
|
1766
|
+
int scale_factor,
|
1767
|
+
enum lm_ggml_scale_mode mode);
|
1733
1768
|
|
1734
|
-
//
|
1735
|
-
//
|
1736
|
-
// used in tortoise.cpp
|
1769
|
+
// interpolate
|
1770
|
+
// interpolate scale to specified dimensions
|
1737
1771
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale_ext(
|
1738
1772
|
struct lm_ggml_context * ctx,
|
1739
1773
|
struct lm_ggml_tensor * a,
|
1740
1774
|
int ne0,
|
1741
1775
|
int ne1,
|
1742
1776
|
int ne2,
|
1743
|
-
int ne3);
|
1777
|
+
int ne3,
|
1778
|
+
enum lm_ggml_scale_mode mode);
|
1744
1779
|
|
1745
1780
|
// pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
|
1746
1781
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_pad(
|
@@ -1917,83 +1952,6 @@ extern "C" {
|
|
1917
1952
|
|
1918
1953
|
// custom operators
|
1919
1954
|
|
1920
|
-
typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
|
1921
|
-
typedef void (*lm_ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1922
|
-
|
1923
|
-
typedef void (*lm_ggml_custom1_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
|
1924
|
-
typedef void (*lm_ggml_custom2_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
|
1925
|
-
typedef void (*lm_ggml_custom3_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
|
1926
|
-
|
1927
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_f32(
|
1928
|
-
struct lm_ggml_context * ctx,
|
1929
|
-
struct lm_ggml_tensor * a,
|
1930
|
-
lm_ggml_unary_op_f32_t fun),
|
1931
|
-
"use lm_ggml_map_custom1 instead");
|
1932
|
-
|
1933
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_inplace_f32(
|
1934
|
-
struct lm_ggml_context * ctx,
|
1935
|
-
struct lm_ggml_tensor * a,
|
1936
|
-
lm_ggml_unary_op_f32_t fun),
|
1937
|
-
"use lm_ggml_map_custom1_inplace instead");
|
1938
|
-
|
1939
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_f32(
|
1940
|
-
struct lm_ggml_context * ctx,
|
1941
|
-
struct lm_ggml_tensor * a,
|
1942
|
-
struct lm_ggml_tensor * b,
|
1943
|
-
lm_ggml_binary_op_f32_t fun),
|
1944
|
-
"use lm_ggml_map_custom2 instead");
|
1945
|
-
|
1946
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_inplace_f32(
|
1947
|
-
struct lm_ggml_context * ctx,
|
1948
|
-
struct lm_ggml_tensor * a,
|
1949
|
-
struct lm_ggml_tensor * b,
|
1950
|
-
lm_ggml_binary_op_f32_t fun),
|
1951
|
-
"use lm_ggml_map_custom2_inplace instead");
|
1952
|
-
|
1953
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_f32(
|
1954
|
-
struct lm_ggml_context * ctx,
|
1955
|
-
struct lm_ggml_tensor * a,
|
1956
|
-
lm_ggml_custom1_op_f32_t fun),
|
1957
|
-
"use lm_ggml_map_custom1 instead");
|
1958
|
-
|
1959
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_inplace_f32(
|
1960
|
-
struct lm_ggml_context * ctx,
|
1961
|
-
struct lm_ggml_tensor * a,
|
1962
|
-
lm_ggml_custom1_op_f32_t fun),
|
1963
|
-
"use lm_ggml_map_custom1_inplace instead");
|
1964
|
-
|
1965
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_f32(
|
1966
|
-
struct lm_ggml_context * ctx,
|
1967
|
-
struct lm_ggml_tensor * a,
|
1968
|
-
struct lm_ggml_tensor * b,
|
1969
|
-
lm_ggml_custom2_op_f32_t fun),
|
1970
|
-
"use lm_ggml_map_custom2 instead");
|
1971
|
-
|
1972
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_inplace_f32(
|
1973
|
-
struct lm_ggml_context * ctx,
|
1974
|
-
struct lm_ggml_tensor * a,
|
1975
|
-
struct lm_ggml_tensor * b,
|
1976
|
-
lm_ggml_custom2_op_f32_t fun),
|
1977
|
-
"use lm_ggml_map_custom2_inplace instead");
|
1978
|
-
|
1979
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_f32(
|
1980
|
-
struct lm_ggml_context * ctx,
|
1981
|
-
struct lm_ggml_tensor * a,
|
1982
|
-
struct lm_ggml_tensor * b,
|
1983
|
-
struct lm_ggml_tensor * c,
|
1984
|
-
lm_ggml_custom3_op_f32_t fun),
|
1985
|
-
"use lm_ggml_map_custom3 instead");
|
1986
|
-
|
1987
|
-
LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_inplace_f32(
|
1988
|
-
struct lm_ggml_context * ctx,
|
1989
|
-
struct lm_ggml_tensor * a,
|
1990
|
-
struct lm_ggml_tensor * b,
|
1991
|
-
struct lm_ggml_tensor * c,
|
1992
|
-
lm_ggml_custom3_op_f32_t fun),
|
1993
|
-
"use lm_ggml_map_custom3_inplace instead");
|
1994
|
-
|
1995
|
-
// custom operators v2
|
1996
|
-
|
1997
1955
|
typedef void (*lm_ggml_custom1_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, int ith, int nth, void * userdata);
|
1998
1956
|
typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
|
1999
1957
|
typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
|
@@ -2049,6 +2007,30 @@ extern "C" {
|
|
2049
2007
|
int n_tasks,
|
2050
2008
|
void * userdata);
|
2051
2009
|
|
2010
|
+
typedef void (*lm_ggml_custom_op_t)(struct lm_ggml_tensor * dst , int ith, int nth, void * userdata);
|
2011
|
+
|
2012
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_4d(
|
2013
|
+
struct lm_ggml_context * ctx,
|
2014
|
+
enum lm_ggml_type type,
|
2015
|
+
int64_t ne0,
|
2016
|
+
int64_t ne1,
|
2017
|
+
int64_t ne2,
|
2018
|
+
int64_t ne3,
|
2019
|
+
struct lm_ggml_tensor ** args,
|
2020
|
+
int n_args,
|
2021
|
+
lm_ggml_custom_op_t fun,
|
2022
|
+
int n_tasks,
|
2023
|
+
void * userdata);
|
2024
|
+
|
2025
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_inplace(
|
2026
|
+
struct lm_ggml_context * ctx,
|
2027
|
+
struct lm_ggml_tensor * a,
|
2028
|
+
struct lm_ggml_tensor ** args,
|
2029
|
+
int n_args,
|
2030
|
+
lm_ggml_custom_op_t fun,
|
2031
|
+
int n_tasks,
|
2032
|
+
void * userdata);
|
2033
|
+
|
2052
2034
|
// loss function
|
2053
2035
|
|
2054
2036
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_cross_entropy_loss(
|
@@ -2079,15 +2061,14 @@ extern "C" {
|
|
2079
2061
|
|
2080
2062
|
LM_GGML_API void lm_ggml_build_forward_expand(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
|
2081
2063
|
LM_GGML_API void lm_ggml_build_backward_expand(
|
2082
|
-
struct lm_ggml_context *
|
2083
|
-
struct
|
2084
|
-
struct
|
2085
|
-
bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
|
2064
|
+
struct lm_ggml_context * ctx, // context for gradient computation
|
2065
|
+
struct lm_ggml_cgraph * cgraph,
|
2066
|
+
struct lm_ggml_tensor ** grad_accs);
|
2086
2067
|
|
2087
2068
|
// graph allocation in a context
|
2088
2069
|
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
|
2089
2070
|
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
|
2090
|
-
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
|
2071
|
+
LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, bool force_grads);
|
2091
2072
|
LM_GGML_API void lm_ggml_graph_cpy (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
|
2092
2073
|
LM_GGML_API void lm_ggml_graph_reset (struct lm_ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
|
2093
2074
|
LM_GGML_API void lm_ggml_graph_clear (struct lm_ggml_cgraph * cgraph);
|
@@ -23,6 +23,7 @@ enum llm_arch {
|
|
23
23
|
LLM_ARCH_REFACT,
|
24
24
|
LLM_ARCH_BERT,
|
25
25
|
LLM_ARCH_NOMIC_BERT,
|
26
|
+
LLM_ARCH_NOMIC_BERT_MOE,
|
26
27
|
LLM_ARCH_JINA_BERT_V2,
|
27
28
|
LLM_ARCH_BLOOM,
|
28
29
|
LLM_ARCH_STABLELM,
|
@@ -58,6 +59,7 @@ enum llm_arch {
|
|
58
59
|
LLM_ARCH_DEEPSEEK,
|
59
60
|
LLM_ARCH_DEEPSEEK2,
|
60
61
|
LLM_ARCH_CHATGLM,
|
62
|
+
LLM_ARCH_GLM4,
|
61
63
|
LLM_ARCH_BITNET,
|
62
64
|
LLM_ARCH_T5,
|
63
65
|
LLM_ARCH_T5ENCODER,
|
@@ -109,6 +111,7 @@ enum llm_kv {
|
|
109
111
|
LLM_KV_EXPERT_WEIGHTS_SCALE,
|
110
112
|
LLM_KV_EXPERT_WEIGHTS_NORM,
|
111
113
|
LLM_KV_EXPERT_GATING_FUNC,
|
114
|
+
LLM_KV_MOE_EVERY_N_LAYERS,
|
112
115
|
LLM_KV_POOLING_TYPE,
|
113
116
|
LLM_KV_LOGIT_SCALE,
|
114
117
|
LLM_KV_DECODER_START_TOKEN_ID,
|
@@ -143,6 +146,8 @@ enum llm_kv {
|
|
143
146
|
LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
|
144
147
|
LLM_KV_ATTENTION_SLIDING_WINDOW,
|
145
148
|
LLM_KV_ATTENTION_SCALE,
|
149
|
+
LLM_KV_ATTENTION_KEY_LENGTH_MLA,
|
150
|
+
LLM_KV_ATTENTION_VALUE_LENGTH_MLA,
|
146
151
|
|
147
152
|
LLM_KV_ROPE_DIMENSION_COUNT,
|
148
153
|
LLM_KV_ROPE_DIMENSION_SECTIONS,
|
@@ -256,6 +261,8 @@ enum llm_tensor {
|
|
256
261
|
LLM_TENSOR_ATTN_Q_NORM,
|
257
262
|
LLM_TENSOR_ATTN_K_NORM,
|
258
263
|
LLM_TENSOR_LAYER_OUT_NORM,
|
264
|
+
LLM_TENSOR_POST_ATTN_NORM,
|
265
|
+
LLM_TENSOR_POST_MLP_NORM,
|
259
266
|
LLM_TENSOR_SSM_IN,
|
260
267
|
LLM_TENSOR_SSM_CONV1D,
|
261
268
|
LLM_TENSOR_SSM_X,
|
@@ -303,6 +310,8 @@ enum llm_tensor {
|
|
303
310
|
LLM_TENSOR_ATTN_Q_B,
|
304
311
|
LLM_TENSOR_ATTN_KV_A_MQA,
|
305
312
|
LLM_TENSOR_ATTN_KV_B,
|
313
|
+
LLM_TENSOR_ATTN_K_B,
|
314
|
+
LLM_TENSOR_ATTN_V_B,
|
306
315
|
LLM_TENSOR_ATTN_Q_A_NORM,
|
307
316
|
LLM_TENSOR_ATTN_KV_A_NORM,
|
308
317
|
LLM_TENSOR_ATTN_SUB_NORM,
|
@@ -70,7 +70,8 @@ struct llama_sbatch {
|
|
70
70
|
// sequence-wise split
|
71
71
|
llama_ubatch split_seq(size_t n_ubatch);
|
72
72
|
|
73
|
-
|
73
|
+
llama_sbatch() = default;
|
74
|
+
llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false);
|
74
75
|
};
|
75
76
|
|
76
77
|
// temporary allocate memory for the input batch if needed
|
@@ -14,6 +14,7 @@ enum llm_chat_template {
|
|
14
14
|
LLM_CHAT_TEMPLATE_MISTRAL_V3,
|
15
15
|
LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
|
16
16
|
LLM_CHAT_TEMPLATE_MISTRAL_V7,
|
17
|
+
LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN,
|
17
18
|
LLM_CHAT_TEMPLATE_PHI_3,
|
18
19
|
LLM_CHAT_TEMPLATE_PHI_4,
|
19
20
|
LLM_CHAT_TEMPLATE_FALCON_3,
|
@@ -29,8 +30,8 @@ enum llm_chat_template {
|
|
29
30
|
LLM_CHAT_TEMPLATE_DEEPSEEK_3,
|
30
31
|
LLM_CHAT_TEMPLATE_COMMAND_R,
|
31
32
|
LLM_CHAT_TEMPLATE_LLAMA_3,
|
32
|
-
|
33
|
-
|
33
|
+
LLM_CHAT_TEMPLATE_CHATGLM_3,
|
34
|
+
LLM_CHAT_TEMPLATE_CHATGLM_4,
|
34
35
|
LLM_CHAT_TEMPLATE_GLMEDGE,
|
35
36
|
LLM_CHAT_TEMPLATE_MINICPM,
|
36
37
|
LLM_CHAT_TEMPLATE_EXAONE_3,
|
@@ -41,6 +42,7 @@ enum llm_chat_template {
|
|
41
42
|
LLM_CHAT_TEMPLATE_YANDEX,
|
42
43
|
LLM_CHAT_TEMPLATE_BAILING,
|
43
44
|
LLM_CHAT_TEMPLATE_LLAMA4,
|
45
|
+
LLM_CHAT_TEMPLATE_SMOLVLM,
|
44
46
|
LLM_CHAT_TEMPLATE_UNKNOWN,
|
45
47
|
};
|
46
48
|
|
@@ -7,6 +7,7 @@
|
|
7
7
|
#include "llama-adapter.h"
|
8
8
|
|
9
9
|
#include "ggml-cpp.h"
|
10
|
+
#include "ggml-opt.h"
|
10
11
|
|
11
12
|
#include <map>
|
12
13
|
#include <vector>
|
@@ -27,7 +28,12 @@ struct llama_context {
|
|
27
28
|
|
28
29
|
void synchronize();
|
29
30
|
|
30
|
-
const llama_model
|
31
|
+
const llama_model & get_model() const;
|
32
|
+
const llama_cparams & get_cparams() const;
|
33
|
+
|
34
|
+
lm_ggml_backend_sched_t get_sched() const;
|
35
|
+
|
36
|
+
lm_ggml_context * get_ctx_compute() const;
|
31
37
|
|
32
38
|
uint32_t n_ctx() const;
|
33
39
|
uint32_t n_ctx_per_seq() const;
|
@@ -128,6 +134,32 @@ struct llama_context {
|
|
128
134
|
llama_perf_context_data perf_get_data() const;
|
129
135
|
void perf_reset();
|
130
136
|
|
137
|
+
//
|
138
|
+
// training
|
139
|
+
//
|
140
|
+
|
141
|
+
void opt_init(struct llama_model * model, struct llama_opt_params lopt_params);
|
142
|
+
|
143
|
+
void opt_epoch(
|
144
|
+
lm_ggml_opt_dataset_t dataset,
|
145
|
+
lm_ggml_opt_result_t result_train,
|
146
|
+
lm_ggml_opt_result_t result_eval,
|
147
|
+
int64_t idata_split,
|
148
|
+
lm_ggml_opt_epoch_callback callback_train,
|
149
|
+
lm_ggml_opt_epoch_callback callback_eval);
|
150
|
+
|
151
|
+
void opt_epoch_iter(
|
152
|
+
lm_ggml_opt_dataset_t dataset,
|
153
|
+
lm_ggml_opt_result_t result,
|
154
|
+
const std::vector<llama_token> & tokens,
|
155
|
+
const std::vector<llama_token> & labels_sparse,
|
156
|
+
llama_batch & batch,
|
157
|
+
lm_ggml_opt_epoch_callback callback,
|
158
|
+
bool train,
|
159
|
+
int64_t idata_in_loop,
|
160
|
+
int64_t ndata_in_loop,
|
161
|
+
int64_t t_loop_start);
|
162
|
+
|
131
163
|
private:
|
132
164
|
//
|
133
165
|
// output
|
@@ -137,50 +169,30 @@ private:
|
|
137
169
|
// Returns max number of outputs for which space was reserved.
|
138
170
|
int32_t output_reserve(int32_t n_outputs);
|
139
171
|
|
140
|
-
// make the outputs have the same order they had in the user-provided batch
|
141
|
-
// TODO: maybe remove this
|
142
|
-
void output_reorder();
|
143
|
-
|
144
172
|
//
|
145
173
|
// graph
|
146
174
|
//
|
147
175
|
|
176
|
+
public:
|
148
177
|
int32_t graph_max_nodes() const;
|
149
178
|
|
150
179
|
// zero-out inputs and create the ctx_compute for the compute graph
|
151
180
|
lm_ggml_cgraph * graph_init();
|
152
181
|
|
182
|
+
// returns the result of lm_ggml_backend_sched_graph_compute_async execution
|
183
|
+
lm_ggml_status graph_compute(
|
184
|
+
lm_ggml_cgraph * gf,
|
185
|
+
bool batched);
|
186
|
+
|
187
|
+
private:
|
153
188
|
llm_graph_result_ptr graph_build(
|
154
189
|
lm_ggml_context * ctx,
|
155
190
|
lm_ggml_cgraph * gf,
|
156
191
|
const llama_ubatch & ubatch,
|
157
192
|
llm_graph_type gtype);
|
158
193
|
|
159
|
-
// returns the result of lm_ggml_backend_sched_graph_compute_async execution
|
160
|
-
lm_ggml_status graph_compute(
|
161
|
-
lm_ggml_cgraph * gf,
|
162
|
-
bool batched);
|
163
|
-
|
164
194
|
llm_graph_cb graph_get_cb() const;
|
165
195
|
|
166
|
-
// used by kv_self_update()
|
167
|
-
lm_ggml_tensor * build_rope_shift(
|
168
|
-
lm_ggml_context * ctx0,
|
169
|
-
lm_ggml_tensor * cur,
|
170
|
-
lm_ggml_tensor * shift,
|
171
|
-
lm_ggml_tensor * factors,
|
172
|
-
float freq_base,
|
173
|
-
float freq_scale,
|
174
|
-
lm_ggml_backend_buffer * bbuf) const;
|
175
|
-
|
176
|
-
llm_graph_result_ptr build_kv_self_shift(
|
177
|
-
lm_ggml_context * ctx0,
|
178
|
-
lm_ggml_cgraph * gf) const;
|
179
|
-
|
180
|
-
llm_graph_result_ptr build_kv_self_defrag(
|
181
|
-
lm_ggml_context * ctx0,
|
182
|
-
lm_ggml_cgraph * gf) const;
|
183
|
-
|
184
196
|
// TODO: read/write lora adapters and cvec
|
185
197
|
size_t state_write_data(llama_io_write_i & io);
|
186
198
|
size_t state_read_data (llama_io_read_i & io);
|
@@ -197,14 +209,10 @@ private:
|
|
197
209
|
llama_cparams cparams;
|
198
210
|
llama_adapter_cvec cvec;
|
199
211
|
llama_adapter_loras loras;
|
200
|
-
llama_sbatch sbatch;
|
201
212
|
|
202
213
|
llama_cross cross; // TODO: tmp for handling cross-attention - need something better probably
|
203
214
|
|
204
|
-
std::unique_ptr<
|
205
|
-
|
206
|
-
// TODO: remove
|
207
|
-
bool logits_all = false;
|
215
|
+
std::unique_ptr<llama_memory_i> memory;
|
208
216
|
|
209
217
|
// decode output (2-dimensional array: [n_outputs][n_vocab])
|
210
218
|
size_t logits_size = 0; // capacity (of floats) for logits
|
@@ -231,6 +239,9 @@ private:
|
|
231
239
|
|
232
240
|
lm_ggml_context_ptr ctx_compute;
|
233
241
|
|
242
|
+
// training
|
243
|
+
lm_ggml_opt_context_t opt_ctx = nullptr;
|
244
|
+
|
234
245
|
lm_ggml_threadpool_t threadpool = nullptr;
|
235
246
|
lm_ggml_threadpool_t threadpool_batch = nullptr;
|
236
247
|
|