cui-llama.rn 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/ggml.h
CHANGED
```diff
@@ -394,8 +394,8 @@ extern "C" {
 
     // precision
     enum lm_ggml_prec {
-        LM_GGML_PREC_DEFAULT,
-        LM_GGML_PREC_F32,
+        LM_GGML_PREC_DEFAULT =  0, // stored as lm_ggml_tensor.op_params, 0 by default
+        LM_GGML_PREC_F32     = 10,
     };
 
     // model file types
@@ -482,6 +482,7 @@ extern "C" {
         LM_GGML_OP_CONV_TRANSPOSE_1D,
         LM_GGML_OP_IM2COL,
         LM_GGML_OP_IM2COL_BACK,
+        LM_GGML_OP_CONV_2D_DW,
         LM_GGML_OP_CONV_TRANSPOSE_2D,
         LM_GGML_OP_POOL_1D,
         LM_GGML_OP_POOL_2D,
@@ -508,17 +509,12 @@ extern "C" {
 
         LM_GGML_OP_UNARY,
 
-        LM_GGML_OP_MAP_UNARY,
-        LM_GGML_OP_MAP_BINARY,
-
-        LM_GGML_OP_MAP_CUSTOM1_F32,
-        LM_GGML_OP_MAP_CUSTOM2_F32,
-        LM_GGML_OP_MAP_CUSTOM3_F32,
-
         LM_GGML_OP_MAP_CUSTOM1,
         LM_GGML_OP_MAP_CUSTOM2,
         LM_GGML_OP_MAP_CUSTOM3,
 
+        LM_GGML_OP_CUSTOM,
+
         LM_GGML_OP_CROSS_ENTROPY_LOSS,
         LM_GGML_OP_CROSS_ENTROPY_LOSS_BACK,
         LM_GGML_OP_OPT_STEP_ADAMW,
@@ -541,6 +537,7 @@ extern "C" {
         LM_GGML_UNARY_OP_HARDSWISH,
         LM_GGML_UNARY_OP_HARDSIGMOID,
        LM_GGML_UNARY_OP_EXP,
+        LM_GGML_UNARY_OP_GELU_ERF,
 
         LM_GGML_UNARY_OP_COUNT,
    };
```
```diff
@@ -678,11 +675,18 @@ extern "C" {
     LM_GGML_API bool lm_ggml_is_3d        (const struct lm_ggml_tensor * tensor);
     LM_GGML_API int  lm_ggml_n_dims       (const struct lm_ggml_tensor * tensor); // returns 1 for scalars
 
+    // returns whether the tensor elements can be iterated over with a flattened index (no gaps, no permutation)
     LM_GGML_API bool lm_ggml_is_contiguous  (const struct lm_ggml_tensor * tensor);
     LM_GGML_API bool lm_ggml_is_contiguous_0(const struct lm_ggml_tensor * tensor); // same as lm_ggml_is_contiguous()
     LM_GGML_API bool lm_ggml_is_contiguous_1(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 1
     LM_GGML_API bool lm_ggml_is_contiguous_2(const struct lm_ggml_tensor * tensor); // contiguous for dims >= 2
 
+    // returns whether the tensor elements are allocated as one contiguous block of memory (no gaps, but permutation ok)
+    LM_GGML_API bool lm_ggml_is_contiguously_allocated(const struct lm_ggml_tensor * tensor);
+
+    // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
+    LM_GGML_API bool lm_ggml_is_contiguous_channels(const struct lm_ggml_tensor * tensor);
+
     LM_GGML_API bool lm_ggml_are_same_shape (const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
     LM_GGML_API bool lm_ggml_are_same_stride(const struct lm_ggml_tensor * t0, const struct lm_ggml_tensor * t1);
 
```
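The three contiguity predicates answer different questions, and the channel-contiguous case is exactly the layout the new depthwise-convolution path consumes. A minimal sketch of how they differ, assuming the package's bundled prefixed headers; the shape constants are illustrative:

```cpp
#include <stdio.h>
#include "ggml.h" // bundled by this package with the lm_ggml_* prefix

int main(void) {
    struct lm_ggml_init_params ip = { 1 * 1024 * 1024, NULL, false };
    struct lm_ggml_context * ctx = lm_ggml_init(ip);

    // stored as C x W x H x N (channels fastest in memory) ...
    struct lm_ggml_tensor * t = lm_ggml_new_tensor_4d(ctx, LM_GGML_TYPE_F32, 8, 16, 16, 1);
    // ... then viewed as W x H x C x N without moving a byte
    struct lm_ggml_tensor * p = lm_ggml_permute(ctx, t, 2, 0, 1, 3);

    printf("is_contiguous             = %d\n", lm_ggml_is_contiguous(p));             // 0: strides are permuted
    printf("is_contiguously_allocated = %d\n", lm_ggml_is_contiguously_allocated(p)); // 1: one solid block of memory
    printf("is_contiguous_channels    = %d\n", lm_ggml_is_contiguous_channels(p));    // 1: the CxWxHxN -> WxHxCxN case
    lm_ggml_free(ctx);
    return 0;
}
```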
```diff
@@ -766,7 +770,7 @@ extern "C" {
     // Tensor flags
     LM_GGML_API void lm_ggml_set_input(struct lm_ggml_tensor * tensor);
     LM_GGML_API void lm_ggml_set_output(struct lm_ggml_tensor * tensor);
-    LM_GGML_API void lm_ggml_set_param(struct lm_ggml_context * ctx, struct lm_ggml_tensor * tensor);
+    LM_GGML_API void lm_ggml_set_param(struct lm_ggml_tensor * tensor);
     LM_GGML_API void lm_ggml_set_loss(struct lm_ggml_tensor * tensor);
 
     //
@@ -936,7 +940,7 @@ extern "C" {
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_repeat_back(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
-            struct lm_ggml_tensor  * b);
+            struct lm_ggml_tensor  * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
 
     // concat a and b along dim
     // used in stable-diffusion
```
```diff
@@ -1022,6 +1026,16 @@ extern "C" {
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a);
 
+    // GELU using erf (error function) when possible
+    // some backends may fallback to approximation based on Abramowitz and Stegun formula
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_erf_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a);
+
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_gelu_quick(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a);
```
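`lm_ggml_gelu_erf` evaluates the exact erf-based GELU rather than the tanh approximation behind `lm_ggml_gelu`. A hedged end-to-end sketch on the CPU path, assuming the bundled `ggml.h`/`ggml-cpu.h` entry points:

```cpp
#include <stdio.h>
#include "ggml.h"     // bundled with the lm_ggml_* prefix
#include "ggml-cpu.h" // for lm_ggml_graph_compute_with_ctx

int main(void) {
    struct lm_ggml_init_params ip = { 16 * 1024 * 1024, NULL, false };
    struct lm_ggml_context * ctx = lm_ggml_init(ip);

    struct lm_ggml_tensor * x = lm_ggml_new_tensor_1d(ctx, LM_GGML_TYPE_F32, 4);
    float * xd = (float *) x->data;
    xd[0] = -2.0f; xd[1] = -0.5f; xd[2] = 0.5f; xd[3] = 2.0f;

    // exact (erf-based) GELU instead of the tanh approximation of lm_ggml_gelu
    struct lm_ggml_tensor * y = lm_ggml_gelu_erf(ctx, x);

    struct lm_ggml_cgraph * gf = lm_ggml_new_graph(ctx);
    lm_ggml_build_forward_expand(gf, y);
    lm_ggml_graph_compute_with_ctx(gf, ctx, /*n_threads=*/1);

    for (int i = 0; i < 4; ++i) {
        printf("gelu_erf(%5.2f) = %8.5f\n", xd[i], ((float *) y->data)[i]);
    }
    lm_ggml_free(ctx);
    return 0;
}
```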
```diff
@@ -1666,7 +1680,7 @@ extern "C" {
             struct lm_ggml_tensor  * a,
             struct lm_ggml_tensor  * b);
 
-    // depthwise
+    // depthwise (via im2col and mul_mat)
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,  // convolution kernel
@@ -1678,6 +1692,22 @@ extern "C" {
             int                   d0,  // dilation dimension 0
             int                   d1); // dilation dimension 1
 
+    // Depthwise 2D convolution
+    // may be faster than lm_ggml_conv_2d_dw, but not available in all backends
+    // a:   KW    KH    1    C    convolution kernel
+    // b:   W     H     C    N    input data
+    // res: W_out H_out C    N
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw_direct(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a,
+            struct lm_ggml_tensor  * b,
+            int                      stride0,
+            int                      stride1,
+            int                      pad0,
+            int                      pad1,
+            int                      dilation0,
+            int                      dilation1);
+
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
```
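The direct depthwise op takes its operands in the shapes spelled out in the header comment (`a: KW KH 1 C`, `b: W H C N`). A minimal sketch with illustrative sizes; since the header warns the op is not available in all backends, real code may still need the `lm_ggml_conv_2d_dw` fallback:

```cpp
#include <stdio.h>
#include "ggml.h"
#include "ggml-cpu.h"

int main(void) {
    struct lm_ggml_init_params ip = { 64 * 1024 * 1024, NULL, false };
    struct lm_ggml_context * ctx = lm_ggml_init(ip);

    const int C = 8, W = 32, H = 32, N = 1, KW = 3, KH = 3;

    // a: KW x KH x 1 x C kernel, b: W x H x C x N input, per the header comment
    struct lm_ggml_tensor * a = lm_ggml_new_tensor_4d(ctx, LM_GGML_TYPE_F32, KW, KH, 1, C);
    struct lm_ggml_tensor * b = lm_ggml_new_tensor_4d(ctx, LM_GGML_TYPE_F32, W, H, C, N);

    // stride 1, padding 1, dilation 1 -> "same" output: W x H x C x N
    struct lm_ggml_tensor * res = lm_ggml_conv_2d_dw_direct(ctx, a, b, 1, 1, 1, 1, 1, 1);

    struct lm_ggml_cgraph * gf = lm_ggml_new_graph(ctx);
    lm_ggml_build_forward_expand(gf, res);
    lm_ggml_graph_compute_with_ctx(gf, ctx, 4);

    printf("res: %lld x %lld x %lld x %lld\n",
           (long long) res->ne[0], (long long) res->ne[1],
           (long long) res->ne[2], (long long) res->ne[3]);
    lm_ggml_free(ctx);
    return 0;
}
```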
```diff
@@ -1723,24 +1753,29 @@ extern "C" {
             float                 p0,
             float                 p1);
 
-    // nearest interpolate
+    enum lm_ggml_scale_mode {
+        LM_GGML_SCALE_MODE_NEAREST  = 0,
+        LM_GGML_SCALE_MODE_BILINEAR = 1,
+    };
+
+    // interpolate
     // multiplies ne0 and ne1 by scale factor
-    // used in stable-diffusion
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
-            int                   scale_factor);
+            int                      scale_factor,
+            enum lm_ggml_scale_mode  mode);
 
-    // nearest interpolate
-    // nearest interpolate to specified dimensions
-    // used in tortoise.cpp
+    // interpolate
+    // interpolate scale to specified dimensions
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_upscale_ext(
             struct lm_ggml_context * ctx,
             struct lm_ggml_tensor  * a,
             int                   ne0,
             int                   ne1,
             int                   ne2,
-            int                   ne3);
+            int                      ne3,
+            enum lm_ggml_scale_mode  mode);
 
     // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_pad(
```
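Existing `lm_ggml_upscale` call sites gain one argument; passing `LM_GGML_SCALE_MODE_NEAREST` preserves the old behavior, while `LM_GGML_SCALE_MODE_BILINEAR` is new. A small sketch under the same assumptions as the examples above:

```cpp
#include <stdio.h>
#include "ggml.h"
#include "ggml-cpu.h"

int main(void) {
    struct lm_ggml_init_params ip = { 16 * 1024 * 1024, NULL, false };
    struct lm_ggml_context * ctx = lm_ggml_init(ip);

    // an 8x8 image with 3 channels, filled with a simple ramp
    struct lm_ggml_tensor * img = lm_ggml_new_tensor_3d(ctx, LM_GGML_TYPE_F32, 8, 8, 3);
    for (int i = 0; i < 8 * 8 * 3; ++i) {
        ((float *) img->data)[i] = (float) i;
    }

    // same call as before, plus the new mode argument
    struct lm_ggml_tensor * up = lm_ggml_upscale(ctx, img, 2, LM_GGML_SCALE_MODE_BILINEAR);

    struct lm_ggml_cgraph * gf = lm_ggml_new_graph(ctx);
    lm_ggml_build_forward_expand(gf, up);
    lm_ggml_graph_compute_with_ctx(gf, ctx, 1);

    printf("upscaled to %lld x %lld\n", (long long) up->ne[0], (long long) up->ne[1]);
    lm_ggml_free(ctx);
    return 0;
}
```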
```diff
@@ -1917,83 +1952,6 @@ extern "C" {
 
     // custom operators
 
-    typedef void (*lm_ggml_unary_op_f32_t) (const int, float *, const float *);
-    typedef void (*lm_ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
-
-    typedef void (*lm_ggml_custom1_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
-    typedef void (*lm_ggml_custom2_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
-    typedef void (*lm_ggml_custom3_op_f32_t)(struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *, const struct lm_ggml_tensor *);
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_f32(
-            struct lm_ggml_context        * ctx,
-            struct lm_ggml_tensor         * a,
-                   lm_ggml_unary_op_f32_t   fun),
-        "use lm_ggml_map_custom1 instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_unary_inplace_f32(
-            struct lm_ggml_context        * ctx,
-            struct lm_ggml_tensor         * a,
-                   lm_ggml_unary_op_f32_t   fun),
-        "use lm_ggml_map_custom1_inplace instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_f32(
-            struct lm_ggml_context         * ctx,
-            struct lm_ggml_tensor          * a,
-            struct lm_ggml_tensor          * b,
-                   lm_ggml_binary_op_f32_t   fun),
-        "use lm_ggml_map_custom2 instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_binary_inplace_f32(
-            struct lm_ggml_context         * ctx,
-            struct lm_ggml_tensor          * a,
-            struct lm_ggml_tensor          * b,
-                   lm_ggml_binary_op_f32_t   fun),
-        "use lm_ggml_map_custom2_inplace instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_f32(
-            struct lm_ggml_context          * ctx,
-            struct lm_ggml_tensor           * a,
-                   lm_ggml_custom1_op_f32_t   fun),
-        "use lm_ggml_map_custom1 instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom1_inplace_f32(
-            struct lm_ggml_context          * ctx,
-            struct lm_ggml_tensor           * a,
-                   lm_ggml_custom1_op_f32_t   fun),
-        "use lm_ggml_map_custom1_inplace instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_f32(
-            struct lm_ggml_context          * ctx,
-            struct lm_ggml_tensor           * a,
-            struct lm_ggml_tensor           * b,
-                   lm_ggml_custom2_op_f32_t   fun),
-        "use lm_ggml_map_custom2 instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom2_inplace_f32(
-            struct lm_ggml_context          * ctx,
-            struct lm_ggml_tensor           * a,
-            struct lm_ggml_tensor           * b,
-                   lm_ggml_custom2_op_f32_t   fun),
-        "use lm_ggml_map_custom2_inplace instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_f32(
-            struct lm_ggml_context          * ctx,
-            struct lm_ggml_tensor           * a,
-            struct lm_ggml_tensor           * b,
-            struct lm_ggml_tensor           * c,
-                   lm_ggml_custom3_op_f32_t   fun),
-        "use lm_ggml_map_custom3 instead");
-
-    LM_GGML_DEPRECATED(LM_GGML_API struct lm_ggml_tensor * lm_ggml_map_custom3_inplace_f32(
-            struct lm_ggml_context          * ctx,
-            struct lm_ggml_tensor           * a,
-            struct lm_ggml_tensor           * b,
-            struct lm_ggml_tensor           * c,
-                   lm_ggml_custom3_op_f32_t   fun),
-        "use lm_ggml_map_custom3_inplace instead");
-
-    // custom operators v2
-
     typedef void (*lm_ggml_custom1_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, int ith, int nth, void * userdata);
     typedef void (*lm_ggml_custom2_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, int ith, int nth, void * userdata);
     typedef void (*lm_ggml_custom3_op_t)(struct lm_ggml_tensor * dst , const struct lm_ggml_tensor * a, const struct lm_ggml_tensor * b, const struct lm_ggml_tensor * c, int ith, int nth, void * userdata);
@@ -2049,6 +2007,30 @@ extern "C" {
             int                     n_tasks,
             void                  * userdata);
 
+    typedef void (*lm_ggml_custom_op_t)(struct lm_ggml_tensor * dst , int ith, int nth, void * userdata);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_4d(
+            struct lm_ggml_context * ctx,
+            enum lm_ggml_type        type,
+            int64_t                  ne0,
+            int64_t                  ne1,
+            int64_t                  ne2,
+            int64_t                  ne3,
+            struct lm_ggml_tensor ** args,
+            int                      n_args,
+            lm_ggml_custom_op_t      fun,
+            int                      n_tasks,
+            void                   * userdata);
+
+    LM_GGML_API struct lm_ggml_tensor * lm_ggml_custom_inplace(
+            struct lm_ggml_context * ctx,
+            struct lm_ggml_tensor  * a,
+            struct lm_ggml_tensor ** args,
+            int                      n_args,
+            lm_ggml_custom_op_t      fun,
+            int                      n_tasks,
+            void                   * userdata);
+
     // loss function
 
     LM_GGML_API struct lm_ggml_tensor * lm_ggml_cross_entropy_loss(
```
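`lm_ggml_custom_4d` generalizes the old map_custom* family: the destination gets an explicit type and shape, and an arbitrary number of inputs arrive through `args` (visible to the callback as `dst->src[i]`). A hedged sketch; `scale_by_two` is a made-up callback for illustration:

```cpp
#include <stdio.h>
#include "ggml.h"
#include "ggml-cpu.h"

// fills dst from args[0]; ith/nth let the callback split work across threads
static void scale_by_two(struct lm_ggml_tensor * dst, int ith, int nth, void * userdata) {
    (void) userdata;
    const struct lm_ggml_tensor * src = dst->src[0]; // args[0] from lm_ggml_custom_4d
    const int64_t n = lm_ggml_nelements(dst);
    for (int64_t i = ith; i < n; i += nth) {
        ((float *) dst->data)[i] = 2.0f * ((const float *) src->data)[i];
    }
}

int main(void) {
    struct lm_ggml_init_params ip = { 16 * 1024 * 1024, NULL, false };
    struct lm_ggml_context * ctx = lm_ggml_init(ip);

    struct lm_ggml_tensor * x = lm_ggml_new_tensor_1d(ctx, LM_GGML_TYPE_F32, 8);
    for (int i = 0; i < 8; ++i) ((float *) x->data)[i] = (float) i;

    struct lm_ggml_tensor * args[1] = { x };
    struct lm_ggml_tensor * y = lm_ggml_custom_4d(ctx, LM_GGML_TYPE_F32,
                                                  8, 1, 1, 1, args, 1,
                                                  scale_by_two, LM_GGML_N_TASKS_MAX, NULL);

    struct lm_ggml_cgraph * gf = lm_ggml_new_graph(ctx);
    lm_ggml_build_forward_expand(gf, y);
    lm_ggml_graph_compute_with_ctx(gf, ctx, 2);

    printf("y[7] = %.1f\n", ((float *) y->data)[7]); // 14.0
    lm_ggml_free(ctx);
    return 0;
}
```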
```diff
@@ -2079,15 +2061,14 @@ extern "C" {
 
     LM_GGML_API void lm_ggml_build_forward_expand(struct lm_ggml_cgraph * cgraph, struct lm_ggml_tensor * tensor);
     LM_GGML_API void lm_ggml_build_backward_expand(
-        struct lm_ggml_context *  ctx_static, // context for static gradients (loss + gradient accumulation)
-        struct lm_ggml_context *  ctx_compute, // context for gradient computation
-        struct lm_ggml_cgraph  *  cgraph,
-        bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
+        struct lm_ggml_context *  ctx,        // context for gradient computation
+        struct lm_ggml_cgraph  *  cgraph,
+        struct lm_ggml_tensor ** grad_accs);
 
     // graph allocation in a context
     LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph  (struct lm_ggml_context * ctx); // size = LM_GGML_DEFAULT_GRAPH_SIZE, grads = false
     LM_GGML_API struct lm_ggml_cgraph * lm_ggml_new_graph_custom(struct lm_ggml_context * ctx, size_t size, bool grads);
-    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup  (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph);
+    LM_GGML_API struct lm_ggml_cgraph * lm_ggml_graph_dup  (struct lm_ggml_context * ctx, struct lm_ggml_cgraph * cgraph, bool force_grads);
     LM_GGML_API void                 lm_ggml_graph_cpy   (struct lm_ggml_cgraph * src, struct lm_ggml_cgraph * dst);
     LM_GGML_API void                 lm_ggml_graph_reset (struct lm_ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
     LM_GGML_API void                 lm_ggml_graph_clear (struct lm_ggml_cgraph * cgraph);
```
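The backward-pass API collapses the old `ctx_static`/`ctx_compute`/`accumulate` arguments into a single context plus an optional array of caller-provided gradient accumulators, pairing with the new one-argument `lm_ggml_set_param` above. A hedged sketch of the flow as suggested by the new signatures (passing NULL for `grad_accs` to let ggml allocate gradients itself):

```cpp
#include <stdio.h>
#include "ggml.h"
#include "ggml-cpu.h"

int main(void) {
    struct lm_ggml_init_params ip = { 32 * 1024 * 1024, NULL, false };
    struct lm_ggml_context * ctx = lm_ggml_init(ip);

    struct lm_ggml_tensor * x = lm_ggml_new_tensor_1d(ctx, LM_GGML_TYPE_F32, 4);
    for (int i = 0; i < 4; ++i) ((float *) x->data)[i] = (float) (i + 1);

    lm_ggml_set_param(x); // new one-argument form, no context parameter

    struct lm_ggml_tensor * loss = lm_ggml_sum(ctx, lm_ggml_sqr(ctx, x)); // loss = sum(x^2)
    lm_ggml_set_loss(loss);

    // grads = true so the graph carries gradient slots
    struct lm_ggml_cgraph * gf = lm_ggml_new_graph_custom(ctx, LM_GGML_DEFAULT_GRAPH_SIZE, true);
    lm_ggml_build_forward_expand(gf, loss);
    lm_ggml_build_backward_expand(ctx, gf, NULL); // NULL grad_accs: gradients allocated in ctx

    lm_ggml_graph_reset(gf); // zero grads, seed d(loss)/d(loss) = 1
    lm_ggml_graph_compute_with_ctx(gf, ctx, 1);

    struct lm_ggml_tensor * gx = lm_ggml_graph_get_grad(gf, x);
    printf("d(loss)/dx[3] = %.1f\n", ((float *) gx->data)[3]); // 2*x[3] = 8.0
    lm_ggml_free(ctx);
    return 0;
}
```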
package/cpp/gguf.cpp
CHANGED
```diff
@@ -299,10 +299,10 @@ bool lm_gguf_read_emplace_helper(const struct lm_gguf_reader & gr, std::vector<s
                 return false;
             }
         } catch (std::length_error &) {
-            fprintf(stderr, "%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
+            LM_GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
             return false;
         } catch (std::bad_alloc &) {
-            fprintf(stderr, "%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
+            LM_GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
             return false;
         }
         kv.emplace_back(key, value);
@@ -328,14 +328,14 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     ok = ok && gr.read(magic, 4);
 
     if (!ok) {
-        fprintf(stderr, "%s: failed to read magic\n", __func__);
+        LM_GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
         lm_gguf_free(ctx);
         return nullptr;
     }
 
     for (uint32_t i = 0; i < magic.size(); i++) {
         if (magic[i] != LM_GGUF_MAGIC[i]) {
-            fprintf(stderr, "%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
+            LM_GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
             lm_gguf_free(ctx);
             return nullptr;
         }
@@ -348,11 +348,11 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
     if (ok && gr.read(ctx->version)) {
         if (ctx->version == 1) {
-            fprintf(stderr, "%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
+            LM_GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;
         }
         if (ctx->version > LM_GGUF_VERSION) {
-            fprintf(stderr, "%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
+            LM_GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
                 __func__, ctx->version, LM_GGUF_VERSION);
             ok = false;
         }
@@ -363,7 +363,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     if (ok && gr.read(n_tensors)) {
         static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
         if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(lm_gguf_tensor_info))) {
-            fprintf(stderr, "%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
+            LM_GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
                 __func__, n_tensors, SIZE_MAX/sizeof(lm_gguf_tensor_info));
             ok = false;
         }
@@ -374,7 +374,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     if (ok && gr.read(n_kv)) {
         static_assert(sizeof(size_t) <= 8 && sizeof(lm_gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
         if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(lm_gguf_kv))) {
-            fprintf(stderr, "%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
+            LM_GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
                 __func__, n_kv, SIZE_MAX/sizeof(lm_gguf_kv));
             ok = false;
         }
@@ -383,7 +383,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     }
 
     if (!ok) {
-        fprintf(stderr, "%s: failed to read header\n", __func__);
+        LM_GGML_LOG_ERROR("%s: failed to read header\n", __func__);
         lm_gguf_free(ctx);
         return nullptr;
     }
@@ -399,15 +399,15 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
         try {
            ok = ok && gr.read(key);
         } catch (std::length_error &) {
-            fprintf(stderr, "%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
+            LM_GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
             ok = false;
         } catch (std::bad_alloc &) {
-            fprintf(stderr, "%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
+            LM_GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
             ok = false;
         }
         for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
             if (key == ctx->kv[j].key) {
-                fprintf(stderr, "%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
+                LM_GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
                 ok = false;
             }
         }
@@ -441,14 +441,14 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
             case LM_GGUF_TYPE_ARRAY:
             default:
                 {
-                    fprintf(stderr, "%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
+                    LM_GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
                     ok = false;
                 } break;
         }
     }
 
     if (!ok) {
-        fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
+        LM_GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
         lm_gguf_free(ctx);
         return nullptr;
     }
@@ -458,7 +458,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     ctx->alignment = alignment_idx == -1 ? LM_GGUF_DEFAULT_ALIGNMENT : lm_gguf_get_val_u32(ctx, alignment_idx);
 
     if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
-        fprintf(stderr, "%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
+        LM_GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
        lm_gguf_free(ctx);
        return nullptr;
    }
@@ -474,14 +474,14 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
         try {
             ok = ok && gr.read(name);
         } catch (std::length_error &) {
-            fprintf(stderr, "%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
+            LM_GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
             ok = false;
         } catch (std::bad_alloc &) {
-            fprintf(stderr, "%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
+            LM_GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
             ok = false;
         }
         if (name.length() >= LM_GGML_MAX_NAME) {
-            fprintf(stderr, "%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), LM_GGML_MAX_NAME);
+            LM_GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), LM_GGML_MAX_NAME);
             ok = false;
             break;
         }
@@ -490,7 +490,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
         // make sure there are no duplicate tensor names
         for (int64_t j = 0; ok && j < i; ++j) {
             if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
-                fprintf(stderr, "%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
+                LM_GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
                 ok = false;
                 break;
             }
@@ -505,7 +505,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
         uint32_t n_dims = -1;
         ok = ok && gr.read(n_dims);
         if (n_dims > LM_GGML_MAX_DIMS) {
-            fprintf(stderr, "%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
+            LM_GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
                 __func__, info.t.name, n_dims, LM_GGML_MAX_DIMS);
             ok = false;
             break;
@@ -518,7 +518,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
             // check that all ne are non-negative
             if (info.t.ne[j] < 0) {
-                fprintf(stderr, "%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
+                LM_GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
                     __func__, info.t.name, j, info.t.ne[j]);
                 ok = false;
                 break;
@@ -530,7 +530,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
             (INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
             (INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
 
-            fprintf(stderr, "%s: total number of elements in tensor '%s' with shape "
+            LM_GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
                 "(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
                 __func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
             ok = false;
@@ -547,7 +547,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
         // check that tensor type is within defined range
         if (info.t.type < 0 || info.t.type >= LM_GGML_TYPE_COUNT) {
-            fprintf(stderr, "%s: tensor '%s' has invalid ggml type %d (%s)\n",
+            LM_GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
                 __func__, info.t.name, info.t.type, lm_ggml_type_name(info.t.type));
             ok = false;
             break;
@@ -557,7 +557,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
         // check that row size is divisible by block size
         if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
-            fprintf(stderr, "%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
+            LM_GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
                 "not a multiple of block size (%" PRId64 ")\n",
                 __func__, info.t.name, (int) info.t.type, lm_ggml_type_name(info.t.type), info.t.ne[0], blck_size);
             ok = false;
@@ -582,7 +582,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     }
 
     if (!ok) {
-        fprintf(stderr, "%s: failed to read tensor info\n", __func__);
+        LM_GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
         lm_gguf_free(ctx);
         return nullptr;
     }
@@ -590,7 +590,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
     // we require the data section to be aligned, so take into account any padding
     if (fseek(file, LM_GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
-        fprintf(stderr, "%s: failed to seek to beginning of data section\n", __func__);
+        LM_GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
         lm_gguf_free(ctx);
         return nullptr;
     }
@@ -604,9 +604,9 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     for (size_t i = 0; i < ctx->info.size(); ++i) {
         const lm_gguf_tensor_info & ti = ctx->info[i];
         if (ti.offset != ctx->size) {
-            fprintf(stderr, "%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
+            LM_GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
                 __func__, ti.t.name, ti.offset, ctx->size);
-            fprintf(stderr, "%s: failed to read tensor data\n", __func__);
+            LM_GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
             lm_gguf_free(ctx);
             return nullptr;
         }
@@ -634,7 +634,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
 
         *params.ctx = lm_ggml_init(pdata);
         if (*params.ctx == nullptr) {
-            fprintf(stderr, "%s: failed to initialize ggml context for storing tensors\n", __func__);
+            LM_GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
             lm_gguf_free(ctx);
             return nullptr;
        }
@@ -656,7 +656,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
         ok = ok && gr.read(data->data, ctx->size);
 
         if (!ok) {
-            fprintf(stderr, "%s: failed to read tensor data binary blob\n", __func__);
+            LM_GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
             lm_ggml_free(ctx_data);
             *params.ctx = nullptr;
             lm_gguf_free(ctx);
@@ -689,7 +689,7 @@ struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf
     }
 
     if (!ok) {
-        fprintf(stderr, "%s: failed to create tensors\n", __func__);
+        LM_GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
         lm_ggml_free(ctx_data);
         *params.ctx = nullptr;
         lm_gguf_free(ctx);
@@ -706,7 +706,7 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
     FILE * file = lm_ggml_fopen(fname, "rb");
 
     if (!file) {
-        fprintf(stderr, "%s: failed to open GGUF file '%s'\n", __func__, fname);
+        LM_GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
         return nullptr;
     }
 
@@ -1305,7 +1305,7 @@ bool lm_gguf_write_to_file(const struct lm_gguf_context * ctx, const char * fnam
     FILE * file = lm_ggml_fopen(fname, "wb");
 
     if (!file) {
-        fprintf(stderr, "%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
+        LM_GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
         return false;
    }
 
```
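All of these gguf.cpp changes route the former raw stderr diagnostics through `LM_GGML_LOG_ERROR`, so a React Native host that installs a ggml log callback can surface them. From the caller's side nothing changes: a failed load still returns NULL. A hedged sketch, assuming the package's bundled `gguf.h`:

```cpp
#include <stdio.h>
#include "ggml.h"
#include "gguf.h" // bundled GGUF reader with the lm_gguf_* prefix

int main(int argc, char ** argv) {
    if (argc < 2) return 1;

    // no_alloc = true: read metadata and tensor layout only, skip the data blob
    struct lm_gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ NULL };
    struct lm_gguf_context * gctx = lm_gguf_init_from_file(argv[1], params);
    if (gctx == NULL) {
        // the reason is now reported via LM_GGML_LOG_ERROR (the ggml log
        // callback) instead of being written straight to stderr
        fprintf(stderr, "failed to load '%s'\n", argv[1]);
        return 1;
    }
    printf("GGUF version %d, %lld tensors\n",
           (int) lm_gguf_get_version(gctx), (long long) lm_gguf_get_n_tensors(gctx));
    lm_gguf_free(gctx);
    return 0;
}
```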
package/cpp/json-schema-to-grammar.cpp
CHANGED
```diff
@@ -16,6 +16,9 @@ using json = nlohmann::ordered_json;
 static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
     auto has_max = max_items != std::numeric_limits<int>::max();
 
+    if (max_items == 0) {
+        return "";
+    }
     if (min_items == 0 && max_items == 1) {
         return item_rule + "?";
     }
```
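The new guard handles schemas whose repetition bounds collapse to zero (for example `"maxItems": 0`): zero repetitions can only match the empty string, so the helper returns an empty rule instead of emitting a degenerate bounded repetition. A toy stand-in (not the library's code) for the bound-to-suffix mapping, assuming GBNF-style repetition syntax; the real `build_repetition` also handles separator rules:

```cpp
#include <climits>
#include <string>

// Illustrative re-sketch of the repetition logic.
static std::string repeat_rule(const std::string & item, int min_items, int max_items) {
    if (max_items == 0) {
        return "";                              // the new early-out: nothing may appear
    }
    if (min_items == 0 && max_items == 1) {
        return item + "?";
    }
    if (min_items == 1 && max_items == 1) {
        return item;
    }
    const bool has_max = max_items != INT_MAX;  // INT_MAX stands in for "unbounded"
    return item + "{" + std::to_string(min_items) + "," +
           (has_max ? std::to_string(max_items) : "") + "}";
}
```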
package/cpp/llama-adapter.cpp
CHANGED
```diff
@@ -253,6 +253,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
     std::vector<lm_ggml_backend_buffer_type_t> buft_extra;
     {
         auto * cpu_dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
+        if (!cpu_dev) {
+            throw std::runtime_error(format("%s: no CPU backend found", __func__));
+        }
         auto * cpu_reg = lm_ggml_backend_dev_backend_reg(cpu_dev);
 
         auto lm_ggml_backend_dev_get_extra_bufts_fn = (lm_ggml_backend_dev_get_extra_bufts_t)
@@ -291,6 +294,9 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
                 LLAMA_LOG_WARN("%s: lora for '%s' cannot use buft '%s', fallback to CPU\n", __func__, model_tensor->name, lm_ggml_backend_buft_name(buft));
 
                 auto * cpu_dev = lm_ggml_backend_dev_by_type(LM_GGML_BACKEND_DEVICE_TYPE_CPU);
+                if (!cpu_dev) {
+                    throw std::runtime_error(format("%s: no CPU backend found", __func__));
+                }
                 buft = lm_ggml_backend_dev_buffer_type(cpu_dev);
 
                 break;
```