cui-llama.rn 1.4.6 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +20 -20
- package/README.md +317 -319
- package/android/build.gradle +116 -116
- package/android/gradle.properties +5 -5
- package/android/src/main/AndroidManifest.xml +4 -4
- package/android/src/main/CMakeLists.txt +124 -117
- package/android/src/main/java/com/rnllama/LlamaContext.java +645 -645
- package/android/src/main/java/com/rnllama/RNLlama.java +695 -695
- package/android/src/main/java/com/rnllama/RNLlamaPackage.java +48 -48
- package/android/src/main/jni-utils.h +100 -100
- package/android/src/main/jni.cpp +1263 -1245
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +135 -135
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +136 -136
- package/cpp/README.md +4 -4
- package/cpp/binary-ops.cpp +158 -0
- package/cpp/binary-ops.h +16 -0
- package/cpp/chat.cpp +1769 -1779
- package/cpp/chat.h +9 -1
- package/cpp/common.cpp +20 -522
- package/cpp/common.h +13 -36
- package/cpp/cpu-common.h +72 -0
- package/cpp/ggml-common.h +12 -6
- package/cpp/ggml-cpu-aarch64.cpp +1557 -80
- package/cpp/ggml-cpu-impl.h +2 -21
- package/cpp/ggml-cpu-quants.c +904 -405
- package/cpp/ggml-cpu.c +909 -13237
- package/cpp/ggml-impl.h +50 -23
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +597 -523
- package/cpp/ggml-metal.m +798 -580
- package/cpp/ggml.c +92 -3
- package/cpp/ggml.h +30 -6
- package/cpp/gguf.cpp +1 -0
- package/cpp/llama-adapter.cpp +55 -20
- package/cpp/llama-adapter.h +11 -9
- package/cpp/llama-arch.cpp +217 -16
- package/cpp/llama-arch.h +25 -0
- package/cpp/llama-batch.h +2 -2
- package/cpp/llama-chat.cpp +54 -2
- package/cpp/llama-chat.h +3 -0
- package/cpp/llama-context.cpp +2294 -1238
- package/cpp/llama-context.h +214 -77
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +1695 -0
- package/cpp/llama-graph.h +592 -0
- package/cpp/llama-hparams.cpp +8 -0
- package/cpp/llama-hparams.h +17 -0
- package/cpp/llama-io.cpp +15 -0
- package/cpp/llama-io.h +35 -0
- package/cpp/llama-kv-cache.cpp +965 -303
- package/cpp/llama-kv-cache.h +145 -151
- package/cpp/llama-memory.cpp +1 -0
- package/cpp/llama-memory.h +21 -0
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +10 -5
- package/cpp/llama-model-loader.h +5 -3
- package/cpp/llama-model.cpp +9194 -201
- package/cpp/llama-model.h +40 -1
- package/cpp/llama-sampling.cpp +5 -0
- package/cpp/llama-vocab.cpp +36 -5
- package/cpp/llama.cpp +51 -9984
- package/cpp/llama.h +102 -22
- package/cpp/log.cpp +34 -0
- package/cpp/minja/chat-template.hpp +15 -7
- package/cpp/minja/minja.hpp +120 -94
- package/cpp/ops.cpp +8723 -0
- package/cpp/ops.h +128 -0
- package/cpp/rn-llama.cpp +873 -882
- package/cpp/rn-llama.h +138 -148
- package/cpp/sampling.cpp +3 -0
- package/cpp/sampling.h +107 -107
- package/cpp/sgemm.cpp +533 -88
- package/cpp/simd-mappings.h +888 -0
- package/cpp/speculative.cpp +4 -4
- package/cpp/unary-ops.cpp +186 -0
- package/cpp/unary-ops.h +28 -0
- package/cpp/unicode-data.cpp +7034 -7034
- package/cpp/unicode-data.h +20 -20
- package/cpp/unicode.cpp +849 -849
- package/cpp/unicode.h +66 -66
- package/cpp/vec.cpp +258 -0
- package/cpp/vec.h +802 -0
- package/ios/CMakeLists.txt +116 -105
- package/ios/RNLlama.h +7 -7
- package/ios/RNLlama.mm +418 -405
- package/ios/RNLlamaContext.h +57 -57
- package/ios/RNLlamaContext.mm +835 -819
- package/ios/rnllama.xcframework/Info.plist +74 -74
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/log.h +132 -0
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/chat-template.hpp +15 -7
- package/{cpp → ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja}/minja.hpp +120 -94
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +16 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +143 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +677 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/cpu-common.h +72 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-alloc.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend-impl.h +255 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +354 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +1857 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +8 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +512 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +63 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +594 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +597 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +216 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-quants.h +100 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-threading.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +2222 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/gguf.h +202 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +24766 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-adapter.h +76 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +428 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +88 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +265 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cpp.h +30 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-grammar.h +173 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +592 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +156 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-impl.h +61 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-io.h +35 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +213 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +21 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-mmap.h +68 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-loader.h +169 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +409 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-sampling.h +32 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +125 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +1434 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/log.h +132 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +537 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +2941 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sampling.h +107 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +14 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +888 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/speculative.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +28 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode-data.h +20 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unicode.h +66 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +802 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +101 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +203 -203
- package/lib/commonjs/NativeRNLlama.js +1 -2
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/chat.js.map +1 -1
- package/lib/commonjs/grammar.js +12 -31
- package/lib/commonjs/grammar.js.map +1 -1
- package/lib/commonjs/index.js +47 -47
- package/lib/commonjs/index.js.map +1 -1
- package/lib/commonjs/package.json +1 -0
- package/lib/module/NativeRNLlama.js +2 -0
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/chat.js +2 -0
- package/lib/module/chat.js.map +1 -1
- package/lib/module/grammar.js +14 -31
- package/lib/module/grammar.js.map +1 -1
- package/lib/module/index.js +47 -45
- package/lib/module/index.js.map +1 -1
- package/lib/module/package.json +1 -0
- package/lib/typescript/NativeRNLlama.d.ts +6 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts.map +1 -1
- package/llama-rn.podspec +48 -48
- package/package.json +233 -233
- package/src/NativeRNLlama.ts +426 -424
- package/src/chat.ts +44 -44
- package/src/grammar.ts +854 -854
- package/src/index.ts +495 -485
package/cpp/chat.h
CHANGED
@@ -5,8 +5,16 @@
|
|
5
5
|
#include "common.h"
|
6
6
|
#include <string>
|
7
7
|
#include <vector>
|
8
|
+
#include "minja/chat-template.hpp"
|
9
|
+
#include "minja/minja.hpp"
|
8
10
|
|
9
|
-
|
11
|
+
typedef minja::chat_template common_chat_template;
|
12
|
+
|
13
|
+
struct common_chat_templates {
|
14
|
+
bool has_explicit_template; // Model had builtin template or template override was specified.
|
15
|
+
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
|
16
|
+
std::unique_ptr<common_chat_template> template_tool_use;
|
17
|
+
};
|
10
18
|
|
11
19
|
struct common_chat_tool_call {
|
12
20
|
std::string name;
|
package/cpp/common.cpp
CHANGED
@@ -7,9 +7,6 @@
|
|
7
7
|
|
8
8
|
#include "common.h"
|
9
9
|
#include "log.h"
|
10
|
-
// Change JSON_ASSERT from assert() to LM_GGML_ASSERT:
|
11
|
-
#define JSON_ASSERT LM_GGML_ASSERT
|
12
|
-
#include "json.hpp"
|
13
10
|
#include "llama.h"
|
14
11
|
|
15
12
|
#include <algorithm>
|
@@ -50,11 +47,7 @@
|
|
50
47
|
#include <sys/ioctl.h>
|
51
48
|
#include <sys/stat.h>
|
52
49
|
#include <unistd.h>
|
53
|
-
|
54
|
-
#if defined(LLAMA_USE_CURL)
|
55
|
-
#include <curl/curl.h>
|
56
|
-
#include <curl/easy.h>
|
57
|
-
#include <future>
|
50
|
+
|
58
51
|
#endif
|
59
52
|
|
60
53
|
// build info
|
@@ -67,37 +60,6 @@ char const *LLAMA_BUILD_TARGET = "unknown";
|
|
67
60
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
68
61
|
#endif
|
69
62
|
|
70
|
-
#if defined(LLAMA_USE_CURL)
|
71
|
-
#ifdef __linux__
|
72
|
-
#include <linux/limits.h>
|
73
|
-
#elif defined(_WIN32)
|
74
|
-
# if !defined(PATH_MAX)
|
75
|
-
# define PATH_MAX MAX_PATH
|
76
|
-
# endif
|
77
|
-
#else
|
78
|
-
#include <sys/syslimits.h>
|
79
|
-
#endif
|
80
|
-
#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
81
|
-
|
82
|
-
//
|
83
|
-
// CURL utils
|
84
|
-
//
|
85
|
-
|
86
|
-
using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
87
|
-
|
88
|
-
// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
|
89
|
-
struct curl_slist_ptr {
|
90
|
-
struct curl_slist * ptr = nullptr;
|
91
|
-
~curl_slist_ptr() {
|
92
|
-
if (ptr) {
|
93
|
-
curl_slist_free_all(ptr);
|
94
|
-
}
|
95
|
-
}
|
96
|
-
};
|
97
|
-
#endif // LLAMA_USE_CURL
|
98
|
-
|
99
|
-
using json = nlohmann::ordered_json;
|
100
|
-
|
101
63
|
//
|
102
64
|
// CPU utils
|
103
65
|
//
|
@@ -906,22 +868,14 @@ std::string fs_get_cache_file(const std::string & filename) {
|
|
906
868
|
//
|
907
869
|
// Model utils
|
908
870
|
//
|
871
|
+
|
909
872
|
struct common_init_result common_init_from_params(common_params & params) {
|
910
873
|
common_init_result iparams;
|
911
874
|
auto mparams = common_model_params_to_llama(params);
|
912
875
|
|
913
|
-
llama_model * model = nullptr;
|
914
|
-
|
915
|
-
if (!params.hf_repo.empty() && !params.hf_file.empty()) {
|
916
|
-
model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams);
|
917
|
-
} else if (!params.model_url.empty()) {
|
918
|
-
model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams);
|
919
|
-
} else {
|
920
|
-
model = llama_model_load_from_file(params.model.c_str(), mparams);
|
921
|
-
}
|
922
|
-
|
876
|
+
llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
|
923
877
|
if (model == NULL) {
|
924
|
-
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
|
878
|
+
LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
|
925
879
|
return iparams;
|
926
880
|
}
|
927
881
|
|
@@ -956,13 +910,13 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
956
910
|
|
957
911
|
llama_context * lctx = llama_init_from_model(model, cparams);
|
958
912
|
if (lctx == NULL) {
|
959
|
-
LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
|
913
|
+
LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
|
960
914
|
llama_model_free(model);
|
961
915
|
return iparams;
|
962
916
|
}
|
963
917
|
|
964
|
-
if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) {
|
965
|
-
LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__);
|
918
|
+
if (params.ctx_shift && !llama_kv_self_can_shift(lctx)) {
|
919
|
+
LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__);
|
966
920
|
params.ctx_shift = false;
|
967
921
|
}
|
968
922
|
|
@@ -1039,6 +993,8 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
1039
993
|
if (params.warmup) {
|
1040
994
|
LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
|
1041
995
|
|
996
|
+
llama_set_warmup(lctx, true);
|
997
|
+
|
1042
998
|
std::vector<llama_token> tmp;
|
1043
999
|
llama_token bos = llama_vocab_bos(vocab);
|
1044
1000
|
llama_token eos = llama_vocab_eos(vocab);
|
@@ -1066,9 +1022,10 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
1066
1022
|
if (llama_model_has_decoder(model)) {
|
1067
1023
|
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch)));
|
1068
1024
|
}
|
1069
|
-
|
1025
|
+
llama_kv_self_clear(lctx);
|
1070
1026
|
llama_synchronize(lctx);
|
1071
1027
|
llama_perf_context_reset(lctx);
|
1028
|
+
llama_set_warmup(lctx, false);
|
1072
1029
|
}
|
1073
1030
|
|
1074
1031
|
iparams.model.reset(model);
|
@@ -1092,6 +1049,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
1092
1049
|
if (!params.devices.empty()) {
|
1093
1050
|
mparams.devices = params.devices.data();
|
1094
1051
|
}
|
1052
|
+
|
1095
1053
|
if (params.n_gpu_layers != -1) {
|
1096
1054
|
mparams.n_gpu_layers = params.n_gpu_layers;
|
1097
1055
|
}
|
@@ -1105,6 +1063,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
1105
1063
|
mparams.use_mmap = params.use_mmap;
|
1106
1064
|
mparams.use_mlock = params.use_mlock;
|
1107
1065
|
mparams.check_tensors = params.check_tensors;
|
1066
|
+
|
1108
1067
|
if (params.kv_overrides.empty()) {
|
1109
1068
|
mparams.kv_overrides = NULL;
|
1110
1069
|
} else {
|
@@ -1112,6 +1071,13 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
1112
1071
|
mparams.kv_overrides = params.kv_overrides.data();
|
1113
1072
|
}
|
1114
1073
|
|
1074
|
+
if (params.tensor_buft_overrides.empty()) {
|
1075
|
+
mparams.tensor_buft_overrides = NULL;
|
1076
|
+
} else {
|
1077
|
+
LM_GGML_ASSERT(params.tensor_buft_overrides.back().pattern == nullptr && "Tensor buffer overrides not terminated with empty pattern");
|
1078
|
+
mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
|
1079
|
+
}
|
1080
|
+
|
1115
1081
|
return mparams;
|
1116
1082
|
}
|
1117
1083
|
|
@@ -1171,451 +1137,6 @@ struct lm_ggml_threadpool_params lm_ggml_threadpool_params_from_cpu_params(const
|
|
1171
1137
|
return tpp;
|
1172
1138
|
}
|
1173
1139
|
|
1174
|
-
#ifdef LLAMA_USE_CURL
|
1175
|
-
|
1176
|
-
#define CURL_MAX_RETRY 3
|
1177
|
-
#define CURL_RETRY_DELAY_SECONDS 2
|
1178
|
-
|
1179
|
-
static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) {
|
1180
|
-
int remaining_attempts = max_attempts;
|
1181
|
-
|
1182
|
-
while (remaining_attempts > 0) {
|
1183
|
-
LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
|
1184
|
-
|
1185
|
-
CURLcode res = curl_easy_perform(curl);
|
1186
|
-
if (res == CURLE_OK) {
|
1187
|
-
return true;
|
1188
|
-
}
|
1189
|
-
|
1190
|
-
int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000;
|
1191
|
-
LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
|
1192
|
-
|
1193
|
-
remaining_attempts--;
|
1194
|
-
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
1195
|
-
}
|
1196
|
-
|
1197
|
-
LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
1198
|
-
|
1199
|
-
return false;
|
1200
|
-
}
|
1201
|
-
|
1202
|
-
static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) {
|
1203
|
-
// Initialize libcurl
|
1204
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
1205
|
-
curl_slist_ptr http_headers;
|
1206
|
-
if (!curl) {
|
1207
|
-
LOG_ERR("%s: error initializing libcurl\n", __func__);
|
1208
|
-
return false;
|
1209
|
-
}
|
1210
|
-
|
1211
|
-
bool force_download = false;
|
1212
|
-
|
1213
|
-
// Set the URL, allow to follow http redirection
|
1214
|
-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
1215
|
-
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
1216
|
-
|
1217
|
-
// Check if hf-token or bearer-token was specified
|
1218
|
-
if (!hf_token.empty()) {
|
1219
|
-
std::string auth_header = "Authorization: Bearer " + hf_token;
|
1220
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
1221
|
-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
1222
|
-
}
|
1223
|
-
|
1224
|
-
#if defined(_WIN32)
|
1225
|
-
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
|
1226
|
-
// operating system. Currently implemented under MS-Windows.
|
1227
|
-
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
1228
|
-
#endif
|
1229
|
-
|
1230
|
-
// Check if the file already exists locally
|
1231
|
-
auto file_exists = std::filesystem::exists(path);
|
1232
|
-
|
1233
|
-
// If the file exists, check its JSON metadata companion file.
|
1234
|
-
std::string metadata_path = path + ".json";
|
1235
|
-
nlohmann::json metadata;
|
1236
|
-
std::string etag;
|
1237
|
-
std::string last_modified;
|
1238
|
-
|
1239
|
-
if (file_exists) {
|
1240
|
-
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
|
1241
|
-
std::ifstream metadata_in(metadata_path);
|
1242
|
-
if (metadata_in.good()) {
|
1243
|
-
try {
|
1244
|
-
metadata_in >> metadata;
|
1245
|
-
LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
1246
|
-
if (metadata.contains("url") && metadata.at("url").is_string()) {
|
1247
|
-
auto previous_url = metadata.at("url").get<std::string>();
|
1248
|
-
if (previous_url != url) {
|
1249
|
-
LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
|
1250
|
-
return false;
|
1251
|
-
}
|
1252
|
-
}
|
1253
|
-
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
1254
|
-
etag = metadata.at("etag");
|
1255
|
-
}
|
1256
|
-
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
1257
|
-
last_modified = metadata.at("lastModified");
|
1258
|
-
}
|
1259
|
-
} catch (const nlohmann::json::exception & e) {
|
1260
|
-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
1261
|
-
return false;
|
1262
|
-
}
|
1263
|
-
}
|
1264
|
-
} else {
|
1265
|
-
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
1266
|
-
}
|
1267
|
-
|
1268
|
-
// Send a HEAD request to retrieve the etag and last-modified headers
|
1269
|
-
struct common_load_model_from_url_headers {
|
1270
|
-
std::string etag;
|
1271
|
-
std::string last_modified;
|
1272
|
-
};
|
1273
|
-
|
1274
|
-
common_load_model_from_url_headers headers;
|
1275
|
-
|
1276
|
-
{
|
1277
|
-
typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
|
1278
|
-
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
|
1279
|
-
common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
|
1280
|
-
|
1281
|
-
static std::regex header_regex("([^:]+): (.*)\r\n");
|
1282
|
-
static std::regex etag_regex("ETag", std::regex_constants::icase);
|
1283
|
-
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
1284
|
-
|
1285
|
-
std::string header(buffer, n_items);
|
1286
|
-
std::smatch match;
|
1287
|
-
if (std::regex_match(header, match, header_regex)) {
|
1288
|
-
const std::string & key = match[1];
|
1289
|
-
const std::string & value = match[2];
|
1290
|
-
if (std::regex_match(key, match, etag_regex)) {
|
1291
|
-
headers->etag = value;
|
1292
|
-
} else if (std::regex_match(key, match, last_modified_regex)) {
|
1293
|
-
headers->last_modified = value;
|
1294
|
-
}
|
1295
|
-
}
|
1296
|
-
return n_items;
|
1297
|
-
};
|
1298
|
-
|
1299
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
|
1300
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
|
1301
|
-
curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
|
1302
|
-
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
1303
|
-
|
1304
|
-
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
|
1305
|
-
if (!was_perform_successful) {
|
1306
|
-
return false;
|
1307
|
-
}
|
1308
|
-
|
1309
|
-
long http_code = 0;
|
1310
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
1311
|
-
if (http_code != 200) {
|
1312
|
-
// HEAD not supported, we don't know if the file has changed
|
1313
|
-
// force trigger downloading
|
1314
|
-
force_download = true;
|
1315
|
-
LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
1316
|
-
}
|
1317
|
-
}
|
1318
|
-
|
1319
|
-
bool should_download = !file_exists || force_download;
|
1320
|
-
if (!should_download) {
|
1321
|
-
if (!etag.empty() && etag != headers.etag) {
|
1322
|
-
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
|
1323
|
-
should_download = true;
|
1324
|
-
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
1325
|
-
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
|
1326
|
-
should_download = true;
|
1327
|
-
}
|
1328
|
-
}
|
1329
|
-
if (should_download) {
|
1330
|
-
std::string path_temporary = path + ".downloadInProgress";
|
1331
|
-
if (file_exists) {
|
1332
|
-
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
1333
|
-
if (remove(path.c_str()) != 0) {
|
1334
|
-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
1335
|
-
return false;
|
1336
|
-
}
|
1337
|
-
}
|
1338
|
-
|
1339
|
-
// Set the output file
|
1340
|
-
|
1341
|
-
struct FILE_deleter {
|
1342
|
-
void operator()(FILE * f) const {
|
1343
|
-
fclose(f);
|
1344
|
-
}
|
1345
|
-
};
|
1346
|
-
|
1347
|
-
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
|
1348
|
-
if (!outfile) {
|
1349
|
-
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
|
1350
|
-
return false;
|
1351
|
-
}
|
1352
|
-
|
1353
|
-
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
|
1354
|
-
auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
|
1355
|
-
return fwrite(data, size, nmemb, (FILE *)fd);
|
1356
|
-
};
|
1357
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
|
1358
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
1359
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
|
1360
|
-
|
1361
|
-
// display download progress
|
1362
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
|
1363
|
-
|
1364
|
-
// helper function to hide password in URL
|
1365
|
-
auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
|
1366
|
-
std::size_t protocol_pos = url.find("://");
|
1367
|
-
if (protocol_pos == std::string::npos) {
|
1368
|
-
return url; // Malformed URL
|
1369
|
-
}
|
1370
|
-
|
1371
|
-
std::size_t at_pos = url.find('@', protocol_pos + 3);
|
1372
|
-
if (at_pos == std::string::npos) {
|
1373
|
-
return url; // No password in URL
|
1374
|
-
}
|
1375
|
-
|
1376
|
-
return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
|
1377
|
-
};
|
1378
|
-
|
1379
|
-
// start the download
|
1380
|
-
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
|
1381
|
-
llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
|
1382
|
-
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS);
|
1383
|
-
if (!was_perform_successful) {
|
1384
|
-
return false;
|
1385
|
-
}
|
1386
|
-
|
1387
|
-
long http_code = 0;
|
1388
|
-
curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
1389
|
-
if (http_code < 200 || http_code >= 400) {
|
1390
|
-
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
1391
|
-
return false;
|
1392
|
-
}
|
1393
|
-
|
1394
|
-
// Causes file to be closed explicitly here before we rename it.
|
1395
|
-
outfile.reset();
|
1396
|
-
|
1397
|
-
// Write the updated JSON metadata file.
|
1398
|
-
metadata.update({
|
1399
|
-
{"url", url},
|
1400
|
-
{"etag", headers.etag},
|
1401
|
-
{"lastModified", headers.last_modified}
|
1402
|
-
});
|
1403
|
-
std::ofstream(metadata_path) << metadata.dump(4);
|
1404
|
-
LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
1405
|
-
|
1406
|
-
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
1407
|
-
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
1408
|
-
return false;
|
1409
|
-
}
|
1410
|
-
}
|
1411
|
-
|
1412
|
-
return true;
|
1413
|
-
}
|
1414
|
-
|
1415
|
-
struct llama_model * common_load_model_from_url(
|
1416
|
-
const std::string & model_url,
|
1417
|
-
const std::string & local_path,
|
1418
|
-
const std::string & hf_token,
|
1419
|
-
const struct llama_model_params & params) {
|
1420
|
-
// Basic validation of the model_url
|
1421
|
-
if (model_url.empty()) {
|
1422
|
-
LOG_ERR("%s: invalid model_url\n", __func__);
|
1423
|
-
return NULL;
|
1424
|
-
}
|
1425
|
-
|
1426
|
-
if (!common_download_file(model_url, local_path, hf_token)) {
|
1427
|
-
return NULL;
|
1428
|
-
}
|
1429
|
-
|
1430
|
-
// check for additional GGUFs split to download
|
1431
|
-
int n_split = 0;
|
1432
|
-
{
|
1433
|
-
struct lm_gguf_init_params lm_gguf_params = {
|
1434
|
-
/*.no_alloc = */ true,
|
1435
|
-
/*.ctx = */ NULL,
|
1436
|
-
};
|
1437
|
-
auto * ctx_gguf = lm_gguf_init_from_file(local_path.c_str(), lm_gguf_params);
|
1438
|
-
if (!ctx_gguf) {
|
1439
|
-
LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str());
|
1440
|
-
return NULL;
|
1441
|
-
}
|
1442
|
-
|
1443
|
-
auto key_n_split = lm_gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT);
|
1444
|
-
if (key_n_split >= 0) {
|
1445
|
-
n_split = lm_gguf_get_val_u16(ctx_gguf, key_n_split);
|
1446
|
-
}
|
1447
|
-
|
1448
|
-
lm_gguf_free(ctx_gguf);
|
1449
|
-
}
|
1450
|
-
|
1451
|
-
if (n_split > 1) {
|
1452
|
-
char split_prefix[PATH_MAX] = {0};
|
1453
|
-
char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
|
1454
|
-
|
1455
|
-
// Verify the first split file format
|
1456
|
-
// and extract split URL and PATH prefixes
|
1457
|
-
{
|
1458
|
-
if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) {
|
1459
|
-
LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split);
|
1460
|
-
return NULL;
|
1461
|
-
}
|
1462
|
-
|
1463
|
-
if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) {
|
1464
|
-
LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split);
|
1465
|
-
return NULL;
|
1466
|
-
}
|
1467
|
-
}
|
1468
|
-
|
1469
|
-
// Prepare download in parallel
|
1470
|
-
std::vector<std::future<bool>> futures_download;
|
1471
|
-
for (int idx = 1; idx < n_split; idx++) {
|
1472
|
-
futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split, hf_token](int download_idx) -> bool {
|
1473
|
-
char split_path[PATH_MAX] = {0};
|
1474
|
-
llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split);
|
1475
|
-
|
1476
|
-
char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
|
1477
|
-
llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split);
|
1478
|
-
|
1479
|
-
return common_download_file(split_url, split_path, hf_token);
|
1480
|
-
}, idx));
|
1481
|
-
}
|
1482
|
-
|
1483
|
-
// Wait for all downloads to complete
|
1484
|
-
for (auto & f : futures_download) {
|
1485
|
-
if (!f.get()) {
|
1486
|
-
return NULL;
|
1487
|
-
}
|
1488
|
-
}
|
1489
|
-
}
|
1490
|
-
|
1491
|
-
return llama_model_load_from_file(local_path.c_str(), params);
|
1492
|
-
}
|
1493
|
-
|
1494
|
-
struct llama_model * common_load_model_from_hf(
|
1495
|
-
const std::string & repo,
|
1496
|
-
const std::string & remote_path,
|
1497
|
-
const std::string & local_path,
|
1498
|
-
const std::string & hf_token,
|
1499
|
-
const struct llama_model_params & params) {
|
1500
|
-
// construct hugging face model url:
|
1501
|
-
//
|
1502
|
-
// --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf
|
1503
|
-
// https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf
|
1504
|
-
//
|
1505
|
-
// --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf
|
1506
|
-
// https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf
|
1507
|
-
//
|
1508
|
-
|
1509
|
-
std::string model_url = "https://huggingface.co/";
|
1510
|
-
model_url += repo;
|
1511
|
-
model_url += "/resolve/main/";
|
1512
|
-
model_url += remote_path;
|
1513
|
-
|
1514
|
-
return common_load_model_from_url(model_url, local_path, hf_token, params);
|
1515
|
-
}
|
1516
|
-
|
1517
|
-
/**
|
1518
|
-
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
|
1519
|
-
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
|
1520
|
-
* - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
|
1521
|
-
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s
|
1522
|
-
* Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo)
|
1523
|
-
*
|
1524
|
-
* Return pair of <repo, file> (with "repo" already having tag removed)
|
1525
|
-
*
|
1526
|
-
* Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
|
1527
|
-
*/
|
1528
|
-
std::pair<std::string, std::string> common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) {
|
1529
|
-
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
|
1530
|
-
std::string tag = parts.size() > 1 ? parts.back() : "latest";
|
1531
|
-
std::string hf_repo = parts[0];
|
1532
|
-
if (string_split<std::string>(hf_repo, '/').size() != 2) {
|
1533
|
-
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
1534
|
-
}
|
1535
|
-
|
1536
|
-
// fetch model info from Hugging Face Hub API
|
1537
|
-
json model_info;
|
1538
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
1539
|
-
curl_slist_ptr http_headers;
|
1540
|
-
std::string res_str;
|
1541
|
-
std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag;
|
1542
|
-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
1543
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
1544
|
-
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
1545
|
-
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
1546
|
-
static_cast<std::string *>(data)->append((char * ) ptr, size * nmemb);
|
1547
|
-
return size * nmemb;
|
1548
|
-
};
|
1549
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
1550
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str);
|
1551
|
-
#if defined(_WIN32)
|
1552
|
-
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
1553
|
-
#endif
|
1554
|
-
if (!hf_token.empty()) {
|
1555
|
-
std::string auth_header = "Authorization: Bearer " + hf_token;
|
1556
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
1557
|
-
}
|
1558
|
-
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
|
1559
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
1560
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json");
|
1561
|
-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
1562
|
-
|
1563
|
-
CURLcode res = curl_easy_perform(curl.get());
|
1564
|
-
|
1565
|
-
if (res != CURLE_OK) {
|
1566
|
-
throw std::runtime_error("error: cannot make GET request to HF API");
|
1567
|
-
}
|
1568
|
-
|
1569
|
-
long res_code;
|
1570
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
1571
|
-
if (res_code == 200) {
|
1572
|
-
model_info = json::parse(res_str);
|
1573
|
-
} else if (res_code == 401) {
|
1574
|
-
throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
|
1575
|
-
} else {
|
1576
|
-
throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
|
1577
|
-
}
|
1578
|
-
|
1579
|
-
// check response
|
1580
|
-
if (!model_info.contains("ggufFile")) {
|
1581
|
-
throw std::runtime_error("error: model does not have ggufFile");
|
1582
|
-
}
|
1583
|
-
json & lm_gguf_file = model_info.at("ggufFile");
|
1584
|
-
if (!lm_gguf_file.contains("rfilename")) {
|
1585
|
-
throw std::runtime_error("error: ggufFile does not have rfilename");
|
1586
|
-
}
|
1587
|
-
|
1588
|
-
return std::make_pair(hf_repo, lm_gguf_file.at("rfilename"));
|
1589
|
-
}
|
1590
|
-
|
1591
|
-
#else
|
1592
|
-
|
1593
|
-
struct llama_model * common_load_model_from_url(
|
1594
|
-
const std::string & /*model_url*/,
|
1595
|
-
const std::string & /*local_path*/,
|
1596
|
-
const std::string & /*hf_token*/,
|
1597
|
-
const struct llama_model_params & /*params*/) {
|
1598
|
-
LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__);
|
1599
|
-
return nullptr;
|
1600
|
-
}
|
1601
|
-
|
1602
|
-
struct llama_model * common_load_model_from_hf(
|
1603
|
-
const std::string & /*repo*/,
|
1604
|
-
const std::string & /*remote_path*/,
|
1605
|
-
const std::string & /*local_path*/,
|
1606
|
-
const std::string & /*hf_token*/,
|
1607
|
-
const struct llama_model_params & /*params*/) {
|
1608
|
-
LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
|
1609
|
-
return nullptr;
|
1610
|
-
}
|
1611
|
-
|
1612
|
-
std::pair<std::string, std::string> common_get_hf_file(const std::string &, const std::string &) {
|
1613
|
-
LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__);
|
1614
|
-
return std::make_pair("", "");
|
1615
|
-
}
|
1616
|
-
|
1617
|
-
#endif // LLAMA_USE_CURL
|
1618
|
-
|
1619
1140
|
//
|
1620
1141
|
// Batch utils
|
1621
1142
|
//
|
@@ -2039,26 +1560,3 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
|
|
2039
1560
|
|
2040
1561
|
return result;
|
2041
1562
|
}
|
2042
|
-
|
2043
|
-
template <>
|
2044
|
-
json common_grammar_trigger::to_json() const {
|
2045
|
-
json out {
|
2046
|
-
{"type", (int) type},
|
2047
|
-
{"value", value},
|
2048
|
-
};
|
2049
|
-
if (type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
|
2050
|
-
out["token"] = (int) token;
|
2051
|
-
}
|
2052
|
-
return out;
|
2053
|
-
}
|
2054
|
-
|
2055
|
-
template <>
|
2056
|
-
common_grammar_trigger common_grammar_trigger::from_json(const json & in) {
|
2057
|
-
common_grammar_trigger out;
|
2058
|
-
out.type = (common_grammar_trigger_type) in.at("type").get<int>();
|
2059
|
-
out.value = in.at("value").get<std::string>();
|
2060
|
-
if (out.type == COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN) {
|
2061
|
-
out.token = (llama_token) in.at("token").get<int>();
|
2062
|
-
}
|
2063
|
-
return out;
|
2064
|
-
}
|