cui-llama.rn 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/chat.cpp
CHANGED
@@ -4,6 +4,15 @@
 
 #include <optional>
 
+static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
+    auto time = std::chrono::system_clock::to_time_t(now);
+    auto local_time = *std::localtime(&time);
+    std::ostringstream ss;
+    ss << std::put_time(&local_time, format.c_str());
+    auto res = ss.str();
+    return res;
+}
+
 struct templates_params {
     json messages;
     json tools;
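The `format_time` helper added here is what later hunks use to fill `date_string` (Llama 3.x) and `datetime` (FireFunction v2) in prompts. A standalone sketch of its behaviour, illustration only (the `main` driver is not package code):

```cpp
// format_time renders a time_point with a strftime-style format string via
// std::put_time, e.g. "%d %b %Y" -> "26 Jul 2024" (month name is locale-dependent).
#include <chrono>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) {
    auto time = std::chrono::system_clock::to_time_t(now);
    auto local_time = *std::localtime(&time);
    std::ostringstream ss;
    ss << std::put_time(&local_time, format.c_str());
    return ss.str();
}

int main() {
    // The same format string the Llama 3.x handler passes for date_string:
    std::cout << format_time(std::chrono::system_clock::now(), "%d %b %Y") << "\n";
}
```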
@@ -14,6 +23,7 @@ struct templates_params {
     std::string grammar;
     bool add_generation_prompt = true;
     bool extract_reasoning = true;
+    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
 };
 
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
@@ -115,7 +125,9 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
             msgs.push_back(msg);
         }
     } catch (const std::exception & e) {
-        throw std::runtime_error("Failed to parse messages: " + std::string(e.what()) + "; messages = " + messages.dump(2));
+        // @ngxson : disable otherwise it's bloating the API response
+        // printf("%s\n", std::string("; messages = ") + messages.dump(2));
+        throw std::runtime_error("Failed to parse messages: " + std::string(e.what()));
     }
 
     return msgs;
@@ -927,78 +939,83 @@ static void expect_tool_parameters(const std::string & name, const json & parame
     }
 }
 
-static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
+static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
     auto builtin_tools = json::array();
     common_chat_params data;
+    if (!inputs.tools.is_null()) {
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
 
+            auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
+                if (name == "wolfram_alpha" || name == "web_search" || name == "brave_search") {
+                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
+                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
+                    expect_tool_parameters(name, parameters, {"query"});
+                } else if (name == "python" || name == "code_interpreter") {
+                    // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
+                    expect_tool_parameters(name, parameters, {"code"});
+                } else {
+                    return false;
+                }
 
+                std::vector<std::string> kvs;
+                for (const auto & [key, value] : parameters.at("properties").items()) {
+                    kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); // NOLINT
+                }
 
+                tool_rules.push_back(
+                    builder.add_rule(
+                        name + "-call",
+                        "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
+                builtin_tools.push_back(name);
 
+                return true;
+            };
 
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                std::string name = function.at("name");
+                auto parameters = function.at("parameters");
+                builder.resolve_refs(parameters);
 
+                // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
+                if (allow_python_tag_builtin_tools) {
+                    handle_builtin_tool(name, parameters);
+                }
+                tool_rules.push_back(
+                    builder.add_rule(
+                        name + "-call",
+                        "\"{\" space "
+                        "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
+                        " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
+                        " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
+                        "\"}\" space"));
+            });
+            // Small models may hallucinate function names so we match anything (*at the start*) that looks like the JSON of a function call, regardless of the name.
+            data.grammar_triggers.push_back({
+                COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START,
+                "\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"", // + name + "\"[\\s\\S]*",
+            });
+            if (!builtin_tools.empty()) {
+                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
+                data.preserved_tokens.push_back("<|python_tag|>");
             }
-            "\"{\" space "
-            "( \"\\\"type\\\"\" space \":\" space \"\\\"function\\\"\" space \",\" space )? "
-            " \"\\\"name\\\"\" space \":\" space \"\\\"" + name + "\\\"\" space \",\" space "
-            " \"\\\"parameters\\\"\" space \":\" space " + builder.add_schema(name + "-args", parameters) + " "
-            "\"}\" space"));
+            // Allow a few empty lines on top of the usual constrained json schema space rule.
+            builder.add_rule("root", string_join(tool_rules, " | "));
+            data.additional_stops.push_back("<|eom_id|>");
         });
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
-        data.preserved_tokens.push_back("<|python_tag|>");
-    }
-    // Allow a few empty lines on top of the usual constrained json schema space rule.
-    builder.add_rule("root", string_join(tool_rules, " | "));
-    });
-    data.additional_stops.push_back("<|eom_id|>");
+        data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
+            ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
+            : COMMON_CHAT_FORMAT_LLAMA_3_X;
+    } else {
+        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    }
     data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
+        {"date_string", format_time(inputs.now, "%d %b %Y")},
         {"tools_in_user_message", false},
         {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
     });
-    data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
-        ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
-        : COMMON_CHAT_FORMAT_LLAMA_3_X;
     return data;
 }
 static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
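The `COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_START` trigger registered above is an ordinary ECMAScript-style regex (the runtime applies it at the start of the model output), so its effect can be sanity-checked with `std::regex`. Illustration only; the sample payloads are made up:

```cpp
// The trigger fires on anything that begins like the JSON of a tool call,
// with or without the optional "type": "function" prefix.
#include <iostream>
#include <regex>
#include <string>

int main() {
    const std::regex trigger(R"(\{\s*(?:"type"\s*:\s*"function"\s*,\s*)?"name"\s*:\s*")");
    std::cout << std::regex_search(std::string(R"({"name": "get_weather", "parameters": {"city": "Oslo"}})"), trigger) << "\n"; // 1
    std::cout << std::regex_search(std::string(R"({"type": "function", "name": "get_weather"})"), trigger) << "\n";             // 1
    std::cout << std::regex_search(std::string("plain text, no tool call"), trigger) << "\n";                                   // 0
}
```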
@@ -1138,7 +1155,7 @@ static common_chat_params common_chat_params_init_firefunction_v2(const common_c
     LOG_DBG("%s\n", __func__);
     common_chat_params data;
     data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
-        {"datetime", "Jan 29 2025 13:00:00 GMT"},
+        {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
         {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
     });
     if (inputs.tools.is_array() && !inputs.tools.empty()) {
@@ -1273,55 +1290,59 @@ static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & in
 static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
     // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
     common_chat_params data;
-    json tools = inputs.tools.is_null() ? inputs.tools : json::array();
-    std::string python_code_argument_name;
-    auto has_raw_python = false;
 
+    if (!inputs.tools.is_null()) {
+        std::string python_code_argument_name;
+        auto has_raw_python = false;
+
+        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
+        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
+            std::vector<std::string> tool_rules;
+            foreach_function(inputs.tools, [&](const json & tool) {
+                const auto & function = tool.at("function");
+                const auto & parameters = function.at("parameters");
+                std::string name = function.at("name");
+                if (name == "python" || name == "ipython") {
+                    if (!parameters.contains("type")) {
+                        throw std::runtime_error("Missing type in python tool");
+                    }
+                    has_raw_python = true;
+                    const auto & type = parameters.at("type");
+                    if (type == "object") {
+                        auto properties = parameters.at("properties");
+                        for (auto it = properties.begin(); it != properties.end(); ++it) {
+                            if (it.value().at("type") == "string") {
+                                if (!python_code_argument_name.empty()) {
+                                    throw std::runtime_error("Multiple string arguments found in python tool");
+                                }
+                                python_code_argument_name = it.key();
                             }
-                python_code_argument_name = it.key();
                         }
+                        if (python_code_argument_name.empty()) {
+                            throw std::runtime_error("No string argument found in python tool");
+                        }
+                    } else if (type != "string") {
+                        throw std::runtime_error("Invalid type in python tool: " + type.dump());
                     }
-            if (python_code_argument_name.empty()) {
-                throw std::runtime_error("No string argument found in python tool");
-            }
-        } else if (type != "string") {
-            throw std::runtime_error("Invalid type in python tool: " + type.dump());
                 }
+                tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
+            });
+            if (has_raw_python) {
+                tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
+                data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<|python_tag|>"});
+                data.preserved_tokens.push_back("<|python_tag|>");
             }
+            auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
+            builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
+            data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
         });
-        }
-        auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
-        builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
-        data.grammar_triggers.push_back({COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function="});
-    });
+        data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
+    } else {
+        data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
+    }
 
     data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
     // TODO: if (has_raw_python)
-    data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
     return data;
 }
 static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
@@ -1581,6 +1602,7 @@ static common_chat_params common_chat_templates_apply_jinja(
     params.extract_reasoning = inputs.extract_reasoning;
     params.tool_choice = inputs.tool_choice;
     params.grammar = inputs.grammar;
+    params.now = inputs.now;
     if (!inputs.json_schema.empty()) {
         params.json_schema = json::parse(inputs.json_schema);
     }
@@ -1612,7 +1634,7 @@ static common_chat_params common_chat_templates_apply_jinja(
     }
 
     // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
-    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
+    if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null() && params.tools.is_array() && params.json_schema.is_null()) {
         return common_chat_params_init_hermes_2_pro(tmpl, params);
     }
 
@@ -1632,21 +1654,21 @@ static common_chat_params common_chat_templates_apply_jinja(
         return common_chat_params_init_firefunction_v2(tmpl, params);
     }
 
-    // Plain handler (no tools)
-    if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
-        return common_chat_params_init_without_tools(tmpl, params);
-    }
-
     // Functionary v3.1 (w/ tools)
     if (src.find("<|start_header_id|>") != std::string::npos
         && src.find("<function=") != std::string::npos) {
         return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, params);
     }
 
-    // Llama 3.1, 3.2, 3.3 (w/ tools)
+    // Llama 3.1, 3.2, 3.3 (also requires date_string so using it even w/o tools)
     if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
         auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
-        return common_chat_params_init_llama_3_1_tool_calls(tmpl, params, allow_python_tag_builtin_tools);
+        return common_chat_params_init_llama_3_x(tmpl, params, allow_python_tag_builtin_tools);
+    }
+
+    // Plain handler (no tools)
+    if (params.tools.is_null() || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
+        return common_chat_params_init_without_tools(tmpl, params);
     }
 
     // Mistral Nemo (w/ tools)
package/cpp/chat.h
CHANGED
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "common.h"
+#include <chrono>
 #include <string>
 #include <vector>
 #include "minja/chat-template.hpp"
@@ -79,6 +80,7 @@ struct common_chat_templates_inputs {
     common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
     bool parallel_tool_calls = false;
     bool extract_reasoning = true;
+    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
 };
 
 struct common_chat_params {
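A practical consequence of the new `now` field: callers can pin the timestamp that flows into `date_string`/`datetime` instead of taking the wall clock, which makes rendered prompts reproducible in tests. A minimal sketch, assuming the surrounding `common_chat_templates_apply` API from this header:

```cpp
// Sketch: render the same conversation twice and get byte-identical prompts
// by fixing `now` (the field added in this version) to a known instant.
#include <chrono>
#include "chat.h"

common_chat_params render_pinned(const common_chat_templates * tmpls) {
    common_chat_templates_inputs inputs;
    common_chat_msg msg;
    msg.role    = "user";
    msg.content = "hello";
    inputs.messages.push_back(msg);
    inputs.now = std::chrono::system_clock::from_time_t(0); // 1 Jan 1970 instead of the wall clock
    return common_chat_templates_apply(tmpls, inputs);
}
```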
package/cpp/common.cpp
CHANGED
@@ -450,6 +450,25 @@ void string_replace_all(std::string & s, const std::string & search, const std::
     s = std::move(builder);
 }
 
+bool string_ends_with(const std::string_view & str, const std::string_view & suffix) {
+    return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
+}
+size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop) {
+    if (!str.empty() && !stop.empty()) {
+        const char text_last_char = str.back();
+        for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--) {
+            if (stop[char_index] == text_last_char) {
+                const auto current_partial = stop.substr(0, char_index + 1);
+                if (string_ends_with(str, current_partial)) {
+                    return str.size() - char_index - 1;
+                }
+            }
+        }
+    }
+
+    return std::string::npos;
+}
+
 std::string regex_escape(const std::string & s) {
     static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
     return std::regex_replace(s, special_chars, "\\$0");
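These two helpers back streaming stop-word handling: `string_find_partial_stop` reports where a suffix of the generated text could be the start of a stop sequence, so a server can hold back just that tail until more tokens arrive. A self-contained check; the function bodies are copied from the hunk above, and the stop word is only an example:

```cpp
#include <cassert>
#include <cstdint>
#include <string>
#include <string_view>

static bool string_ends_with(const std::string_view & str, const std::string_view & suffix) {
    return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
}

static size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop) {
    if (!str.empty() && !stop.empty()) {
        const char text_last_char = str.back();
        for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--) {
            if (stop[char_index] == text_last_char) {
                const auto current_partial = stop.substr(0, char_index + 1);
                if (string_ends_with(str, current_partial)) {
                    return str.size() - char_index - 1;
                }
            }
        }
    }
    return std::string::npos;
}

int main() {
    // "abc<|e" ends with "<|e", a possible prefix of the stop word: hold back from index 3
    assert(string_find_partial_stop("abc<|e", "<|eot_id|>") == 3);
    // nothing at the end of the text can start the stop word: safe to flush everything
    assert(string_find_partial_stop("abcdef", "<|eot_id|>") == std::string::npos);
    return 0;
}
```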
@@ -837,7 +856,7 @@ std::string fs_get_cache_directory() {
     if (getenv("LLAMA_CACHE")) {
         cache_directory = std::getenv("LLAMA_CACHE");
     } else {
-#if defined(__linux__)
+#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
         if (std::getenv("XDG_CACHE_HOME")) {
             cache_directory = std::getenv("XDG_CACHE_HOME");
         } else {
@@ -847,7 +866,9 @@ std::string fs_get_cache_directory() {
         cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
 #elif defined(_WIN32)
         cache_directory = std::getenv("LOCALAPPDATA");
-#endif
+#else
+#  error Unknown architecture
+#endif
         cache_directory = ensure_trailing_slash(cache_directory);
         cache_directory += "llama.cpp";
     }
@@ -1034,6 +1055,19 @@ struct common_init_result common_init_from_params(common_params & params) {
     return iparams;
 }
 
+std::string get_model_endpoint() {
+    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
+    // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
+    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
+    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
+    std::string model_endpoint = "https://huggingface.co/";
+    if (endpoint_env) {
+        model_endpoint = endpoint_env;
+        if (model_endpoint.back() != '/') model_endpoint += '/';
+    }
+    return model_endpoint;
+}
+
 void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
     llama_clear_adapter_lora(ctx);
     for (auto & la : lora) {
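Behaviour of the new `get_model_endpoint`: `MODEL_ENDPOINT` wins over the legacy `HF_ENDPOINT`, the default is `https://huggingface.co/`, and a trailing slash is appended when missing. A quick sketch; the function body is copied from the hunk, while the `setenv` calls and URLs are illustrative only:

```cpp
#include <cstdlib>
#include <iostream>
#include <string>

std::string get_model_endpoint() {
    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
    // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
    std::string model_endpoint = "https://huggingface.co/";
    if (endpoint_env) {
        model_endpoint = endpoint_env;
        if (model_endpoint.back() != '/') model_endpoint += '/';
    }
    return model_endpoint;
}

int main() {
    setenv("HF_ENDPOINT", "https://mirror.example.com", 1);      // legacy variable, no trailing slash
    std::cout << get_model_endpoint() << "\n";                   // https://mirror.example.com/
    setenv("MODEL_ENDPOINT", "https://models.example.org/", 1);  // new variable takes precedence
    std::cout << get_model_endpoint() << "\n";                   // https://models.example.org/
}
```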
@@ -1078,6 +1112,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
         mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
     }
 
+    mparams.progress_callback = params.load_progress_callback;
+    mparams.progress_callback_user_data = params.load_progress_callback_user_data;
+
     return mparams;
 }
 
@@ -1091,7 +1128,6 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.n_threads = params.cpuparams.n_threads;
     cparams.n_threads_batch = params.cpuparams_batch.n_threads == -1 ?
                               params.cpuparams.n_threads : params.cpuparams_batch.n_threads;
-    cparams.logits_all = params.logits_all;
     cparams.embeddings = params.embedding;
     cparams.rope_scaling_type = params.rope_scaling_type;
     cparams.rope_freq_base = params.rope_freq_base;
@@ -1109,6 +1145,8 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.offload_kqv = !params.no_kv_offload;
     cparams.flash_attn = params.flash_attn;
     cparams.no_perf = params.no_perf;
+    cparams.op_offload = !params.no_op_offload;
+    cparams.swa_full = params.swa_full;
 
     if (params.reranking) {
         cparams.embeddings = true;
@@ -1301,81 +1339,6 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
     return text;
 }
 
-//
-// KV cache utils
-//
-
-void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
-    static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
-
-    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
-        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
-
-    llama_kv_cache_view_cell * c_curr = view.cells;
-    llama_seq_id * cs_curr = view.cells_sequences;
-
-    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
-        if (i % row_size == 0) {
-            printf("\n%5d: ", i);
-        }
-        int seq_count = 0;
-        for (int j = 0; j < view.n_seq_max; j++) {
-            if (cs_curr[j] >= 0) { seq_count++; }
-        }
-        putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
-    }
-
-    printf("\n=== Done dumping\n");
-}
-
-void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
-    static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-
-    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
-        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
-
-    std::unordered_map<llama_seq_id, size_t> seqs;
-    llama_kv_cache_view_cell * c_curr = view.cells;
-    llama_seq_id * cs_curr = view.cells_sequences;
-
-    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
-        for (int j = 0; j < view.n_seq_max; j++) {
-            if (cs_curr[j] < 0) { continue; }
-            if (seqs.find(cs_curr[j]) == seqs.end()) {
-                if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
-                const size_t sz = seqs.size();
-                seqs[cs_curr[j]] = sz;
-            }
-        }
-        if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
-    }
-
-    printf("=== Sequence legend: ");
-    for (const auto & it : seqs) {
-        printf("%zu=%d, ", it.second, it.first);
-    }
-    printf("'+'=other sequence ids");
-
-    c_curr = view.cells;
-    cs_curr = view.cells_sequences;
-    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
-        if (i % row_size == 0) {
-            printf("\n%5d: ", i);
-        }
-        for (int j = 0; j < view.n_seq_max; j++) {
-            if (cs_curr[j] >= 0) {
-                const auto & it = seqs.find(cs_curr[j]);
-                putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
-            } else {
-                putchar('.');
-            }
-        }
-        putchar(' ');
-    }
-
-    printf("\n=== Done dumping\n");
-}
-
 //
 // Embedding utils
 //
@@ -1560,3 +1523,20 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
 
     return result;
 }
+
+lm_ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride) {
+    const int64_t ne_datapoint = llama_n_ctx(ctx);
+    const int64_t ndata = (tokens.size() - ne_datapoint - 1) / stride;
+    lm_ggml_opt_dataset_t result = lm_ggml_opt_dataset_init(
+        LM_GGML_TYPE_I32, LM_GGML_TYPE_I32, ne_datapoint, ne_datapoint, ndata, /*ndata_shard =*/ 1);
+
+    llama_token * data = (llama_token *) lm_ggml_opt_dataset_data(result)->data;
+    llama_token * labels = (llama_token *) lm_ggml_opt_dataset_labels(result)->data;
+
+    for (int64_t idata = 0; idata < ndata; ++idata) {
+        memcpy(data + idata*ne_datapoint, tokens.data() + idata*stride + 0, ne_datapoint*sizeof(llama_token));
+        memcpy(labels + idata*ne_datapoint, tokens.data() + idata*stride + 1, ne_datapoint*sizeof(llama_token));
+    }
+
+    return result;
+}
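For orientation, `common_opt_dataset_init` packs overlapping next-token-prediction windows: datapoint `i` holds `tokens[i*stride .. i*stride + n_ctx - 1]`, and its labels are the same window shifted right by one token. A toy illustration of that indexing with made-up sizes (plain C++, not the ggml-opt API):

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    const int64_t n_ctx = 4, stride = 2;                      // stand-ins for llama_n_ctx(ctx) and the stride argument
    const std::vector<int> tokens = {0,1,2,3,4,5,6,7,8,9,10}; // 11 toy tokens
    const int64_t ndata = ((int64_t) tokens.size() - n_ctx - 1) / stride; // (11 - 4 - 1) / 2 = 3 datapoints
    for (int64_t i = 0; i < ndata; ++i) {
        // inputs: tokens[i*stride .. i*stride+n_ctx-1]; labels: the same window shifted by +1
        printf("data[%lld] = tokens[%lld..%lld]   labels[%lld] = tokens[%lld..%lld]\n",
               (long long) i, (long long) (i*stride), (long long) (i*stride + n_ctx - 1),
               (long long) i, (long long) (i*stride + 1), (long long) (i*stride + n_ctx));
    }
    return 0;
}
```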