cui-llama.rn 1.6.0 → 1.7.0
This diff compares the contents of two publicly released versions of the package, as published to their registry. It is provided for informational purposes only.
- package/README.md +35 -7
- package/android/src/main/CMakeLists.txt +22 -11
- package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
- package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
- package/android/src/main/jni.cpp +173 -18
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
- package/cpp/LICENSE +21 -0
- package/cpp/chat.cpp +129 -107
- package/cpp/chat.h +2 -0
- package/cpp/common.cpp +58 -78
- package/cpp/common.h +29 -21
- package/cpp/ggml-alloc.c +4 -1
- package/cpp/ggml-backend.cpp +9 -5
- package/cpp/ggml-backend.h +4 -4
- package/cpp/ggml-cpp.h +1 -1
- package/cpp/ggml-cpu/amx/amx.cpp +221 -0
- package/cpp/ggml-cpu/amx/amx.h +8 -0
- package/cpp/ggml-cpu/amx/common.h +91 -0
- package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
- package/cpp/ggml-cpu/amx/mmq.h +10 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
- package/cpp/ggml-cpu/common.h +72 -0
- package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
- package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
- package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
- package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
- package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
- package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
- package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
- package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
- package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
- package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
- package/cpp/ggml-cpu.h +5 -0
- package/cpp/ggml-impl.h +16 -9
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal-impl.h +36 -11
- package/cpp/ggml-metal.m +810 -176
- package/cpp/ggml-opt.cpp +373 -190
- package/cpp/ggml-opt.h +49 -28
- package/cpp/ggml-quants.c +0 -6
- package/cpp/ggml.c +227 -282
- package/cpp/ggml.h +82 -101
- package/cpp/gguf.cpp +33 -33
- package/cpp/json-schema-to-grammar.cpp +3 -0
- package/cpp/llama-adapter.cpp +6 -0
- package/cpp/llama-arch.cpp +49 -17
- package/cpp/llama-arch.h +9 -0
- package/cpp/llama-batch.cpp +8 -2
- package/cpp/llama-batch.h +2 -1
- package/cpp/llama-chat.cpp +39 -16
- package/cpp/llama-chat.h +4 -2
- package/cpp/llama-context.cpp +440 -611
- package/cpp/llama-context.h +44 -33
- package/cpp/llama-cparams.h +1 -0
- package/cpp/llama-graph.cpp +214 -291
- package/cpp/llama-graph.h +69 -21
- package/cpp/llama-hparams.cpp +17 -1
- package/cpp/llama-hparams.h +39 -5
- package/cpp/llama-kv-cache.cpp +2067 -620
- package/cpp/llama-kv-cache.h +410 -108
- package/cpp/llama-memory.h +12 -1
- package/cpp/llama-model-loader.cpp +24 -15
- package/cpp/llama-model-saver.cpp +281 -0
- package/cpp/llama-model-saver.h +37 -0
- package/cpp/llama-model.cpp +1089 -359
- package/cpp/llama-model.h +19 -3
- package/cpp/llama-sampling.cpp +20 -7
- package/cpp/llama-vocab.cpp +54 -9
- package/cpp/llama-vocab.h +6 -0
- package/cpp/llama.cpp +14 -0
- package/cpp/llama.h +86 -142
- package/cpp/minja/chat-template.hpp +9 -5
- package/cpp/minja/minja.hpp +69 -36
- package/cpp/rn-llama.cpp +602 -190
- package/cpp/rn-llama.h +34 -8
- package/cpp/sampling.cpp +57 -50
- package/cpp/tools/mtmd/clip-impl.h +462 -0
- package/cpp/tools/mtmd/clip.cpp +4024 -0
- package/cpp/tools/mtmd/clip.h +101 -0
- package/cpp/tools/mtmd/miniaudio.h +93468 -0
- package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
- package/cpp/tools/mtmd/mtmd.cpp +942 -0
- package/cpp/tools/mtmd/mtmd.h +362 -0
- package/cpp/tools/mtmd/stb_image.h +7988 -0
- package/ios/CMakeLists.txt +20 -10
- package/ios/RNLlama.h +6 -0
- package/ios/RNLlama.mm +82 -3
- package/ios/RNLlamaContext.h +5 -1
- package/ios/RNLlamaContext.mm +131 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +33 -7
- package/lib/commonjs/NativeRNLlama.js.map +1 -1
- package/lib/commonjs/index.js +153 -21
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/NativeRNLlama.js.map +1 -1
- package/lib/module/index.js +152 -20
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +54 -4
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +72 -6
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +72 -4
- package/src/index.ts +212 -38
- package/cpp/binary-ops.h +0 -16
- package/cpp/ops.h +0 -128
- package/cpp/simd-mappings.h +0 -888
- package/cpp/unary-ops.h +0 -28
- package/cpp/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
- package/lib/commonjs/chat.js +0 -37
- package/lib/commonjs/chat.js.map +0 -1
- package/lib/module/chat.js +0 -33
- package/lib/module/chat.js.map +0 -1
- package/lib/typescript/chat.d.ts +0 -10
- package/lib/typescript/chat.d.ts.map +0 -1
- package/src/chat.ts +0 -44
- /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
- /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
- /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
- /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
- /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
- /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
- /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
- /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
package/cpp/llama-model.h
CHANGED
@@ -36,14 +36,17 @@ enum llm_type {
     LLM_TYPE_335M,
     LLM_TYPE_410M,
     LLM_TYPE_450M,
+    LLM_TYPE_475M,
     LLM_TYPE_770M,
     LLM_TYPE_780M,
     LLM_TYPE_0_5B,
+    LLM_TYPE_0_6B,
     LLM_TYPE_1B,
     LLM_TYPE_1_3B,
     LLM_TYPE_1_4B,
     LLM_TYPE_1_5B,
     LLM_TYPE_1_6B,
+    LLM_TYPE_1_7B,
     LLM_TYPE_1_8B,
     LLM_TYPE_2B,
     LLM_TYPE_2_8B,
@@ -62,6 +65,7 @@ enum llm_type {
     LLM_TYPE_15B,
     LLM_TYPE_16B,
     LLM_TYPE_20B,
+    LLM_TYPE_27B,
     LLM_TYPE_30B,
     LLM_TYPE_32B,
     LLM_TYPE_34B,
@@ -70,7 +74,9 @@ enum llm_type {
     LLM_TYPE_65B,
     LLM_TYPE_70B,
     LLM_TYPE_236B,
+    LLM_TYPE_290B,
     LLM_TYPE_314B,
+    LLM_TYPE_405B,
     LLM_TYPE_671B,
     LLM_TYPE_SMALL,
     LLM_TYPE_MEDIUM,
@@ -84,12 +90,14 @@ enum llm_type {
     LLM_TYPE_16x3_8B,
     LLM_TYPE_10B_128x3_66B,
     LLM_TYPE_57B_A14B,
-    LLM_TYPE_27B,
-    LLM_TYPE_290B,
     LLM_TYPE_17B_16E, // llama4 Scout
     LLM_TYPE_17B_128E, // llama4 Maverick
+    LLM_TYPE_30B_A3B,
+    LLM_TYPE_235B_A22B,
 };
 
+std::string llama_rope_scaling_type_name(llama_rope_scaling_type rope_scaling_type);
+
 struct llama_layer_posnet {
     // resnet
     struct lm_ggml_tensor * norm1 = nullptr;
@@ -171,6 +179,8 @@ struct llama_layer {
     struct lm_ggml_tensor * wq_b = nullptr;
     struct lm_ggml_tensor * wkv_a_mqa = nullptr;
     struct lm_ggml_tensor * wkv_b = nullptr;
+    struct lm_ggml_tensor * wk_b = nullptr;
+    struct lm_ggml_tensor * wv_b = nullptr;
     struct lm_ggml_tensor * wq_cross = nullptr;
     struct lm_ggml_tensor * wk_cross = nullptr;
     struct lm_ggml_tensor * wv_cross = nullptr;
@@ -388,8 +398,14 @@ struct llama_model {
 
     const struct lm_ggml_tensor * get_tensor(const char * name) const;
 
+    float get_rope_freq_base (const llama_cparams & cparams, int il) const;
+    float get_rope_freq_scale(const llama_cparams & cparams, int il) const;
+
+    lm_ggml_tensor * get_rope_factors(const llama_cparams & cparams, int il) const;
+
+    // note: can mutate `cparams`
     // TODO: move this to new llm_arch_model_i interface
-    llama_memory_i * create_memory(
+    llama_memory_i * create_memory(const llama_memory_params & params, llama_cparams & cparams) const;
 
     // TODO: move this to new llm_arch_model_i interface
     llm_graph_result_ptr build_graph(
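The new `get_rope_freq_base`/`get_rope_freq_scale`/`get_rope_factors` accessors centralize per-layer RoPE parameter lookup on `llama_model`. A minimal internal-only sketch of calling them, assuming a `model` and `cparams` already set up by the surrounding llama.cpp code (the `dump_rope_params` helper itself is hypothetical; the accessor signatures come from the header diff above):

```cpp
// Hypothetical helper: log the per-layer RoPE parameters resolved by the
// new llama_model accessors. Not a public API; internal headers only.
#include "llama-model.h"
#include "llama-cparams.h"

#include <cstdio>

static void dump_rope_params(const llama_model & model, const llama_cparams & cparams) {
    for (int il = 0; il < (int) model.hparams.n_layer; ++il) {
        const float freq_base  = model.get_rope_freq_base (cparams, il);
        const float freq_scale = model.get_rope_freq_scale(cparams, il);
        std::printf("layer %2d: rope freq_base = %.1f, freq_scale = %.3f\n",
                    il, freq_base, freq_scale);
    }
}
```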
package/cpp/llama-sampling.cpp
CHANGED
@@ -232,7 +232,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
     // }
 
     if (k <= 0) {
-        k = cur_p->size;
+        return;
     }
 
     k = std::min(k, (int) cur_p->size);
@@ -298,6 +298,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
         }
         cur_p->sorted = true;
     }
+
     cur_p->size = k;
 }
 
@@ -1750,23 +1751,35 @@ static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler *
 static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
     const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;
 
+    if (ctx->n <= 0.0f || cur_p->size <= 1) {
+        return;
+    }
+
     // find max logit and calculate mean
     float max = cur_p->data[0].logit;
     float logits_sum = 0;
+    size_t valid_count = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        if (cur_p->data[i].logit > max) {
-            max = cur_p->data[i].logit;
+        // Only count non-negative infinity values
+        if (cur_p->data[i].logit != -INFINITY) {
+            if (cur_p->data[i].logit > max) {
+                max = cur_p->data[i].logit;
+            }
+            logits_sum += cur_p->data[i].logit;
+            valid_count++;
         }
-        logits_sum += cur_p->data[i].logit;
     }
-    float mean = logits_sum/cur_p->size;
+    float mean = valid_count > 0 ? logits_sum/valid_count : 0;
 
     // calculate standard deviation
     float acc = 0;
     for (size_t i = 0; i < cur_p->size; ++i) {
-        acc += pow(cur_p->data[i].logit - mean, 2);
+        // Skip -infinity in std calculation
+        if (cur_p->data[i].logit != -INFINITY) {
+            acc += pow(cur_p->data[i].logit - mean, 2);
+        }
    }
-    float std = sqrt(acc/cur_p->size);
+    float std = valid_count > 0 ? sqrt(acc/valid_count) : 0;
 
     //apply mask
     for (size_t i = 0; i < cur_p->size; ++i) {
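The reworked top-n-sigma statistics now skip candidates whose logits were already masked to `-INFINITY` by earlier samplers, so masked entries no longer drag down the mean and standard deviation. A self-contained sketch of that masked computation (function and variable names are illustrative, not library API):

```cpp
// Masked mean/stddev over logits, mirroring the top-n-sigma change above:
// entries equal to -INFINITY are excluded from both statistics.
#include <cmath>
#include <cstdio>
#include <vector>

static void masked_mean_std(const std::vector<float> & logits, float & mean, float & std_dev) {
    double sum = 0.0;
    size_t valid = 0;
    for (float l : logits) {
        if (l != -INFINITY) { // skip masked-out candidates
            sum += l;
            valid++;
        }
    }
    mean = valid > 0 ? (float) (sum / valid) : 0.0f;

    double acc = 0.0;
    for (float l : logits) {
        if (l != -INFINITY) {
            acc += (l - mean) * (l - mean);
        }
    }
    std_dev = valid > 0 ? (float) std::sqrt(acc / valid) : 0.0f;
}

int main() {
    const std::vector<float> logits = { 2.0f, 1.0f, -INFINITY, 0.5f };
    float mean = 0.0f, sd = 0.0f;
    masked_mean_std(logits, mean, sd);
    std::printf("mean=%.3f std=%.3f\n", mean, sd); // the -INFINITY entry is ignored
}
```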
package/cpp/llama-vocab.cpp
CHANGED
@@ -1,5 +1,7 @@
 #include "llama-vocab.h"
 
+#include "ggml.h"
+#include "gguf.h"
 #include "llama-impl.h"
 #include "llama-model-loader.h"
 
@@ -415,6 +417,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                     "'(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
                 };
                 break;
+            case LLAMA_VOCAB_PRE_TYPE_SEED_CODER:
+                regex_exprs = {
+                    // original regex from tokenizer.json
+                    // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1}| ?[^\\s\\p{L}\\p{N}\r\n]+|\\s*[\r\n]+|\\s+(?!\\S)|\\s+"
+                    "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1}| ?[^\\s\\p{L}\\p{N}\\r\\n]+|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+                };
+                break;
             default:
                 // default regex for BPE tokenization pre-processing
                 regex_exprs = {
@@ -826,7 +835,7 @@ struct llm_tokenizer_ugm_session {
        }
 
        // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
        // at the beginning tokenization score is zero
        tokenization_results[0] = { vocab.token_unk(), 0, 0 };
 
@@ -858,7 +867,7 @@ struct llm_tokenizer_ugm_session {
                const double challenger_score = current_best.score_sum + token_score;
                struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { token_id, input_offset, challenger_score };
                    current_champ = challenger;
                }
            }
@@ -872,7 +881,7 @@ struct llm_tokenizer_ugm_session {
                prefix_offset = input_offset + n_utf8_code_units;
                struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
                    current_champ = challenger;
                }
            }
@@ -998,7 +1007,7 @@ private:
    struct best_tokenization {
        llama_token token_id;
        size_t input_offset;
-        float score_sum;
+        double score_sum;
    };
 
    struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
@@ -1227,6 +1236,9 @@ struct fragment_buffer_variant {
 struct llama_vocab::impl {
     uint32_t n_token_types = 0; // for BERT-style token types
 
+    std::string tokenizer_model;
+    std::string tokenizer_pre;
+
     enum llama_vocab_type type = LLAMA_VOCAB_TYPE_SPM;
     enum llama_vocab_pre_type pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
 
@@ -1362,9 +1374,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
    // determine vocab type
    {
-        std::string tokenizer_model;
-        std::string tokenizer_pre;
-
        ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
        ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
 
@@ -1459,7 +1468,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
 
        const int precompiled_charsmap_keyidx = lm_gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP).c_str());
        if (precompiled_charsmap_keyidx != -1) {
-            size_t n_precompiled_charsmap = lm_gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
+            const lm_gguf_type pc_type = lm_gguf_get_arr_type(ctx, precompiled_charsmap_keyidx);
+            LM_GGML_ASSERT(pc_type == LM_GGUF_TYPE_INT8 || pc_type == LM_GGUF_TYPE_UINT8);
+
+            const size_t n_precompiled_charsmap = lm_gguf_get_arr_n(ctx, precompiled_charsmap_keyidx);
            const char * pc = (const char *) lm_gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
            precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
 #ifdef IS_BIG_ENDIAN
@@ -1506,7 +1518,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "llama3" ||
                tokenizer_pre == "llama-v3" ||
                tokenizer_pre == "llama-bpe"||
-                tokenizer_pre == "falcon3") {
+                tokenizer_pre == "falcon3" ||
+                tokenizer_pre == "pixtral") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
                ignore_merges = true;
                add_bos = true;
@@ -1572,6 +1585,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                pre_type = LLAMA_VOCAB_PRE_TYPE_PORO;
                clean_spaces = false;
            } else if (
+                tokenizer_pre == "glm4" ||
                tokenizer_pre == "chatglm-bpe") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_CHATGLM4;
                special_bos_id = LLAMA_TOKEN_NULL;
@@ -1632,6 +1646,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "bailingmoe") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_BAILINGMOE;
                clean_spaces = false;
+            } else if (
+                tokenizer_pre == "seed-coder") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER;
+                clean_spaces = false;
            } else {
                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
            }
@@ -1840,6 +1858,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            if (false
                || t.first == "<|fim_prefix|>" // Qwen
                || t.first == "<fim-prefix>"
+                || t.first == "<fim_prefix>" // Granite
                || t.first == "<|fim▁begin|>" // DeepSeek
                || t.first == "<PRE>"
                || t.first == "▁<PRE>" // CodeLlama
@@ -1858,6 +1877,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            if (false
                || t.first == "<|fim_suffix|>" // Qwen
                || t.first == "<fim-suffix>"
+                || t.first == "<fim_suffix>" // Granite
                || t.first == "<|fim▁hole|>" // DeepSeek
                || t.first == "<SUF>"
                || t.first == "▁<SUF>" // CodeLlama
@@ -1876,6 +1896,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            if (false
                || t.first == "<|fim_middle|>" // Qwen
                || t.first == "<fim-middle>"
+                || t.first == "<fim_middle>" // Granite
                || t.first == "<|fim▁end|>" // DeepSeek
                || t.first == "<MID>"
                || t.first == "▁<MID>" // CodeLlama
@@ -1894,6 +1915,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            if (false
                || t.first == "<|fim_pad|>" // Qwen
                || t.first == "<fim-pad>"
+                || t.first == "<fim_pad>" // Granite
                || t.first == "<PAD>"
            ) {
                special_fim_pad_id = t.second;
@@ -1912,6 +1934,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                || t.first == "<|repo_name|>"
                || t.first == "<fim-repo>"
                || t.first == "<REPO>"
+                || t.first == "<reponame>" // Granite
            ) {
                special_fim_rep_id = t.second;
                if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -2771,6 +2794,14 @@ void llama_vocab::load(llama_model_loader & ml, const LLM_KV & kv) {
     pimpl->load(ml, kv);
 }
 
+std::string llama_vocab::get_tokenizer_model() const {
+    return pimpl->tokenizer_model;
+}
+
+std::string llama_vocab::get_tokenizer_pre() const {
+    return pimpl->tokenizer_pre;
+}
+
 enum llama_vocab_type llama_vocab::get_type() const {
     return pimpl->type;
 }
@@ -2993,6 +3024,20 @@ int llama_vocab::find_bpe_rank(const std::string & token_left, const std::string
     return it->second;
 }
 
+std::vector<std::string> llama_vocab::get_bpe_merges() const {
+    std::vector<std::string> result(pimpl->bpe_ranks.size());
+
+    for (const auto & pair : pimpl->bpe_ranks) {
+        result[pair.second] = pair.first.first + " " + pair.first.second;
+    }
+
+    return result;
+}
+
+std::vector<char> llama_vocab::get_precompiled_charsmap() const {
+    return pimpl->precompiled_charsmap;
+}
+
 int32_t llama_vocab::tokenize(
         const char * text,
         int32_t text_len,
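`get_bpe_merges()` inverts the `bpe_ranks` map (a pair of merged tokens keyed to its merge rank) back into the rank-ordered list of `"left right"` merge strings that GGUF stores. A standalone sketch of the same inversion over a toy map (the data here is illustrative):

```cpp
// Invert a (left, right) -> rank map into a rank-ordered merge list,
// mirroring llama_vocab::get_bpe_merges() above.
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
    // toy stand-in for llama_vocab::impl::bpe_ranks
    const std::map<std::pair<std::string, std::string>, int> bpe_ranks = {
        {{ "h",  "e"  }, 0},
        {{ "l",  "l"  }, 1},
        {{ "he", "ll" }, 2},
    };

    std::vector<std::string> merges(bpe_ranks.size());
    for (const auto & pair : bpe_ranks) {
        merges[pair.second] = pair.first.first + " " + pair.first.second;
    }

    for (const auto & m : merges) {
        std::printf("%s\n", m.c_str()); // prints "h e", "l l", "he ll" in rank order
    }
}
```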
package/cpp/llama-vocab.h
CHANGED
@@ -21,6 +21,9 @@ struct llama_vocab {
 
     void load(llama_model_loader & ml, const LLM_KV & kv);
 
+    std::string get_tokenizer_model() const;
+    std::string get_tokenizer_pre() const;
+
     enum llama_vocab_type get_type() const;
     enum llama_vocab_pre_type get_pre_type() const;
 
@@ -80,6 +83,9 @@ struct llama_vocab {
     int max_token_len() const;
 
     int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
+    std::vector<std::string> get_bpe_merges() const;
+
+    std::vector<char> get_precompiled_charsmap() const;
 
     int32_t tokenize(
         const char * text,
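Together these accessors let internal callers, such as the new model saver, read back tokenizer metadata that was previously discarded after loading. A hypothetical internal-only sketch (`print_tokenizer_info` is not part of the library; only the accessors declared above are):

```cpp
// Hypothetical helper: report the tokenizer metadata now retained by
// llama_vocab, using only the accessors from the header diff above.
#include "llama-vocab.h"

#include <cstdio>

static void print_tokenizer_info(const llama_vocab & vocab) {
    std::printf("tokenizer.ggml.model       : %s\n", vocab.get_tokenizer_model().c_str());
    std::printf("tokenizer.ggml.pre         : %s\n", vocab.get_tokenizer_pre().c_str());
    std::printf("bpe merges                 : %zu\n", vocab.get_bpe_merges().size());
    std::printf("precompiled charsmap bytes : %zu\n", vocab.get_precompiled_charsmap().size());
}
```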
package/cpp/llama.cpp
CHANGED
@@ -4,6 +4,7 @@
 #include "llama-mmap.h"
 #include "llama-vocab.h"
 #include "llama-model-loader.h"
+#include "llama-model-saver.h"
 #include "llama-model.h"
 
 #include "ggml.h"
@@ -150,6 +151,11 @@ static struct llama_model * llama_model_load_from_file_impl(
         struct llama_model_params params) {
     lm_ggml_time_init();
 
+    if (!params.vocab_only && lm_ggml_backend_reg_count() == 0) {
+        LLAMA_LOG_ERROR("%s: no backends are loaded. hint: use lm_ggml_backend_load() or lm_ggml_backend_load_all() to load a backend before calling this function\n", __func__);
+        return nullptr;
+    }
+
     unsigned cur_percentage = 0;
     if (params.progress_callback == NULL) {
         params.progress_callback_user_data = &cur_percentage;
@@ -264,6 +270,13 @@ struct llama_model * llama_model_load_from_splits(
     return llama_model_load_from_file_impl(splits.front(), splits, params);
 }
 
+void llama_model_save_to_file(const struct llama_model * model, const char * path_model) {
+    llama_model_saver ms(*model);
+    ms.add_kv_from_model();
+    ms.add_tensors_from_model();
+    ms.save(path_model);
+}
+
 //
 // chat templates
 //
@@ -349,3 +362,4 @@ const char * llama_print_system_info(void) {
 
     return s.c_str();
 }
+
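Together with the new `llama-model-saver.{h,cpp}` files, `llama_model_save_to_file` rounds out a public save path for loaded models. A minimal usage sketch through the C API, assuming a local `model.gguf` (paths and error handling are illustrative):

```cpp
// Load a GGUF model and write it back out through the new saver API.
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    if (model != nullptr) {
        // re-serializes the model's KV metadata and tensors to a new file
        llama_model_save_to_file(model, "model-resaved.gguf");
        llama_model_free(model);
    }

    llama_backend_free();
    return 0;
}
```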