cui-llama.rn 1.7.4 → 1.7.6
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as published.
- package/README.md +217 -17
- package/android/src/main/CMakeLists.txt +34 -15
- package/android/src/main/java/com/rnllama/LlamaContext.java +79 -5
- package/android/src/main/java/com/rnllama/RNLlama.java +237 -0
- package/android/src/main/jni.cpp +213 -14
- package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
- package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +35 -0
- package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +34 -0
- package/cpp/README.md +1 -1
- package/cpp/chat-parser.cpp +385 -0
- package/cpp/chat-parser.h +120 -0
- package/cpp/chat.cpp +726 -596
- package/cpp/chat.h +71 -6
- package/cpp/common.cpp +56 -38
- package/cpp/common.h +9 -3
- package/cpp/ggml-backend-reg.cpp +5 -0
- package/cpp/ggml-backend.cpp +10 -2
- package/cpp/ggml-common.h +4 -0
- package/cpp/ggml-cpu/amx/amx.cpp +1 -1
- package/cpp/ggml-cpu/amx/mmq.cpp +11 -10
- package/cpp/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/cpp/ggml-cpu/arch/arm/quants.c +4114 -0
- package/cpp/ggml-cpu/arch/arm/repack.cpp +2163 -0
- package/cpp/ggml-cpu/arch/x86/cpu-feats.cpp +327 -0
- package/cpp/ggml-cpu/arch/x86/quants.c +4311 -0
- package/cpp/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +79 -3225
- package/cpp/ggml-cpu/arch-fallback.h +184 -0
- package/cpp/ggml-cpu/common.h +4 -3
- package/cpp/ggml-cpu/ggml-cpu-impl.h +21 -16
- package/cpp/ggml-cpu/ggml-cpu.c +123 -104
- package/cpp/ggml-cpu/ggml-cpu.cpp +11 -8
- package/cpp/ggml-cpu/ops.cpp +330 -148
- package/cpp/ggml-cpu/ops.h +1 -0
- package/cpp/ggml-cpu/quants.c +1158 -0
- package/cpp/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/cpp/ggml-cpu/repack.cpp +1571 -0
- package/cpp/ggml-cpu/repack.h +98 -0
- package/cpp/ggml-cpu/simd-mappings.h +330 -38
- package/cpp/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/cpp/ggml-cpu/vec.cpp +87 -18
- package/cpp/ggml-cpu/vec.h +249 -94
- package/cpp/ggml-cpu.h +1 -0
- package/cpp/ggml-impl.h +63 -183
- package/cpp/ggml-llama-sim.metallib +0 -0
- package/cpp/ggml-llama.metallib +0 -0
- package/cpp/ggml-metal.m +152 -45
- package/cpp/ggml-quants.c +0 -2
- package/cpp/ggml.c +61 -21
- package/cpp/ggml.h +22 -3
- package/cpp/gguf.cpp +24 -3
- package/cpp/json-partial.cpp +256 -0
- package/cpp/json-partial.h +38 -0
- package/cpp/json-schema-to-grammar.cpp +5 -47
- package/cpp/json-schema-to-grammar.h +4 -4
- package/cpp/llama-arch.cpp +153 -3
- package/cpp/llama-arch.h +27 -1
- package/cpp/llama-batch.cpp +741 -272
- package/cpp/llama-batch.h +112 -54
- package/cpp/llama-chat.cpp +30 -8
- package/cpp/llama-chat.h +1 -0
- package/cpp/llama-context.cpp +524 -339
- package/cpp/llama-context.h +38 -17
- package/cpp/llama-cparams.cpp +4 -0
- package/cpp/llama-cparams.h +2 -0
- package/cpp/llama-grammar.cpp +12 -2
- package/cpp/llama-graph.cpp +431 -356
- package/cpp/llama-graph.h +126 -58
- package/cpp/llama-hparams.cpp +10 -2
- package/cpp/llama-hparams.h +19 -2
- package/cpp/llama-kv-cache-unified-iswa.cpp +279 -0
- package/cpp/llama-kv-cache-unified-iswa.h +128 -0
- package/cpp/llama-kv-cache-unified.cpp +1841 -0
- package/cpp/llama-kv-cache-unified.h +303 -0
- package/cpp/llama-kv-cells.h +439 -0
- package/cpp/llama-memory-hybrid.cpp +246 -0
- package/cpp/llama-memory-hybrid.h +138 -0
- package/cpp/llama-memory-recurrent.cpp +1112 -0
- package/cpp/llama-memory-recurrent.h +183 -0
- package/cpp/llama-memory.cpp +41 -0
- package/cpp/llama-memory.h +86 -5
- package/cpp/llama-mmap.cpp +1 -1
- package/cpp/llama-model-loader.cpp +42 -17
- package/cpp/llama-model-saver.cpp +1 -0
- package/cpp/llama-model.cpp +1639 -513
- package/cpp/llama-model.h +26 -0
- package/cpp/llama-sampling.cpp +2 -2
- package/cpp/llama-vocab.cpp +65 -28
- package/cpp/llama-vocab.h +1 -0
- package/cpp/llama.cpp +11 -7
- package/cpp/llama.h +150 -42
- package/cpp/minja/chat-template.hpp +1 -1
- package/cpp/minja/minja.hpp +1 -1
- package/cpp/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/cpp/nlohmann/json_fwd.hpp +187 -0
- package/cpp/regex-partial.cpp +204 -0
- package/cpp/regex-partial.h +56 -0
- package/cpp/rn-llama.cpp +646 -35
- package/cpp/rn-llama.h +32 -1
- package/cpp/rn-tts.h +39 -0
- package/cpp/sampling.cpp +7 -8
- package/cpp/tools/mtmd/clip-impl.h +5 -0
- package/cpp/tools/mtmd/clip.cpp +572 -436
- package/cpp/tools/mtmd/clip.h +14 -4
- package/cpp/tools/mtmd/mtmd-audio.cpp +0 -86
- package/cpp/tools/mtmd/mtmd-audio.h +2 -17
- package/cpp/tools/mtmd/mtmd-helper.cpp +175 -12
- package/cpp/tools/mtmd/mtmd-helper.h +91 -0
- package/cpp/tools/mtmd/mtmd.cpp +368 -248
- package/cpp/tools/mtmd/mtmd.h +6 -70
- package/cpp/unicode.cpp +5 -0
- package/ios/CMakeLists.txt +26 -6
- package/ios/RNLlama.h +1 -1
- package/ios/RNLlama.mm +153 -3
- package/ios/RNLlamaContext.h +9 -1
- package/ios/RNLlamaContext.mm +112 -9
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/{json.hpp → nlohmann/json.hpp} +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{tvos-arm64/rnllama.framework/Headers → ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/{ios-arm64_x86_64-simulator/rnllama.framework/Headers → tvos-arm64/rnllama.framework/Headers/nlohmann}/json.hpp +3027 -2267
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat-parser.h +120 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +71 -6
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +9 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-common.h +4 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +63 -183
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +22 -3
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-partial.h +38 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json-schema-to-grammar.h +4 -4
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +27 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +112 -54
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +38 -17
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +2 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +126 -58
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +19 -2
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified-iswa.h +128 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache-unified.h +303 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cells.h +439 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-hybrid.h +138 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory-recurrent.h +183 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +86 -5
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +26 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +1 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +150 -42
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +1 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json.hpp +25526 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/nlohmann/json_fwd.hpp +187 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/regex-partial.h +56 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +32 -1
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-tts.h +39 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
- package/jest/mock.js +24 -0
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +46 -2
- package/src/index.ts +105 -1
- package/cpp/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/cpp/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/cpp/ggml-cpu/sgemm.cpp +0 -3544
- package/cpp/ggml-cpu/sgemm.h +0 -14
- package/cpp/llama-kv-cache.cpp +0 -2827
- package/cpp/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/json.hpp +0 -24766
- package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +0 -515
- /package/cpp/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
- /package/cpp/tools/mtmd/{miniaudio.h → miniaudio/miniaudio.h} +0 -0
- /package/cpp/tools/mtmd/{stb_image.h → stb/stb_image.h} +0 -0
package/cpp/llama-model.h
CHANGED
@@ -73,6 +73,7 @@ enum llm_type {
     LLM_TYPE_40B,
     LLM_TYPE_65B,
     LLM_TYPE_70B,
+    LLM_TYPE_142B,
     LLM_TYPE_236B,
     LLM_TYPE_290B,
     LLM_TYPE_314B,
@@ -94,6 +95,8 @@ enum llm_type {
     LLM_TYPE_17B_128E, // llama4 Maverick
     LLM_TYPE_30B_A3B,
     LLM_TYPE_235B_A22B,
+    LLM_TYPE_E2B,
+    LLM_TYPE_E4B,
 };

 std::string llama_rope_scaling_type_name(llama_rope_scaling_type rope_scaling_type);
@@ -315,6 +318,19 @@ struct llama_layer {
     struct lm_ggml_tensor * ffn_up_scale = nullptr;
     struct lm_ggml_tensor * ffn_down_scale = nullptr;

+    // altup & laurel
+    struct lm_ggml_tensor * per_layer_inp_gate = nullptr;
+    struct lm_ggml_tensor * per_layer_proj = nullptr;
+    struct lm_ggml_tensor * per_layer_post_norm = nullptr;
+    struct lm_ggml_tensor * altup_correct_coef = nullptr;
+    struct lm_ggml_tensor * altup_correct_scale = nullptr;
+    struct lm_ggml_tensor * altup_predict_coef = nullptr;
+    struct lm_ggml_tensor * altup_router = nullptr;
+    struct lm_ggml_tensor * altup_router_norm = nullptr;
+    struct lm_ggml_tensor * laurel_l = nullptr;
+    struct lm_ggml_tensor * laurel_r = nullptr;
+    struct lm_ggml_tensor * laurel_post_norm = nullptr;
+
     struct llama_layer_posnet posnet;

     struct llama_layer_convnext convnext;
@@ -329,6 +345,9 @@ struct llama_model {
     llama_hparams hparams = {};
     llama_vocab vocab;

+    // for classifier models
+    std::vector<std::string> classifier_labels;
+
     struct lm_ggml_tensor * tok_embd = nullptr;
     struct lm_ggml_tensor * type_embd = nullptr;
     struct lm_ggml_tensor * pos_embd = nullptr;
@@ -350,6 +369,13 @@ struct llama_model {
     struct lm_ggml_tensor * conv1d = nullptr;
     struct lm_ggml_tensor * conv1d_b = nullptr;

+    // gemma3n altup
+    struct lm_ggml_tensor * tok_embd_per_layer = nullptr;
+    struct lm_ggml_tensor * altup_proj = nullptr;
+    struct lm_ggml_tensor * altup_unembd_proj = nullptr;
+    struct lm_ggml_tensor * per_layer_model_proj = nullptr;
+    struct lm_ggml_tensor * per_layer_proj_norm = nullptr;
+
     std::vector<llama_layer> layers;

     llama_model_params params;
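
Note: the new LLM_TYPE_E2B/LLM_TYPE_E4B sizes and the AltUp/LAUREL tensor slots (the header's own comments name Gemma 3n) follow the convention used throughout llama_layer: optional, architecture-specific weights default to nullptr, and graph construction branches on their presence rather than on model type. A minimal standalone C++ sketch of that pattern, with toy types standing in for the real lm_ggml_tensor:

    #include <cstdio>

    // Toy stand-in for lm_ggml_tensor; the real struct lives in ggml.h.
    struct toy_tensor { float v; };

    // Mirrors the header's convention: architecture-specific weights are
    // nullptr unless the loader found them in the GGUF file.
    struct toy_layer {
        toy_tensor * altup_router = nullptr; // only bound for Gemma 3n-style models
        toy_tensor * laurel_l     = nullptr;
        toy_tensor * laurel_r     = nullptr;
    };

    int main() {
        toy_layer plain;                     // e.g. a LLaMA layer: extras stay null
        toy_layer gemma3n;
        toy_tensor w{1.0f};
        gemma3n.altup_router = &w;           // the loader would bind a real tensor here

        // Graph-building code can test presence instead of switching on model type.
        std::printf("plain uses altup: %s\n",   plain.altup_router   ? "yes" : "no");
        std::printf("gemma3n uses altup: %s\n", gemma3n.altup_router ? "yes" : "no");
        return 0;
    }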
package/cpp/llama-sampling.cpp
CHANGED
@@ -799,7 +799,7 @@ static void llama_sampler_min_p_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
     }

     // if we have enough values the operation was a success
-    if (filtered_tokens.size() >= ctx->min_keep) {
+    if (!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep) {
         memcpy(cur_p->data, filtered_tokens.data(), filtered_tokens.size()*sizeof(llama_token_data));
         cur_p->size = filtered_tokens.size();
         min_p_applied = true;
@@ -910,7 +910,7 @@ static void llama_sampler_typical_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
         cum_sum += cur_p->data[idx].p;

         // Check if the running sum is greater than typical or if we have kept at least min_keep tokens
-        if (cum_sum > ctx->p && i >= ctx->min_keep - 1) {
+        if (cum_sum > ctx->p && (ctx->min_keep == 0 || i >= ctx->min_keep - 1)) {
             last_idx = i + 1;
             break;
         }
package/cpp/llama-vocab.cpp
CHANGED
@@ -9,16 +9,16 @@

 #include <algorithm>
 #include <cassert>
+#include <cctype>
 #include <cfloat>
-#include <climits>
 #include <cstdarg>
 #include <cstring>
 #include <forward_list>
+#include <limits>
 #include <map>
 #include <queue>
 #include <set>
 #include <unordered_map>
-#include <cctype>

 //
 // helpers
@@ -1269,6 +1269,7 @@ struct llama_vocab::impl {
     bool add_space_prefix = false;
     bool add_bos = false;
     bool add_eos = false;
+    bool add_sep = false;
     bool ignore_merges = false;
     bool clean_spaces = false; // clean_up_tokenization_spaces
     bool remove_extra_whitespaces = false;
@@ -1421,6 +1422,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             special_sep_id = 102;
             special_pad_id = 0;
             special_mask_id = 103;
+
+            add_sep = true;
         } else if (tokenizer_model == "gpt2") {
             type = LLAMA_VOCAB_TYPE_BPE;

@@ -1550,12 +1553,15 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                 tokenizer_pre == "jina-es" ||
                 tokenizer_pre == "jina-de" ||
                 tokenizer_pre == "gigachat" ||
-                tokenizer_pre == "jina-v1-en" ||
                 tokenizer_pre == "jina-v2-es" ||
-                tokenizer_pre == "jina-v2-de" ||
+                tokenizer_pre == "jina-v2-de") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
+            } else if (
+                tokenizer_pre == "jina-v1-en" ||
                 tokenizer_pre == "jina-v2-code" ||
                 tokenizer_pre == "roberta-bpe") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
+                add_sep = true;
             } else if (
                 tokenizer_pre == "refact") {
                 pre_type = LLAMA_VOCAB_PRE_TYPE_REFACT;
@@ -1665,6 +1671,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             clean_spaces = true;
             add_bos = true;
             add_eos = false;
+            add_sep = true;
         } else if (type == LLAMA_VOCAB_TYPE_UGM) {
             pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
             add_bos = false;
@@ -1801,7 +1808,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             }
         }

-        // Handle add_bos and add_eos
+        // Handle add_bos, add_eos and add_sep
         {
             bool temp = true;

@@ -1811,6 +1818,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             if (ml.get_key(LLM_KV_TOKENIZER_ADD_EOS, temp, false)) {
                 add_eos = temp;
             }
+            if (ml.get_key(LLM_KV_TOKENIZER_ADD_SEP, temp, false)) {
+                add_sep = temp;
+            }
         }

         // auto-detect special tokens by text
@@ -1987,6 +1997,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                         || t.first == "<|eom_id|>"
                         || t.first == "<EOT>"
                         || t.first == "_<EOT>"
+                        || t.first == "<|end_of_text|>"
                    ) {
                     special_eog_ids.insert(t.second);
                     if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@@ -2059,9 +2070,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
     //NOTE: Per token attributes are missing from the GGUF file.
    //TODO: Extract attributes from GGUF file.
    {
-        auto _contains_any = [] (const std::string & str, const std::vector<std::string> & substrs) -> bool {
+        auto _contains_any = [] (const std::string & str, const std::vector<std::string_view> & substrs) -> bool {
             for (const auto & substr : substrs) {
-                if (str.find(substr) < std::string::npos) {
+                if (str.find(substr) != std::string::npos) {
                     return true;
                 }
             }
@@ -2080,9 +2091,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {

         std::string model_name;
         std::string tokenizer_pre;
+        std::string general_arch;

         ml.get_key(LLM_KV_GENERAL_NAME, model_name, false);
         ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
+        ml.get_key(LLM_KV_GENERAL_ARCHITECTURE, general_arch, false);

         // model name to lowercase
         std::transform(model_name.begin(), model_name.end(), model_name.begin(),
@@ -2091,9 +2104,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             }
         );

-        // set attributes by model/tokenizer name
-        if (_contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})) {
-            _set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
+        // set attributes by model/tokenizer/architecture name
+        if (false
+            || _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
+            || _contains_any(general_arch, {"nomic-bert-moe"})
+            ) {
+            if (token_to_id.count("<mask>") == 0) {
+                LLAMA_LOG_WARN("%s: Mask token is missing in vocab, please reconvert model!\n", __func__);
+            } else {
+                _set_token_attr("<mask>", LLAMA_TOKEN_ATTR_LSTRIP, true);
+            }
         } else if (_contains_any(model_name, {"phi-3", "phi3"})) {
             for (auto id : cache_special_tokens) {
                 _set_tokenid_attr(id, LLAMA_TOKEN_ATTR_RSTRIP, true);
@@ -2563,6 +2583,10 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t length, int32_t lstrip, bool special) const {
     // copy piece chars to output text buffer
     // skip up to 'lstrip' leading spaces before copying
     auto _try_copy = [=] (const char * token, size_t size) -> int32_t {
+        if (size >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+            LM_GGML_ABORT("invalid token size: %zu exceeds int32_t limit", size);
+        }
+
         for (int32_t i = 0; i < lstrip && size && *token == ' '; ++i) {
             token++;
             size--;
@@ -2759,26 +2783,26 @@ void llama_vocab::impl::print_info() const {
     LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (uint32_t) bpe_ranks.size());

     // special tokens
-    if (special_bos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, special_bos_id, id_to_token[special_bos_id].text.c_str() ); }
-    if (special_eos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, special_eos_id, id_to_token[special_eos_id].text.c_str() ); }
-    if (special_eot_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, special_eot_id, id_to_token[special_eot_id].text.c_str() ); }
-    if (special_eom_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOM token = %d '%s'\n", __func__, special_eom_id, id_to_token[special_eom_id].text.c_str() ); }
-    if (special_unk_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, special_unk_id, id_to_token[special_unk_id].text.c_str() ); }
-    if (special_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, special_sep_id, id_to_token[special_sep_id].text.c_str() ); }
-    if (special_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, special_pad_id, id_to_token[special_pad_id].text.c_str() ); }
-    if (special_mask_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, special_mask_id, id_to_token[special_mask_id].text.c_str() ); }
-
-    if (linefeed_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, linefeed_id, id_to_token[linefeed_id].text.c_str() ); }
-
-    if (special_fim_pre_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PRE token = %d '%s'\n", __func__, special_fim_pre_id, id_to_token[special_fim_pre_id].text.c_str() ); }
-    if (special_fim_suf_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SUF token = %d '%s'\n", __func__, special_fim_suf_id, id_to_token[special_fim_suf_id].text.c_str() ); }
-    if (special_fim_mid_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM MID token = %d '%s'\n", __func__, special_fim_mid_id, id_to_token[special_fim_mid_id].text.c_str() ); }
-    if (special_fim_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PAD token = %d '%s'\n", __func__, special_fim_pad_id, id_to_token[special_fim_pad_id].text.c_str() ); }
-    if (special_fim_rep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM REP token = %d '%s'\n", __func__, special_fim_rep_id, id_to_token[special_fim_rep_id].text.c_str() ); }
-    if (special_fim_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SEP token = %d '%s'\n", __func__, special_fim_sep_id, id_to_token[special_fim_sep_id].text.c_str() ); }
+    if (special_bos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, special_bos_id, id_to_token.at(special_bos_id).text.c_str() ); }
+    if (special_eos_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, special_eos_id, id_to_token.at(special_eos_id).text.c_str() ); }
+    if (special_eot_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, special_eot_id, id_to_token.at(special_eot_id).text.c_str() ); }
+    if (special_eom_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: EOM token = %d '%s'\n", __func__, special_eom_id, id_to_token.at(special_eom_id).text.c_str() ); }
+    if (special_unk_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, special_unk_id, id_to_token.at(special_unk_id).text.c_str() ); }
+    if (special_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, special_sep_id, id_to_token.at(special_sep_id).text.c_str() ); }
+    if (special_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, special_pad_id, id_to_token.at(special_pad_id).text.c_str() ); }
+    if (special_mask_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, special_mask_id, id_to_token.at(special_mask_id).text.c_str() ); }
+
+    if (linefeed_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, linefeed_id, id_to_token.at(linefeed_id).text.c_str() ); }
+
+    if (special_fim_pre_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PRE token = %d '%s'\n", __func__, special_fim_pre_id, id_to_token.at(special_fim_pre_id).text.c_str() ); }
+    if (special_fim_suf_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SUF token = %d '%s'\n", __func__, special_fim_suf_id, id_to_token.at(special_fim_suf_id).text.c_str() ); }
+    if (special_fim_mid_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM MID token = %d '%s'\n", __func__, special_fim_mid_id, id_to_token.at(special_fim_mid_id).text.c_str() ); }
+    if (special_fim_pad_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM PAD token = %d '%s'\n", __func__, special_fim_pad_id, id_to_token.at(special_fim_pad_id).text.c_str() ); }
+    if (special_fim_rep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM REP token = %d '%s'\n", __func__, special_fim_rep_id, id_to_token.at(special_fim_rep_id).text.c_str() ); }
+    if (special_fim_sep_id != LLAMA_TOKEN_NULL) { LLAMA_LOG_INFO( "%s: FIM SEP token = %d '%s'\n", __func__, special_fim_sep_id, id_to_token.at(special_fim_sep_id).text.c_str() ); }

     for (const auto & id : special_eog_ids) {
-        LLAMA_LOG_INFO( "%s: EOG token = %d '%s'\n", __func__, id, id_to_token[id].text.c_str() );
+        LLAMA_LOG_INFO( "%s: EOG token = %d '%s'\n", __func__, id, id_to_token.at(id).text.c_str() );
     }

     LLAMA_LOG_INFO("%s: max token length = %d\n", __func__, max_token_len);
@@ -2986,6 +3010,10 @@ bool llama_vocab::get_add_eos() const {
     return pimpl->add_eos;
 }

+bool llama_vocab::get_add_sep() const {
+    return pimpl->add_sep;
+}
+
 bool llama_vocab::get_ignore_merges() const {
     return pimpl->ignore_merges;
 }
@@ -3046,6 +3074,11 @@ int32_t llama_vocab::tokenize(
         bool add_special,
         bool parse_special) const {
     auto res = tokenize(std::string(text, text_len), add_special, parse_special);
+    if (res.size() >= static_cast<size_t>(std::numeric_limits<int32_t>::max())) {
+        LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size());
+        return std::numeric_limits<int32_t>::min();
+    }
+
     if (n_tokens_max < (int) res.size()) {
         // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__);
         return -((int) res.size());
@@ -3177,6 +3210,10 @@ bool llama_vocab_get_add_eos(const struct llama_vocab * vocab) {
     return vocab->get_add_eos();
 }

+bool llama_vocab_get_add_sep(const struct llama_vocab * vocab) {
+    return vocab->get_add_sep();
+}
+
 llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab) {
     return vocab->token_fim_pre();
 }
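
Note: the numeric_limits<int32_t> guards harden an API whose return value is overloaded: a non-negative result is the token count, a negative result means "buffer too small, -ret tokens needed", and after this change INT32_MIN signals a result too large to encode at all. A caller-side sketch of that convention, using a hypothetical toy_tokenize stand-in rather than the real llama_tokenize signature:

    #include <cstdint>
    #include <cstdio>
    #include <limits>
    #include <vector>

    // Hypothetical stand-in for a llama_tokenize-style call.
    static int32_t toy_tokenize(const char * /*text*/, int32_t * out, int32_t n_max) {
        const int32_t needed = 7;  // pretend the text tokenizes to 7 tokens
        if (n_max < needed) {
            return -needed;        // "buffer too small" signal
        }
        for (int32_t i = 0; i < needed; ++i) {
            out[i] = i;
        }
        return needed;
    }

    int main() {
        std::vector<int32_t> buf(1);
        int32_t n = toy_tokenize("hello", buf.data(), (int32_t) buf.size());
        if (n == std::numeric_limits<int32_t>::min()) {
            std::fprintf(stderr, "result exceeds int32_t range\n"); // new sentinel
            return 1;
        }
        if (n < 0) {               // grow to the reported size and retry
            buf.resize((size_t) -n);
            n = toy_tokenize("hello", buf.data(), (int32_t) buf.size());
        }
        std::printf("got %d tokens\n", n);
        return 0;
    }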
package/cpp/llama-vocab.h
CHANGED
@@ -74,6 +74,7 @@ struct llama_vocab {
     bool get_add_space_prefix () const;
     bool get_add_bos () const;
     bool get_add_eos () const;
+    bool get_add_sep () const;
     bool get_ignore_merges () const;
     bool get_clean_spaces () const;
     bool get_remove_extra_whitespaces () const;
package/cpp/llama.cpp
CHANGED
@@ -209,14 +209,18 @@ static struct llama_model * llama_model_load_from_file_impl(

     // if using single GPU mode, remove all except the main GPU
     if (params.split_mode == LLAMA_SPLIT_MODE_NONE) {
-        if (params.main_gpu < 0 || params.main_gpu >= (int)model->devices.size()) {
-            LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %zu)\n", __func__, params.main_gpu, model->devices.size());
-            llama_model_free(model);
-            return nullptr;
+        if (params.main_gpu < 0) {
+            model->devices.clear();
+        } else {
+            if (params.main_gpu >= (int)model->devices.size()) {
+                LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %zu)\n", __func__, params.main_gpu, model->devices.size());
+                llama_model_free(model);
+                return nullptr;
+            }
+            lm_ggml_backend_dev_t main_gpu = model->devices[params.main_gpu];
+            model->devices.clear();
+            model->devices.push_back(main_gpu);
         }
-        lm_ggml_backend_dev_t main_gpu = model->devices[params.main_gpu];
-        model->devices.clear();
-        model->devices.push_back(main_gpu);
     }

     for (auto * dev : model->devices) {
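
Note: this restructuring changes the semantics of main_gpu under LLAMA_SPLIT_MODE_NONE: a negative value now clears the device list (CPU-only) instead of being rejected as invalid, while an out-of-range index still fails cleanly with a null model. A sketch of the call pattern this enables (model path assumed valid):

    #include "llama.h"

    // With split_mode NONE, main_gpu == -1 now yields an empty device list
    // (CPU-only) instead of the pre-patch "invalid main_gpu" error path.
    static llama_model * load_cpu_only(const char * path) {
        llama_model_params mp = llama_model_default_params();
        mp.split_mode = LLAMA_SPLIT_MODE_NONE;
        mp.main_gpu   = -1;
        return llama_model_load_from_file(path, mp); // nullptr if main_gpu were out of range
    }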
|