@novastera-oss/llamarn 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/PureCppImpl.cpp +9 -27
- package/cpp/SystemUtils.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +60 -5
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
#pragma once
|
|
4
4
|
|
|
5
5
|
#include "common.h"
|
|
6
|
+
#include <functional>
|
|
7
|
+
#include <chrono>
|
|
6
8
|
#include <string>
|
|
7
9
|
#include <vector>
|
|
8
10
|
|
|
@@ -12,11 +14,19 @@ struct common_chat_tool_call {
|
|
|
12
14
|
std::string name;
|
|
13
15
|
std::string arguments;
|
|
14
16
|
std::string id;
|
|
17
|
+
|
|
18
|
+
bool operator==(const common_chat_tool_call & other) const {
|
|
19
|
+
return name == other.name && arguments == other.arguments && id == other.id;
|
|
20
|
+
}
|
|
15
21
|
};
|
|
16
22
|
|
|
17
23
|
struct common_chat_msg_content_part {
|
|
18
24
|
std::string type;
|
|
19
25
|
std::string text;
|
|
26
|
+
|
|
27
|
+
bool operator==(const common_chat_msg_content_part & other) const {
|
|
28
|
+
return type == other.type && text == other.text;
|
|
29
|
+
}
|
|
20
30
|
};
|
|
21
31
|
|
|
22
32
|
struct common_chat_msg {
|
|
@@ -27,6 +37,51 @@ struct common_chat_msg {
|
|
|
27
37
|
std::string reasoning_content;
|
|
28
38
|
std::string tool_name;
|
|
29
39
|
std::string tool_call_id;
|
|
40
|
+
|
|
41
|
+
template <class T> T to_json_oaicompat() const;
|
|
42
|
+
|
|
43
|
+
bool empty() const {
|
|
44
|
+
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
|
|
45
|
+
}
|
|
46
|
+
void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
|
|
47
|
+
for (auto i = 0u; i < tool_calls.size(); i++) {
|
|
48
|
+
if (ids_cache.size() <= i) {
|
|
49
|
+
auto id = tool_calls[i].id;
|
|
50
|
+
if (id.empty()) {
|
|
51
|
+
id = gen_tool_call_id();
|
|
52
|
+
}
|
|
53
|
+
ids_cache.push_back(id);
|
|
54
|
+
}
|
|
55
|
+
tool_calls[i].id = ids_cache[i];
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
bool operator==(const common_chat_msg & other) const {
|
|
59
|
+
return role == other.role
|
|
60
|
+
&& content == other.content
|
|
61
|
+
&& content_parts == other.content_parts
|
|
62
|
+
&& tool_calls == other.tool_calls
|
|
63
|
+
&& reasoning_content == other.reasoning_content
|
|
64
|
+
&& tool_name == other.tool_name
|
|
65
|
+
&& tool_call_id == other.tool_call_id;
|
|
66
|
+
}
|
|
67
|
+
bool operator!=(const common_chat_msg & other) const {
|
|
68
|
+
return !(*this == other);
|
|
69
|
+
}
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
struct common_chat_msg_diff {
|
|
73
|
+
// std::string reasoning_content_delta;
|
|
74
|
+
std::string content_delta;
|
|
75
|
+
size_t tool_call_index = std::string::npos;
|
|
76
|
+
common_chat_tool_call tool_call_delta;
|
|
77
|
+
|
|
78
|
+
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
|
|
79
|
+
|
|
80
|
+
bool operator==(const common_chat_msg_diff & other) const {
|
|
81
|
+
return content_delta == other.content_delta
|
|
82
|
+
&& tool_call_index == other.tool_call_index
|
|
83
|
+
&& tool_call_delta == other.tool_call_delta;
|
|
84
|
+
}
|
|
30
85
|
};
|
|
31
86
|
|
|
32
87
|
struct common_chat_tool {
|
|
@@ -48,14 +103,11 @@ enum common_chat_format {
|
|
|
48
103
|
COMMON_CHAT_FORMAT_LLAMA_3_X,
|
|
49
104
|
COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
|
|
50
105
|
COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
|
51
|
-
COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING,
|
|
52
106
|
COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
|
|
53
107
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
|
54
108
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
|
55
109
|
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
|
56
|
-
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
|
|
57
110
|
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
|
58
|
-
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
|
|
59
111
|
|
|
60
112
|
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
|
|
61
113
|
};
|
|
@@ -70,7 +122,9 @@ struct common_chat_templates_inputs {
|
|
|
70
122
|
std::vector<common_chat_tool> tools;
|
|
71
123
|
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
|
72
124
|
bool parallel_tool_calls = false;
|
|
73
|
-
|
|
125
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
|
126
|
+
bool enable_thinking = true;
|
|
127
|
+
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
|
74
128
|
};
|
|
75
129
|
|
|
76
130
|
struct common_chat_params {
|
|
@@ -78,11 +132,21 @@ struct common_chat_params {
|
|
|
78
132
|
std::string prompt;
|
|
79
133
|
std::string grammar;
|
|
80
134
|
bool grammar_lazy = false;
|
|
135
|
+
bool thinking_forced_open = false;
|
|
81
136
|
std::vector<common_grammar_trigger> grammar_triggers;
|
|
82
137
|
std::vector<std::string> preserved_tokens;
|
|
83
138
|
std::vector<std::string> additional_stops;
|
|
84
139
|
};
|
|
85
140
|
|
|
141
|
+
struct common_chat_syntax {
|
|
142
|
+
common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
143
|
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
|
|
144
|
+
// Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
|
|
145
|
+
bool reasoning_in_content = false;
|
|
146
|
+
bool thinking_forced_open = false;
|
|
147
|
+
bool parse_tool_calls = true;
|
|
148
|
+
};
|
|
149
|
+
|
|
86
150
|
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
|
87
151
|
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
|
|
88
152
|
|
|
@@ -119,8 +183,9 @@ std::string common_chat_format_example(
|
|
|
119
183
|
const struct common_chat_templates * tmpls,
|
|
120
184
|
bool use_jinja);
|
|
121
185
|
|
|
122
|
-
|
|
123
|
-
|
|
186
|
+
const char* common_chat_format_name(common_chat_format format);
|
|
187
|
+
const char* common_reasoning_format_name(common_reasoning_format format);
|
|
188
|
+
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
|
|
124
189
|
|
|
125
190
|
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
|
|
126
191
|
|
|
@@ -133,3 +198,5 @@ template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common
|
|
|
133
198
|
// T can be std::string containing JSON or nlohmann::ordered_json
|
|
134
199
|
template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
|
|
135
200
|
template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);
|
|
201
|
+
|
|
202
|
+
template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
|
|
@@ -443,6 +443,25 @@ void string_replace_all(std::string & s, const std::string & search, const std::
|
|
|
443
443
|
s = std::move(builder);
|
|
444
444
|
}
|
|
445
445
|
|
|
446
|
+
bool string_ends_with(const std::string_view & str, const std::string_view & suffix) {
|
|
447
|
+
return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
|
|
448
|
+
}
|
|
449
|
+
size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop) {
|
|
450
|
+
if (!str.empty() && !stop.empty()) {
|
|
451
|
+
const char text_last_char = str.back();
|
|
452
|
+
for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--) {
|
|
453
|
+
if (stop[char_index] == text_last_char) {
|
|
454
|
+
const auto current_partial = stop.substr(0, char_index + 1);
|
|
455
|
+
if (string_ends_with(str, current_partial)) {
|
|
456
|
+
return str.size() - char_index - 1;
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
return std::string::npos;
|
|
463
|
+
}
|
|
464
|
+
|
|
446
465
|
std::string regex_escape(const std::string & s) {
|
|
447
466
|
static const std::regex special_chars("[.^$|()*+?\\[\\]{}\\\\]");
|
|
448
467
|
return std::regex_replace(s, special_chars, "\\$0");
|
|
@@ -830,7 +849,7 @@ std::string fs_get_cache_directory() {
|
|
|
830
849
|
if (getenv("LLAMA_CACHE")) {
|
|
831
850
|
cache_directory = std::getenv("LLAMA_CACHE");
|
|
832
851
|
} else {
|
|
833
|
-
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
|
|
852
|
+
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || defined(__OpenBSD__)
|
|
834
853
|
if (std::getenv("XDG_CACHE_HOME")) {
|
|
835
854
|
cache_directory = std::getenv("XDG_CACHE_HOME");
|
|
836
855
|
} else {
|
|
@@ -884,13 +903,16 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|
|
884
903
|
ok = false;
|
|
885
904
|
}
|
|
886
905
|
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
ok = false;
|
|
890
|
-
}
|
|
906
|
+
bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
|
|
907
|
+
bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
|
|
891
908
|
|
|
892
|
-
if (
|
|
893
|
-
LOG_WRN("%s: warning: vocab does not have
|
|
909
|
+
if (!has_eos && !has_sep) {
|
|
910
|
+
LOG_WRN("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__);
|
|
911
|
+
ok = false;
|
|
912
|
+
} else if (!has_eos) {
|
|
913
|
+
LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
|
|
914
|
+
} else if (!has_sep) {
|
|
915
|
+
LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
|
|
894
916
|
ok = false;
|
|
895
917
|
}
|
|
896
918
|
|
|
@@ -1083,6 +1105,9 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
|
|
1083
1105
|
mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
|
|
1084
1106
|
}
|
|
1085
1107
|
|
|
1108
|
+
mparams.progress_callback = params.load_progress_callback;
|
|
1109
|
+
mparams.progress_callback_user_data = params.load_progress_callback_user_data;
|
|
1110
|
+
|
|
1086
1111
|
return mparams;
|
|
1087
1112
|
}
|
|
1088
1113
|
|
|
@@ -1114,6 +1139,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
|
|
|
1114
1139
|
cparams.flash_attn = params.flash_attn;
|
|
1115
1140
|
cparams.no_perf = params.no_perf;
|
|
1116
1141
|
cparams.op_offload = !params.no_op_offload;
|
|
1142
|
+
cparams.swa_full = params.swa_full;
|
|
1117
1143
|
|
|
1118
1144
|
if (params.reranking) {
|
|
1119
1145
|
cparams.embeddings = true;
|
|
@@ -1306,81 +1332,6 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
|
|
|
1306
1332
|
return text;
|
|
1307
1333
|
}
|
|
1308
1334
|
|
|
1309
|
-
//
|
|
1310
|
-
// KV cache utils
|
|
1311
|
-
//
|
|
1312
|
-
|
|
1313
|
-
void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
|
|
1314
|
-
static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
|
|
1315
|
-
|
|
1316
|
-
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
|
|
1317
|
-
view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
|
|
1318
|
-
|
|
1319
|
-
llama_kv_cache_view_cell * c_curr = view.cells;
|
|
1320
|
-
llama_seq_id * cs_curr = view.cells_sequences;
|
|
1321
|
-
|
|
1322
|
-
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
|
|
1323
|
-
if (i % row_size == 0) {
|
|
1324
|
-
printf("\n%5d: ", i);
|
|
1325
|
-
}
|
|
1326
|
-
int seq_count = 0;
|
|
1327
|
-
for (int j = 0; j < view.n_seq_max; j++) {
|
|
1328
|
-
if (cs_curr[j] >= 0) { seq_count++; }
|
|
1329
|
-
}
|
|
1330
|
-
putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
|
|
1331
|
-
}
|
|
1332
|
-
|
|
1333
|
-
printf("\n=== Done dumping\n");
|
|
1334
|
-
}
|
|
1335
|
-
|
|
1336
|
-
void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
|
|
1337
|
-
static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
|
|
1338
|
-
|
|
1339
|
-
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
|
|
1340
|
-
view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
|
|
1341
|
-
|
|
1342
|
-
std::unordered_map<llama_seq_id, size_t> seqs;
|
|
1343
|
-
llama_kv_cache_view_cell * c_curr = view.cells;
|
|
1344
|
-
llama_seq_id * cs_curr = view.cells_sequences;
|
|
1345
|
-
|
|
1346
|
-
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
|
|
1347
|
-
for (int j = 0; j < view.n_seq_max; j++) {
|
|
1348
|
-
if (cs_curr[j] < 0) { continue; }
|
|
1349
|
-
if (seqs.find(cs_curr[j]) == seqs.end()) {
|
|
1350
|
-
if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
|
|
1351
|
-
const size_t sz = seqs.size();
|
|
1352
|
-
seqs[cs_curr[j]] = sz;
|
|
1353
|
-
}
|
|
1354
|
-
}
|
|
1355
|
-
if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
|
|
1356
|
-
}
|
|
1357
|
-
|
|
1358
|
-
printf("=== Sequence legend: ");
|
|
1359
|
-
for (const auto & it : seqs) {
|
|
1360
|
-
printf("%zu=%d, ", it.second, it.first);
|
|
1361
|
-
}
|
|
1362
|
-
printf("'+'=other sequence ids");
|
|
1363
|
-
|
|
1364
|
-
c_curr = view.cells;
|
|
1365
|
-
cs_curr = view.cells_sequences;
|
|
1366
|
-
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
|
|
1367
|
-
if (i % row_size == 0) {
|
|
1368
|
-
printf("\n%5d: ", i);
|
|
1369
|
-
}
|
|
1370
|
-
for (int j = 0; j < view.n_seq_max; j++) {
|
|
1371
|
-
if (cs_curr[j] >= 0) {
|
|
1372
|
-
const auto & it = seqs.find(cs_curr[j]);
|
|
1373
|
-
putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
|
|
1374
|
-
} else {
|
|
1375
|
-
putchar('.');
|
|
1376
|
-
}
|
|
1377
|
-
}
|
|
1378
|
-
putchar(' ');
|
|
1379
|
-
}
|
|
1380
|
-
|
|
1381
|
-
printf("\n=== Done dumping\n");
|
|
1382
|
-
}
|
|
1383
|
-
|
|
1384
1335
|
//
|
|
1385
1336
|
// Embedding utils
|
|
1386
1337
|
//
|
|
@@ -1565,3 +1516,20 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c
|
|
|
1565
1516
|
|
|
1566
1517
|
return result;
|
|
1567
1518
|
}
|
|
1519
|
+
|
|
1520
|
+
// Build an optimization dataset for next-token prediction from a flat token stream.
//
// Every datapoint is a window of llama_n_ctx(ctx) consecutive tokens that starts at
// offset idata*stride in `tokens`; its labels are the same window shifted forward by
// one token.
//
// ctx    - context whose n_ctx defines the number of tokens per datapoint
// tokens - the tokenized training text
// stride - distance (in tokens) between the starts of consecutive datapoints;
//          must be > 0 (it is used as a divisor)
//
// Returns the dataset created by ggml_opt_dataset_init with data and labels filled in.
ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride) {
    const int64_t ne_datapoint = llama_n_ctx(ctx);

    // tokens.size() is unsigned: convert it first so the subtraction below is done in
    // signed arithmetic. Otherwise a token stream shorter than ne_datapoint + 1 wraps
    // around to a huge datapoint count and the copies below run far out of bounds.
    const int64_t n_tokens = (int64_t) tokens.size();
    const int64_t ndata    = (n_tokens - ne_datapoint - 1) / stride;

    // NOTE(review): with too few tokens ndata is now <= 0 and is passed on as-is;
    // ggml_opt_dataset_init is presumably responsible for rejecting it - confirm.
    ggml_opt_dataset_t result = ggml_opt_dataset_init(
        GGML_TYPE_I32, GGML_TYPE_I32, ne_datapoint, ne_datapoint, ndata, /*ndata_shard =*/ 1);

    llama_token * data   = (llama_token *) ggml_opt_dataset_data(result)->data;
    llama_token * labels = (llama_token *) ggml_opt_dataset_labels(result)->data;

    for (int64_t idata = 0; idata < ndata; ++idata) {
        // labels are the inputs shifted by one position (the token to predict)
        memcpy(data   + idata*ne_datapoint, tokens.data() + idata*stride + 0, ne_datapoint*sizeof(llama_token));
        memcpy(labels + idata*ne_datapoint, tokens.data() + idata*stride + 1, ne_datapoint*sizeof(llama_token));
    }

    return result;
}
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
#include <set>
|
|
8
8
|
#include <string>
|
|
9
|
+
#include <string_view>
|
|
9
10
|
#include <vector>
|
|
10
11
|
#include <sstream>
|
|
11
12
|
|
|
@@ -75,7 +76,7 @@ enum llama_example {
|
|
|
75
76
|
LLAMA_EXAMPLE_SERVER,
|
|
76
77
|
LLAMA_EXAMPLE_CVECTOR_GENERATOR,
|
|
77
78
|
LLAMA_EXAMPLE_EXPORT_LORA,
|
|
78
|
-
|
|
79
|
+
LLAMA_EXAMPLE_MTMD,
|
|
79
80
|
LLAMA_EXAMPLE_LOOKUP,
|
|
80
81
|
LLAMA_EXAMPLE_PARALLEL,
|
|
81
82
|
LLAMA_EXAMPLE_TTS,
|
|
@@ -114,7 +115,7 @@ enum common_grammar_trigger_type {
|
|
|
114
115
|
COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,
|
|
115
116
|
COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
|
|
116
117
|
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
|
|
117
|
-
|
|
118
|
+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
|
118
119
|
};
|
|
119
120
|
|
|
120
121
|
struct common_grammar_trigger {
|
|
@@ -290,6 +291,7 @@ struct common_params {
|
|
|
290
291
|
int32_t verbosity = 0;
|
|
291
292
|
int32_t control_vector_layer_start = -1; // layer range for control vector
|
|
292
293
|
int32_t control_vector_layer_end = -1; // layer range for control vector
|
|
294
|
+
bool offline = false;
|
|
293
295
|
|
|
294
296
|
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
|
|
295
297
|
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
|
|
@@ -322,13 +324,13 @@ struct common_params {
|
|
|
322
324
|
bool flash_attn = false; // flash attention
|
|
323
325
|
bool no_perf = false; // disable performance metrics
|
|
324
326
|
bool ctx_shift = true; // context shift on infinite text generation
|
|
327
|
+
bool swa_full = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
|
|
325
328
|
|
|
326
329
|
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
|
327
330
|
bool use_mmap = true; // use mmap for faster loads
|
|
328
331
|
bool use_mlock = false; // use mlock to keep model in memory
|
|
329
332
|
bool verbose_prompt = false; // print prompt tokens before generation
|
|
330
333
|
bool display_prompt = true; // print prompt before generation
|
|
331
|
-
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
|
332
334
|
bool no_kv_offload = false; // disable KV offloading
|
|
333
335
|
bool warmup = true; // warmup run
|
|
334
336
|
bool check_tensors = false; // validate tensor data
|
|
@@ -367,6 +369,8 @@ struct common_params {
|
|
|
367
369
|
bool use_jinja = false; // NOLINT
|
|
368
370
|
bool enable_chat_template = true;
|
|
369
371
|
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
|
372
|
+
int reasoning_budget = -1;
|
|
373
|
+
bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
|
|
370
374
|
|
|
371
375
|
std::vector<std::string> api_keys;
|
|
372
376
|
|
|
@@ -426,6 +430,11 @@ struct common_params {
|
|
|
426
430
|
|
|
427
431
|
// common params
|
|
428
432
|
std::string out_file; // output filename for all example programs
|
|
433
|
+
// optional callback for model loading progress and cancellation:
|
|
434
|
+
// called with a progress value between 0.0 and 1.0.
|
|
435
|
+
// return false from callback to abort model loading or true to continue
|
|
436
|
+
llama_progress_callback load_progress_callback = NULL;
|
|
437
|
+
void * load_progress_callback_user_data = NULL;
|
|
429
438
|
};
|
|
430
439
|
|
|
431
440
|
// call once at the start of a program if it uses libcommon
|
|
@@ -503,10 +512,9 @@ static bool string_starts_with(const std::string & str,
|
|
|
503
512
|
return str.rfind(prefix, 0) == 0;
|
|
504
513
|
}
|
|
505
514
|
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
}
|
|
515
|
+
// While we wait for C++20's std::string::ends_with...
|
|
516
|
+
bool string_ends_with(const std::string_view & str, const std::string_view & suffix);
|
|
517
|
+
size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop);
|
|
510
518
|
|
|
511
519
|
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
|
|
512
520
|
void string_process_escapes(std::string & input);
|
|
@@ -615,16 +623,6 @@ std::string common_detokenize(
|
|
|
615
623
|
const std::vector<llama_token> & tokens,
|
|
616
624
|
bool special = true);
|
|
617
625
|
|
|
618
|
-
//
|
|
619
|
-
// KV cache utils
|
|
620
|
-
//
|
|
621
|
-
|
|
622
|
-
// Dump the KV cache view with the number of sequences per cell.
|
|
623
|
-
void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
|
|
624
|
-
|
|
625
|
-
// Dump the KV cache view showing individual sequences in each cell (long output).
|
|
626
|
-
void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
|
627
|
-
|
|
628
626
|
//
|
|
629
627
|
// Embedding utils
|
|
630
628
|
//
|
|
@@ -666,3 +664,9 @@ const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
|
|
666
664
|
const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
|
667
665
|
|
|
668
666
|
}
|
|
667
|
+
|
|
668
|
+
//
|
|
669
|
+
// training utils
|
|
670
|
+
//
|
|
671
|
+
|
|
672
|
+
ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride);
|