@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
package/ios/include/common/minja/minja.hpp
CHANGED
```diff
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <cctype>
 #include <cstddef>
+#include <cstdint>
 #include <cmath>
 #include <exception>
 #include <functional>
@@ -233,7 +234,7 @@ public:
             }
         } else if (is_object()) {
             if (!index.is_hashable())
-                throw std::runtime_error("
+                throw std::runtime_error("Unhashable type: " + index.dump());
             auto it = object_->find(index.primitive_);
             if (it == object_->end())
                 throw std::runtime_error("Key not found: " + index.dump());
@@ -252,7 +253,7 @@ public:
             auto index = key.get<int>();
             return array_->at(index < 0 ? array_->size() + index : index);
         } else if (object_) {
-            if (!key.is_hashable()) throw std::runtime_error("
+            if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
             auto it = object_->find(key.primitive_);
             if (it == object_->end()) return Value();
             return it->second;
@@ -261,7 +262,7 @@ public:
     }
     void set(const Value& key, const Value& value) {
         if (!object_) throw std::runtime_error("Value is not an object: " + dump());
-        if (!key.is_hashable()) throw std::runtime_error("
+        if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
         (*object_)[key.primitive_] = value;
     }
     Value call(const std::shared_ptr<Context> & context, ArgumentsValue & args) const {
@@ -398,7 +399,7 @@ public:
             }
             return false;
         } else if (object_) {
-            if (!value.is_hashable()) throw std::runtime_error("
+            if (!value.is_hashable()) throw std::runtime_error("Unhashable type: " + value.dump());
            return object_->find(value.primitive_) != object_->end();
         } else {
            throw std::runtime_error("contains can only be called on arrays and objects: " + dump());
@@ -416,7 +417,7 @@ public:
         return const_cast<Value*>(this)->at(index);
     }
     Value& at(const Value & index) {
-        if (!index.is_hashable()) throw std::runtime_error("
+        if (!index.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
         if (is_array()) return array_->at(index.get<int>());
         if (is_object()) return object_->at(index.primitive_);
         throw std::runtime_error("Value is not an array or object: " + dump());
@@ -676,8 +677,8 @@ public:
 class VariableExpr : public Expression {
     std::string name;
 public:
-    VariableExpr(const Location &
-        : Expression(
+    VariableExpr(const Location & loc, const std::string& n)
+        : Expression(loc), name(n) {}
     std::string get_name() const { return name; }
     Value do_evaluate(const std::shared_ptr<Context> & context) const override {
         if (!context->contains(name)) {
@@ -1200,9 +1201,9 @@ public:
 
 class SliceExpr : public Expression {
 public:
-    std::shared_ptr<Expression> start, end;
-    SliceExpr(const Location & loc, std::shared_ptr<Expression> && s, std::shared_ptr<Expression> && e)
-        : Expression(loc), start(std::move(s)), end(std::move(e)) {}
+    std::shared_ptr<Expression> start, end, step;
+    SliceExpr(const Location & loc, std::shared_ptr<Expression> && s, std::shared_ptr<Expression> && e, std::shared_ptr<Expression> && st = nullptr)
+        : Expression(loc), start(std::move(s)), end(std::move(e)), step(std::move(st)) {}
     Value do_evaluate(const std::shared_ptr<Context> &) const override {
         throw std::runtime_error("SliceExpr not implemented");
     }
@@ -1219,18 +1220,35 @@ public:
         if (!index) throw std::runtime_error("SubscriptExpr.index is null");
         auto target_value = base->evaluate(context);
         if (auto slice = dynamic_cast<SliceExpr*>(index.get())) {
-            auto
-            auto
+            auto len = target_value.size();
+            auto wrap = [len](int64_t i) -> int64_t {
+                if (i < 0) {
+                    return i + len;
+                }
+                return i;
+            };
+            int64_t step = slice->step ? slice->step->evaluate(context).get<int64_t>() : 1;
+            if (!step) {
+                throw std::runtime_error("slice step cannot be zero");
+            }
+            int64_t start = slice->start ? wrap(slice->start->evaluate(context).get<int64_t>()) : (step < 0 ? len - 1 : 0);
+            int64_t end = slice->end ? wrap(slice->end->evaluate(context).get<int64_t>()) : (step < 0 ? -1 : len);
             if (target_value.is_string()) {
                 std::string s = target_value.get<std::string>();
-
-
-
+
+                std::string result;
+                if (start < end && step == 1) {
+                    result = s.substr(start, end - start);
+                } else {
+                    for (int64_t i = start; step > 0 ? i < end : i > end; i += step) {
+                        result += s[i];
+                    }
+                }
+                return result;
+
             } else if (target_value.is_array()) {
-                if (start < 0) start = target_value.size() + start;
-                if (end < 0) end = target_value.size() + end;
                 auto result = Value::array();
-                for (
+                for (int64_t i = start; step > 0 ? i < end : i > end; i += step) {
                     result.push_back(target_value.at(i));
                 }
                 return result;
```
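The slice evaluation above gives minja Python-style `[start:end:step]` semantics: negative indices wrap once around the end, the `start`/`end` defaults flip when `step` is negative, and a zero step is rejected. For reference, a standalone sketch of the same index arithmetic — not minja's own code; like the patch, it assumes the resolved indices land in range:

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

// Standalone illustration of the slice semantics the patch implements:
// negative indices wrap once around the end, and the start/end defaults
// depend on the sign of step.
static std::string slice(const std::string & s,
                         int64_t start, int64_t end, int64_t step,
                         bool has_start, bool has_end) {
    const int64_t len = (int64_t) s.size();
    if (step == 0) throw std::runtime_error("slice step cannot be zero");
    auto wrap = [len](int64_t i) { return i < 0 ? i + len : i; };
    const int64_t b = has_start ? wrap(start) : (step < 0 ? len - 1 : 0);
    const int64_t e = has_end   ? wrap(end)   : (step < 0 ? -1      : len);
    std::string out;
    for (int64_t i = b; step > 0 ? i < e : i > e; i += step) {
        out += s[i];
    }
    return out;
}

int main() {
    // equivalent to the Jinja expressions "hello"[1:4], "hello"[::-1], "hello"[::2]
    std::cout << slice("hello", 1, 4,  1, true,  true)  << "\n"; // ell
    std::cout << slice("hello", 0, 0, -1, false, false) << "\n"; // olleh
    std::cout << slice("hello", 0, 0,  2, false, false) << "\n"; // hlo
}
```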
```diff
@@ -1305,6 +1323,8 @@ public:
     if (name == "iterable") return l.is_iterable();
     if (name == "sequence") return l.is_array();
     if (name == "defined") return !l.is_null();
+    if (name == "true") return l.to_bool();
+    if (name == "false") return !l.to_bool();
     throw std::runtime_error("Unknown type for 'is' operator: " + name);
 };
 auto value = eval();
@@ -1520,6 +1540,10 @@ public:
             vargs.expectArgs("endswith method", {1, 1}, {0, 0});
             auto suffix = vargs.args[0].get<std::string>();
             return suffix.length() <= str.length() && std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
+        } else if (method->get_name() == "startswith") {
+            vargs.expectArgs("startswith method", {1, 1}, {0, 0});
+            auto prefix = vargs.args[0].get<std::string>();
+            return prefix.length() <= str.length() && std::equal(prefix.begin(), prefix.end(), str.begin());
         } else if (method->get_name() == "title") {
             vargs.expectArgs("title method", {0, 0}, {0, 0});
             auto res = str;
```
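The new `startswith` string method mirrors the existing `endswith` using the same `std::equal` idiom. A minimal standalone version of both checks, for illustration only:

```cpp
#include <algorithm>
#include <cassert>
#include <string>

// The std::equal idiom used by the new "startswith" method, shown next to the
// pre-existing "endswith" check as plain functions.
static bool starts_with(const std::string & str, const std::string & prefix) {
    return prefix.length() <= str.length() &&
           std::equal(prefix.begin(), prefix.end(), str.begin());
}

static bool ends_with(const std::string & str, const std::string & suffix) {
    return suffix.length() <= str.length() &&
           std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}

int main() {
    // backs template expressions such as {{ s.startswith("<think>") }}
    assert(starts_with("<think>reasoning", "<think>"));
    assert(ends_with("reasoning</think>", "</think>"));
}
```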
```diff
@@ -2082,28 +2106,37 @@ private:
 
         while (it != end && consumeSpaces() && peekSymbols({ "[", "." })) {
             if (!consumeToken("[").empty()) {
-
+                std::shared_ptr<Expression> index;
+                auto slice_loc = get_location();
+                std::shared_ptr<Expression> start, end, step;
+                bool has_first_colon = false, has_second_colon = false;
+
+                if (!peekSymbols({ ":" })) {
+                    start = parseExpression();
+                }
+
+                if (!consumeToken(":").empty()) {
+                    has_first_colon = true;
+                    if (!peekSymbols({ ":", "]" })) {
+                        end = parseExpression();
+                    }
                     if (!consumeToken(":").empty()) {
-
-
-
-                        auto slice_start = parseExpression();
-                        if (!consumeToken(":").empty()) {
-                            consumeSpaces();
-                            if (peekSymbols({ "]" })) {
-                                index = std::make_shared<SliceExpr>(slice_start->location, std::move(slice_start), nullptr);
-                            } else {
-                                auto slice_end = parseExpression();
-                                index = std::make_shared<SliceExpr>(slice_start->location, std::move(slice_start), std::move(slice_end));
-                            }
-                        } else {
-                            index = std::move(slice_start);
+                        has_second_colon = true;
+                        if (!peekSymbols({ "]" })) {
+                            step = parseExpression();
                         }
                     }
-
-
+                }
+
+                if ((has_first_colon || has_second_colon) && (start || end || step)) {
+                    index = std::make_shared<SliceExpr>(slice_loc, std::move(start), std::move(end), std::move(step));
+                } else {
+                    index = std::move(start);
+                }
+                if (!index) throw std::runtime_error("Empty index in subscript");
+                if (consumeToken("]").empty()) throw std::runtime_error("Expected closing bracket in subscript");
 
-
+                value = std::make_shared<SubscriptExpr>(value->location, std::move(value), std::move(index));
             } else if (!consumeToken(".").empty()) {
                 auto identifier = parseIdentifier();
                 if (!identifier) throw std::runtime_error("Expected identifier in subscript");
```
package/ios/include/common.h
CHANGED
```diff
@@ -6,6 +6,7 @@
 
 #include <set>
 #include <string>
+#include <string_view>
 #include <vector>
 #include <sstream>
 
@@ -75,7 +76,7 @@ enum llama_example {
     LLAMA_EXAMPLE_SERVER,
     LLAMA_EXAMPLE_CVECTOR_GENERATOR,
     LLAMA_EXAMPLE_EXPORT_LORA,
-
+    LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_LOOKUP,
     LLAMA_EXAMPLE_PARALLEL,
     LLAMA_EXAMPLE_TTS,
@@ -114,7 +115,7 @@ enum common_grammar_trigger_type {
     COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,
     COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
     COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
-
+    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
 };
 
 struct common_grammar_trigger {
@@ -290,6 +291,7 @@ struct common_params {
     int32_t verbosity                  = 0;
     int32_t control_vector_layer_start = -1; // layer range for control vector
     int32_t control_vector_layer_end   = -1; // layer range for control vector
+    bool    offline                    = false;
 
     int32_t ppl_stride      = 0;     // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
     int32_t ppl_output_type = 0;     // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
@@ -322,13 +324,13 @@ struct common_params {
     bool flash_attn        = false;  // flash attention
     bool no_perf           = false;  // disable performance metrics
     bool ctx_shift         = true;   // context shift on inifinite text generation
+    bool swa_full          = false;  // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
 
     bool input_prefix_bos  = false;  // prefix BOS to user inputs, preceding input_prefix
     bool use_mmap          = true;   // use mmap for faster loads
     bool use_mlock         = false;  // use mlock to keep model in memory
     bool verbose_prompt    = false;  // print prompt tokens before generation
     bool display_prompt    = true;   // print prompt before generation
-    bool dump_kv_cache     = false;  // dump the KV cache contents for debugging purposes
     bool no_kv_offload     = false;  // disable KV offloading
     bool warmup            = true;   // warmup run
     bool check_tensors     = false;  // validate tensor data
@@ -367,6 +369,8 @@ struct common_params {
     bool use_jinja = false; // NOLINT
     bool enable_chat_template = true;
     common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+    int reasoning_budget = -1;
+    bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
 
     std::vector<std::string> api_keys;
 
```
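A hypothetical configuration sketch using the `common_params` fields added above. Treating `reasoning_budget` as "-1 = unrestricted, 0 = disable thinking" follows llama.cpp's `--reasoning-budget` flag and is an assumption here, not something stated in this header:

```cpp
#include "common.h"

// Hypothetical configuration using the fields added in this diff.
static common_params make_params() {
    common_params params;
    params.offline           = true;  // never hit the network for models
    params.swa_full          = false; // keep the smaller sliding-window cache
    params.use_jinja         = true;
    params.reasoning_budget  = 0;     // assumption: 0 suppresses thinking output
    params.prefill_assistant = true;  // prefill a trailing assistant message
    return params;
}
```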
```diff
@@ -426,6 +430,11 @@ struct common_params {
 
     // common params
     std::string out_file; // output filename for all example programs
+    // optional callback for model loading progress and cancellation:
+    // called with a progress value between 0.0 and 1.0.
+    // return false from callback to abort model loading or true to continue
+    llama_progress_callback load_progress_callback = NULL;
+    void * load_progress_callback_user_data = NULL;
 };
 
 // call once at the start of a program if it uses libcommon
```
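A sketch of wiring up the new load-progress fields. `llama_progress_callback` is declared in llama.h as `bool (*)(float progress, void * user_data)`; returning false aborts the load, per the comment above:

```cpp
#include <atomic>
#include <cstdio>

#include "common.h"

// Sketch: report load progress and allow cancellation via the new fields.
static std::atomic<bool> g_cancel{false};

static bool on_load_progress(float progress, void * /*user_data*/) {
    fprintf(stderr, "\rloading model: %3.0f%%", progress * 100.0f);
    return !g_cancel.load(); // false -> abort model loading
}

static void enable_progress(common_params & params) {
    params.load_progress_callback           = on_load_progress;
    params.load_progress_callback_user_data = nullptr;
}
```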
```diff
@@ -503,10 +512,9 @@ static bool string_starts_with(const std::string & str,
     return str.rfind(prefix, 0) == 0;
 }
 
-
-
-
-}
+// While we wait for C++20's std::string::ends_with...
+bool string_ends_with(const std::string_view & str, const std::string_view & suffix);
+size_t string_find_partial_stop(const std::string_view & str, const std::string_view & stop);
 
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);
```
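These declarations support streaming stop-sequence handling: `string_ends_with` detects a completed stop string, while `string_find_partial_stop` locates a trailing prefix of one so the caller can hold those bytes back. The header only declares them, so the following is an independent sketch of the pattern, not the library's implementation:

```cpp
#include <algorithm>
#include <string>
#include <string_view>

// Independent sketch of the partial-stop pattern: return the position where a
// proper prefix of `stop` begins at the end of `text`, or npos. A streaming
// generator holds those bytes back until the next token resolves whether the
// stop string actually completes.
static size_t find_partial_stop(std::string_view text, std::string_view stop) {
    if (!text.empty() && !stop.empty()) {
        // longest suffix of text that is a prefix of stop (excluding a full match)
        for (size_t n = std::min(text.size(), stop.size() - 1); n > 0; n--) {
            if (text.substr(text.size() - n) == stop.substr(0, n)) {
                return text.size() - n;
            }
        }
    }
    return std::string::npos;
}
// e.g. find_partial_stop("Hello <st", "<stop>") == 6: "<st" is held back
```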
```diff
@@ -615,16 +623,6 @@ std::string common_detokenize(
     const std::vector<llama_token> & tokens,
     bool special = true);
 
-//
-// KV cache utils
-//
-
-// Dump the KV cache view with the number of sequences per cell.
-void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
-
-// Dump the KV cache view showing individual sequences in each cell (long output).
-void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
-
 //
 // Embedding utils
 //
@@ -666,3 +664,9 @@ const char * const LLM_KV_SPLIT_COUNT = "split.count";
 const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
 
 }
+
+//
+// training utils
+//
+
+ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride);
```
package/ios/include/llama.h
CHANGED
```diff
@@ -4,6 +4,7 @@
 #include "ggml.h"
 #include "ggml-cpu.h"
 #include "ggml-backend.h"
+#include "ggml-opt.h"
 
 #include <stddef.h>
 #include <stdint.h>
@@ -344,7 +345,7 @@ extern "C" {
         float    yarn_beta_fast;   // YaRN low correction dim
         float    yarn_beta_slow;   // YaRN high correction dim
         uint32_t yarn_orig_ctx;    // YaRN original context size
-        float    defrag_thold;     // defragment the KV cache if holes/size > thold,
+        float    defrag_thold;     // defragment the KV cache if holes/size > thold, <= 0 disabled (default)
 
         ggml_backend_sched_eval_callback cb_eval;
         void * cb_eval_user_data;
@@ -360,10 +361,11 @@ extern "C" {
 
         // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
         bool embeddings;  // if true, extract embeddings (together with logits)
-        bool offload_kqv; //
-        bool flash_attn;  //
-        bool no_perf;     //
-        bool op_offload;  //
+        bool offload_kqv; // offload the KQV ops (including the KV cache) to GPU
+        bool flash_attn;  // use flash attention [EXPERIMENTAL]
+        bool no_perf;     // measure performance timings
+        bool op_offload;  // offload host tensor operations to device
+        bool swa_full;    // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
     };
 
     // model quantization parameters
```
|
|
|
445
447
|
size_t n_paths,
|
|
446
448
|
struct llama_model_params params);
|
|
447
449
|
|
|
450
|
+
LLAMA_API void llama_model_save_to_file(
|
|
451
|
+
const struct llama_model * model,
|
|
452
|
+
const char * path_model);
|
|
453
|
+
|
|
448
454
|
DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
|
|
449
455
|
"use llama_model_free instead");
|
|
450
456
|
|
|
@@ -465,6 +471,7 @@ extern "C" {
|
|
|
465
471
|
LLAMA_API int64_t llama_time_us(void);
|
|
466
472
|
|
|
467
473
|
LLAMA_API size_t llama_max_devices(void);
|
|
474
|
+
LLAMA_API size_t llama_max_parallel_sequences(void);
|
|
468
475
|
|
|
469
476
|
LLAMA_API bool llama_supports_mmap (void);
|
|
470
477
|
LLAMA_API bool llama_supports_mlock (void);
|
|
@@ -602,71 +609,14 @@ extern "C" {
|
|
|
602
609
|
// KV cache
|
|
603
610
|
//
|
|
604
611
|
|
|
605
|
-
// TODO: start using struct llama_kv_cache
|
|
606
|
-
|
|
607
|
-
// Information associated with an individual cell in the KV cache view.
|
|
608
|
-
struct llama_kv_cache_view_cell {
|
|
609
|
-
// The position for this cell. Takes KV cache shifts into account.
|
|
610
|
-
// May be negative if the cell is not populated.
|
|
611
|
-
llama_pos pos;
|
|
612
|
-
};
|
|
613
|
-
|
|
614
|
-
// An updateable view of the KV cache.
|
|
615
|
-
struct llama_kv_cache_view {
|
|
616
|
-
// Number of KV cache cells. This will be the same as the context size.
|
|
617
|
-
int32_t n_cells;
|
|
618
|
-
|
|
619
|
-
// Maximum number of sequences that can exist in a cell. It's not an error
|
|
620
|
-
// if there are more sequences in a cell than this value, however they will
|
|
621
|
-
// not be visible in the view cells_sequences.
|
|
622
|
-
int32_t n_seq_max;
|
|
623
|
-
|
|
624
|
-
// Number of tokens in the cache. For example, if there are two populated
|
|
625
|
-
// cells, the first with 1 sequence id in it and the second with 2 sequence
|
|
626
|
-
// ids then you'll have 3 tokens.
|
|
627
|
-
int32_t token_count;
|
|
628
|
-
|
|
629
|
-
// Number of populated cache cells.
|
|
630
|
-
int32_t used_cells;
|
|
631
|
-
|
|
632
|
-
// Maximum contiguous empty slots in the cache.
|
|
633
|
-
int32_t max_contiguous;
|
|
634
|
-
|
|
635
|
-
// Index to the start of the max_contiguous slot range. Can be negative
|
|
636
|
-
// when cache is full.
|
|
637
|
-
int32_t max_contiguous_idx;
|
|
638
|
-
|
|
639
|
-
// Information for an individual cell.
|
|
640
|
-
struct llama_kv_cache_view_cell * cells;
|
|
641
|
-
|
|
642
|
-
// The sequences for each cell. There will be n_seq_max items per cell.
|
|
643
|
-
llama_seq_id * cells_sequences;
|
|
644
|
-
};
|
|
645
|
-
|
|
646
|
-
// Create an empty KV cache view. (use only for debugging purposes)
|
|
647
|
-
LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
|
|
648
|
-
|
|
649
|
-
// Free a KV cache view. (use only for debugging purposes)
|
|
650
|
-
LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
|
|
651
|
-
|
|
652
|
-
// Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
|
|
653
|
-
// TODO: change signature to llama_kv_cache_view_update(struct llama_kv_cache_view * view, const struct llama_context * ctx)
|
|
654
|
-
LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
|
|
655
|
-
|
|
656
|
-
///
|
|
657
|
-
|
|
658
612
|
// Returns the number of tokens in the KV cache (slow, use only for debug)
|
|
659
613
|
// If a KV cell has multiple sequences assigned to it, it will be counted multiple times
|
|
660
|
-
LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx)
|
|
661
|
-
|
|
662
|
-
DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx),
|
|
663
|
-
"use llama_kv_self_n_tokens instead");
|
|
614
|
+
DEPRECATED(LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx),
|
|
615
|
+
"Use llama_kv_self_seq_pos_max() and llama_kv_self_seq_pos_min() instead (https://github.com/ggml-org/llama.cpp/issues/13793)");
|
|
664
616
|
|
|
665
617
|
// Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
|
|
666
|
-
LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx)
|
|
667
|
-
|
|
668
|
-
DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx),
|
|
669
|
-
"use llama_kv_self_used_cells instead");
|
|
618
|
+
DEPRECATED(LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx),
|
|
619
|
+
"Use llama_kv_self_seq_pos_max() and llama_kv_self_seq_pos_min() instead (https://github.com/ggml-org/llama.cpp/issues/13793)");
|
|
670
620
|
|
|
671
621
|
// Clear the KV cache - both cell info is erased and KV data is zeroed
|
|
672
622
|
LLAMA_API void llama_kv_self_clear(
|
|
@@ -725,10 +675,18 @@ extern "C" {
|
|
|
725
675
|
llama_pos p1,
|
|
726
676
|
int d);
|
|
727
677
|
|
|
678
|
+
// Returns the smallest position present in the KV cache for the specified sequence
|
|
679
|
+
// This is typically non-zero only for SWA caches
|
|
680
|
+
// Return -1 if the sequence is empty
|
|
681
|
+
LLAMA_API llama_pos llama_kv_self_seq_pos_min(
|
|
682
|
+
struct llama_context * ctx,
|
|
683
|
+
llama_seq_id seq_id);
|
|
684
|
+
|
|
728
685
|
// Returns the largest position present in the KV cache for the specified sequence
|
|
686
|
+
// Return -1 if the sequence is empty
|
|
729
687
|
LLAMA_API llama_pos llama_kv_self_seq_pos_max(
|
|
730
688
|
struct llama_context * ctx,
|
|
731
|
-
|
|
689
|
+
llama_seq_id seq_id);
|
|
732
690
|
|
|
733
691
|
// Defragment the KV cache
|
|
734
692
|
// This will be applied:
|
|
@@ -742,61 +700,6 @@ extern "C" {
|
|
|
742
700
|
// Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
|
|
743
701
|
LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
|
|
744
702
|
|
|
745
|
-
DEPRECATED(LLAMA_API void llama_kv_cache_clear(
|
|
746
|
-
struct llama_context * ctx),
|
|
747
|
-
"use llama_kv_self_clear instead");
|
|
748
|
-
|
|
749
|
-
DEPRECATED(LLAMA_API bool llama_kv_cache_seq_rm(
|
|
750
|
-
struct llama_context * ctx,
|
|
751
|
-
llama_seq_id seq_id,
|
|
752
|
-
llama_pos p0,
|
|
753
|
-
llama_pos p1),
|
|
754
|
-
"use llama_kv_self_seq_rm instead");
|
|
755
|
-
|
|
756
|
-
DEPRECATED(LLAMA_API void llama_kv_cache_seq_cp(
|
|
757
|
-
struct llama_context * ctx,
|
|
758
|
-
llama_seq_id seq_id_src,
|
|
759
|
-
llama_seq_id seq_id_dst,
|
|
760
|
-
llama_pos p0,
|
|
761
|
-
llama_pos p1),
|
|
762
|
-
"use llama_kv_self_seq_cp instead");
|
|
763
|
-
|
|
764
|
-
DEPRECATED(LLAMA_API void llama_kv_cache_seq_keep(
|
|
765
|
-
struct llama_context * ctx,
|
|
766
|
-
llama_seq_id seq_id),
|
|
767
|
-
"use llama_kv_self_seq_keep instead");
|
|
768
|
-
|
|
769
|
-
DEPRECATED(LLAMA_API void llama_kv_cache_seq_add(
|
|
770
|
-
struct llama_context * ctx,
|
|
771
|
-
llama_seq_id seq_id,
|
|
772
|
-
llama_pos p0,
|
|
773
|
-
llama_pos p1,
|
|
774
|
-
llama_pos delta),
|
|
775
|
-
"use llama_kv_self_seq_add instead");
|
|
776
|
-
|
|
777
|
-
DEPRECATED(LLAMA_API void llama_kv_cache_seq_div(
|
|
778
|
-
struct llama_context * ctx,
|
|
779
|
-
llama_seq_id seq_id,
|
|
780
|
-
llama_pos p0,
|
|
781
|
-
llama_pos p1,
|
|
782
|
-
int d),
|
|
783
|
-
"use llama_kv_self_seq_div instead");
|
|
784
|
-
|
|
785
|
-
DEPRECATED(LLAMA_API llama_pos llama_kv_cache_seq_pos_max(
|
|
786
|
-
struct llama_context * ctx,
|
|
787
|
-
llama_seq_id seq_id),
|
|
788
|
-
"use llama_kv_self_seq_pos_max instead");
|
|
789
|
-
|
|
790
|
-
DEPRECATED(LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx),
|
|
791
|
-
"use llama_kv_self_defrag instead");
|
|
792
|
-
|
|
793
|
-
DEPRECATED(LLAMA_API bool llama_kv_cache_can_shift(const struct llama_context * ctx),
|
|
794
|
-
"use llama_kv_self_can_shift instead");
|
|
795
|
-
|
|
796
|
-
DEPRECATED(LLAMA_API void llama_kv_cache_update(struct llama_context * ctx),
|
|
797
|
-
"use llama_kv_self_update instead");
|
|
798
|
-
|
|
799
|
-
|
|
800
703
|
//
|
|
801
704
|
// State / sessions
|
|
802
705
|
//
|
|
@@ -938,9 +841,12 @@ extern "C" {
|
|
|
938
841
|
// Requires KV cache.
|
|
939
842
|
// For encode-decoder contexts, processes the batch using the decoder.
|
|
940
843
|
// Positive return values does not mean a fatal error, but rather a warning.
|
|
941
|
-
//
|
|
942
|
-
//
|
|
943
|
-
//
|
|
844
|
+
// Upon non-zero return values, the KV cache state is restored to the state before this call
|
|
845
|
+
// 0 - success
|
|
846
|
+
// 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
|
|
847
|
+
// 2 - aborted
|
|
848
|
+
// -1 - invalid input batch
|
|
849
|
+
// < -1 - error
|
|
944
850
|
LLAMA_API int32_t llama_decode(
|
|
945
851
|
struct llama_context * ctx,
|
|
946
852
|
struct llama_batch batch);
|
|
@@ -1433,6 +1339,37 @@ extern "C" {
|
|
|
1433
1339
|
LLAMA_API void llama_perf_sampler_print(const struct llama_sampler * chain);
|
|
1434
1340
|
LLAMA_API void llama_perf_sampler_reset( struct llama_sampler * chain);
|
|
1435
1341
|
|
|
1342
|
+
//
|
|
1343
|
+
// training
|
|
1344
|
+
//
|
|
1345
|
+
|
|
1346
|
+
// function that returns whether or not a given tensor contains trainable parameters
|
|
1347
|
+
typedef bool (*llama_opt_param_filter)(const struct ggml_tensor * tensor, void * userdata);
|
|
1348
|
+
|
|
1349
|
+
// always returns true
|
|
1350
|
+
LLAMA_API bool llama_opt_param_filter_all(const struct ggml_tensor * tensor, void * userdata);
|
|
1351
|
+
|
|
1352
|
+
struct llama_opt_params {
|
|
1353
|
+
uint32_t n_ctx_train; // assumed context size post training, use context size specified in llama_context if 0
|
|
1354
|
+
|
|
1355
|
+
llama_opt_param_filter param_filter; // callback for determining which tensors contain trainable parameters
|
|
1356
|
+
void * param_filter_ud; // userdata for determining which tensors contain trainable parameters
|
|
1357
|
+
|
|
1358
|
+
ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
|
|
1359
|
+
void * get_opt_pars_ud; // userdata for calculating optimizer parameters
|
|
1360
|
+
};
|
|
1361
|
+
|
|
1362
|
+
LLAMA_API void llama_opt_init(struct llama_context * lctx, struct llama_model * model, struct llama_opt_params lopt_params);
|
|
1363
|
+
|
|
1364
|
+
LLAMA_API void llama_opt_epoch(
|
|
1365
|
+
struct llama_context * lctx,
|
|
1366
|
+
ggml_opt_dataset_t dataset,
|
|
1367
|
+
ggml_opt_result_t result_train,
|
|
1368
|
+
ggml_opt_result_t result_eval,
|
|
1369
|
+
int64_t idata_split,
|
|
1370
|
+
ggml_opt_epoch_callback callback_train,
|
|
1371
|
+
ggml_opt_epoch_callback callback_eval);
|
|
1372
|
+
|
|
1436
1373
|
#ifdef __cplusplus
|
|
1437
1374
|
}
|
|
1438
1375
|
#endif
|
|
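A sketch tying the new training entry points to `common_opt_dataset_init()` from common.h above. `ggml_opt_result_init`/`ggml_opt_result_free`, `ggml_opt_dataset_ndata`, `ggml_opt_get_default_optimizer_params`, and `ggml_opt_epoch_callback_progress_bar` come from ggml-opt.h (now included by llama.h); the 90/10 data split and `n_ctx/2` stride are illustrative choices, not prescribed by the API:

```cpp
#include <vector>

#include "common.h"
#include "llama.h"

// Minimal fine-tuning sketch with the new training API.
static void finetune(llama_context * ctx, llama_model * model,
                     const std::vector<llama_token> & tokens, uint32_t n_ctx) {
    llama_opt_params opt_params = {};
    opt_params.n_ctx_train  = 0;                          // 0: use ctx's size
    opt_params.param_filter = llama_opt_param_filter_all; // train all tensors
    opt_params.get_opt_pars = ggml_opt_get_default_optimizer_params;
    llama_opt_init(ctx, model, opt_params);

    ggml_opt_dataset_t dataset = common_opt_dataset_init(ctx, tokens, n_ctx/2);
    ggml_opt_result_t result_train = ggml_opt_result_init();
    ggml_opt_result_t result_eval  = ggml_opt_result_init();

    // hold out the last ~10% of the data for evaluation
    const int64_t idata_split = ggml_opt_dataset_ndata(dataset) * 9 / 10;
    llama_opt_epoch(ctx, dataset, result_train, result_eval, idata_split,
                    ggml_opt_epoch_callback_progress_bar,
                    ggml_opt_epoch_callback_progress_bar);

    ggml_opt_result_free(result_train);
    ggml_opt_result_free(result_eval);
    ggml_opt_dataset_free(dataset);
}
```

A model trained this way can then be persisted with `llama_model_save_to_file()`, shown earlier.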
package/ios/libs/llama.xcframework/Info.plist
CHANGED
```diff
@@ -4,6 +4,25 @@
 <dict>
 	<key>AvailableLibraries</key>
 	<array>
+		<dict>
+			<key>BinaryPath</key>
+			<string>llama.framework/llama</string>
+			<key>DebugSymbolsPath</key>
+			<string>dSYMs</string>
+			<key>LibraryIdentifier</key>
+			<string>tvos-arm64_x86_64-simulator</string>
+			<key>LibraryPath</key>
+			<string>llama.framework</string>
+			<key>SupportedArchitectures</key>
+			<array>
+				<string>arm64</string>
+				<string>x86_64</string>
+			</array>
+			<key>SupportedPlatform</key>
+			<string>tvos</string>
+			<key>SupportedPlatformVariant</key>
+			<string>simulator</string>
+		</dict>
 		<dict>
 			<key>BinaryPath</key>
 			<string>llama.framework/llama</string>
@@ -107,25 +126,6 @@
 			<key>SupportedPlatformVariant</key>
 			<string>simulator</string>
 		</dict>
-		<dict>
-			<key>BinaryPath</key>
-			<string>llama.framework/llama</string>
-			<key>DebugSymbolsPath</key>
-			<string>dSYMs</string>
-			<key>LibraryIdentifier</key>
-			<string>tvos-arm64_x86_64-simulator</string>
-			<key>LibraryPath</key>
-			<string>llama.framework</string>
-			<key>SupportedArchitectures</key>
-			<array>
-				<string>arm64</string>
-				<string>x86_64</string>
-			</array>
-			<key>SupportedPlatform</key>
-			<string>tvos</string>
-			<key>SupportedPlatformVariant</key>
-			<string>simulator</string>
-		</dict>
 	</array>
 	<key>CFBundlePackageType</key>
 	<string>XFWK</string>
```