@novastera-oss/llamarn 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +4 -2
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -1,28 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
3
|
|
|
4
|
-
# This script downloads the tokenizer models of the specified models from Huggingface and
|
|
5
|
-
# generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
|
|
6
|
-
#
|
|
7
|
-
# This is necessary in order to analyze the type of pre-tokenizer used by the model and
|
|
8
|
-
# provide the necessary information to llama.cpp via the GGUF header in order to implement
|
|
9
|
-
# the same pre-tokenizer.
|
|
10
|
-
#
|
|
11
|
-
# ref: https://github.com/ggml-org/llama.cpp/pull/6920
|
|
12
|
-
#
|
|
13
|
-
# Instructions:
|
|
14
|
-
#
|
|
15
|
-
# - Add a new model to the "models" list
|
|
16
|
-
# - Run the script with your huggingface token:
|
|
17
|
-
#
|
|
18
|
-
# python3 convert_hf_to_gguf_update.py <huggingface_token>
|
|
19
|
-
#
|
|
20
|
-
# - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
|
|
21
|
-
# - Update llama.cpp with the new pre-tokenizer if necessary
|
|
22
|
-
#
|
|
23
|
-
# TODO: generate tokenizer tests for llama.cpp
|
|
24
|
-
#
|
|
25
|
-
|
|
26
4
|
import logging
|
|
27
5
|
import os
|
|
28
6
|
import pathlib
|
|
@@ -32,6 +10,7 @@ import requests
|
|
|
32
10
|
import sys
|
|
33
11
|
import json
|
|
34
12
|
import shutil
|
|
13
|
+
import argparse
|
|
35
14
|
|
|
36
15
|
from hashlib import sha256
|
|
37
16
|
from enum import IntEnum, auto
|
|
@@ -41,6 +20,11 @@ logging.basicConfig(level=logging.DEBUG)
|
|
|
41
20
|
logger = logging.getLogger("convert_hf_to_gguf_update")
|
|
42
21
|
sess = requests.Session()
|
|
43
22
|
|
|
23
|
+
convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
|
|
24
|
+
convert_py = convert_py_pth.read_text(encoding="utf-8")
|
|
25
|
+
hf_token_pth = pathlib.Path.home() / ".cache" / "huggingface" / "token"
|
|
26
|
+
hf_token = hf_token_pth.read_text(encoding="utf-8").strip() if hf_token_pth.exists() else None
|
|
27
|
+
|
|
44
28
|
|
|
45
29
|
class TOKENIZER_TYPE(IntEnum):
|
|
46
30
|
SPM = auto()
|
|
@@ -49,20 +33,49 @@ class TOKENIZER_TYPE(IntEnum):
|
|
|
49
33
|
UGM = auto()
|
|
50
34
|
|
|
51
35
|
|
|
36
|
+
DOC_STRING = """
|
|
37
|
+
This script downloads the tokenizer models of the specified models from Huggingface and
|
|
38
|
+
generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
|
|
39
|
+
|
|
40
|
+
/!\\ It is intended to be used by contributors and is not meant to be run by end users
|
|
41
|
+
|
|
42
|
+
This is necessary in order to analyze the type of pre-tokenizer used by the model and
|
|
43
|
+
provide the necessary information to llama.cpp via the GGUF header in order to implement
|
|
44
|
+
the same pre-tokenizer.
|
|
45
|
+
|
|
46
|
+
ref: https://github.com/ggml-org/llama.cpp/pull/6920
|
|
47
|
+
|
|
48
|
+
Instructions:
|
|
49
|
+
|
|
50
|
+
- Add a new model to the "models" list
|
|
51
|
+
- Run the script with your huggingface token
|
|
52
|
+
By default, token will be read from ~/.cache/huggingface/token
|
|
53
|
+
- The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
|
|
54
|
+
- Update llama.cpp with the new pre-tokenizer if necessary
|
|
55
|
+
"""
|
|
56
|
+
# TODO: generate tokenizer tests for llama.cpp
|
|
57
|
+
|
|
58
|
+
parser = argparse.ArgumentParser(description=DOC_STRING, formatter_class=argparse.RawTextHelpFormatter)
|
|
59
|
+
parser.add_argument(
|
|
60
|
+
"--full", action="store_true",
|
|
61
|
+
help="download full list of models - make sure you have access to all of them",
|
|
62
|
+
)
|
|
63
|
+
parser.add_argument(
|
|
64
|
+
"hf_token",
|
|
65
|
+
help="optional HF token",
|
|
66
|
+
nargs="?",
|
|
67
|
+
)
|
|
68
|
+
args = parser.parse_args()
|
|
69
|
+
hf_token = args.hf_token if args.hf_token is not None else hf_token
|
|
70
|
+
|
|
71
|
+
if hf_token is None:
|
|
72
|
+
logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
|
|
73
|
+
sys.exit(1)
|
|
74
|
+
|
|
52
75
|
# TODO: this string has to exercise as much pre-tokenizer functionality as possible
|
|
53
76
|
# will be updated with time - contributions welcome
|
|
54
77
|
CHK_TXT = '\n \n\n \n\n\n \t \t\t \t\n \n \n \n \n🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
|
|
55
78
|
|
|
56
|
-
if len(sys.argv) == 2:
|
|
57
|
-
token = sys.argv[1]
|
|
58
|
-
if not token.startswith("hf_"):
|
|
59
|
-
logger.info("Huggingface token seems invalid")
|
|
60
|
-
logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
|
|
61
|
-
sys.exit(1)
|
|
62
|
-
else:
|
|
63
|
-
logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
|
|
64
|
-
sys.exit(1)
|
|
65
|
-
|
|
66
79
|
# TODO: add models here, base models preferred
|
|
67
80
|
models = [
|
|
68
81
|
{"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
|
|
@@ -103,7 +116,6 @@ models = [
|
|
|
103
116
|
{"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
|
|
104
117
|
{"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
|
|
105
118
|
{"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
|
|
106
|
-
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
|
|
107
119
|
{"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
|
|
108
120
|
{"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
|
|
109
121
|
{"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
|
|
@@ -114,11 +126,19 @@ models = [
|
|
|
114
126
|
{"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
|
|
115
127
|
{"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
|
|
116
128
|
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
|
|
117
|
-
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", },
|
|
118
129
|
{"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
|
|
119
130
|
{"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
|
|
120
131
|
]
|
|
121
132
|
|
|
133
|
+
# some models are known to be broken upstream, so we will skip them as exceptions
|
|
134
|
+
pre_computed_hashes = [
|
|
135
|
+
# chatglm-bpe has 2 hashes, why?
|
|
136
|
+
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b"},
|
|
137
|
+
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
|
|
138
|
+
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
|
|
139
|
+
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
|
|
140
|
+
]
|
|
141
|
+
|
|
122
142
|
|
|
123
143
|
def download_file_with_auth(url, token, save_path):
|
|
124
144
|
headers = {"Authorization": f"Bearer {token}"}
|
|
@@ -169,9 +189,29 @@ def download_model(model):
|
|
|
169
189
|
if os.path.isfile(save_path):
|
|
170
190
|
logger.info(f"{name}: File {save_path} already exists - skipping")
|
|
171
191
|
continue
|
|
172
|
-
download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)
|
|
192
|
+
download_file_with_auth(f"{repo}/resolve/main/{file}", hf_token, save_path)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# get list of existing models and chkhsh from the convert_hf_to_gguf.py file
|
|
196
|
+
# returns mapping res --> chkhsh
|
|
197
|
+
def get_existing_models(convert_py):
|
|
198
|
+
pattern = r'if chkhsh == "([a-f0-9]{64})":\s*\n\s*.*\s*res = "([^"]+)"'
|
|
199
|
+
matches = re.findall(pattern, convert_py)
|
|
200
|
+
output = {}
|
|
201
|
+
for chkhsh, res in matches:
|
|
202
|
+
output[res] = chkhsh
|
|
203
|
+
return output
|
|
204
|
+
|
|
173
205
|
|
|
206
|
+
existing_models = {}
|
|
207
|
+
all_models = models.copy()
|
|
208
|
+
if not args.full:
|
|
209
|
+
# Filter out models that already exist in convert_hf_to_gguf.py
|
|
210
|
+
existing_models = get_existing_models(convert_py)
|
|
211
|
+
all_models = models.copy()
|
|
212
|
+
models = [model for model in all_models if model["name"] not in existing_models]
|
|
174
213
|
|
|
214
|
+
logging.info(f"Downloading {len(models)} models...")
|
|
175
215
|
for model in models:
|
|
176
216
|
try:
|
|
177
217
|
download_model(model)
|
|
@@ -182,9 +222,10 @@ for model in models:
|
|
|
182
222
|
# generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
|
|
183
223
|
|
|
184
224
|
src_ifs = ""
|
|
185
|
-
for model in models:
|
|
225
|
+
for model in [*all_models, *pre_computed_hashes]:
|
|
186
226
|
name = model["name"]
|
|
187
227
|
tokt = model["tokt"]
|
|
228
|
+
chkhsh = model.get("chkhsh")
|
|
188
229
|
|
|
189
230
|
if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
|
|
190
231
|
continue
|
|
@@ -195,35 +236,44 @@ for model in models:
|
|
|
195
236
|
continue
|
|
196
237
|
|
|
197
238
|
# create the tokenizer
|
|
198
|
-
|
|
199
|
-
if
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
logger.info("
|
|
221
|
-
|
|
222
|
-
logger.info("
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
239
|
+
if chkhsh is not None:
|
|
240
|
+
# if the model has a pre-computed hash, use it
|
|
241
|
+
logger.info(f"Using pre-computed hash for model {name}: {chkhsh}")
|
|
242
|
+
elif name in existing_models:
|
|
243
|
+
# if the model already exists in convert_hf_to_gguf.py, skip compute hash
|
|
244
|
+
chkhsh = existing_models[name]
|
|
245
|
+
else:
|
|
246
|
+
# otherwise, compute the hash of the tokenizer
|
|
247
|
+
try:
|
|
248
|
+
logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
|
|
249
|
+
if name == "t5":
|
|
250
|
+
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
|
|
251
|
+
else:
|
|
252
|
+
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
|
|
253
|
+
except OSError as e:
|
|
254
|
+
logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
|
|
255
|
+
continue # Skip to the next model if the tokenizer can't be loaded
|
|
256
|
+
|
|
257
|
+
chktok = tokenizer.encode(CHK_TXT)
|
|
258
|
+
chkhsh = sha256(str(chktok).encode()).hexdigest()
|
|
259
|
+
|
|
260
|
+
logger.info(f"model: {name}")
|
|
261
|
+
logger.info(f"tokt: {tokt}")
|
|
262
|
+
logger.info(f"repo: {model['repo']}")
|
|
263
|
+
logger.info(f"chktok: {chktok}")
|
|
264
|
+
logger.info(f"chkhsh: {chkhsh}")
|
|
265
|
+
|
|
266
|
+
# print the "pre_tokenizer" content from the tokenizer.json
|
|
267
|
+
with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
|
|
268
|
+
cfg = json.load(f)
|
|
269
|
+
normalizer = cfg["normalizer"]
|
|
270
|
+
logger.info("normalizer: " + json.dumps(normalizer, indent=4))
|
|
271
|
+
pre_tokenizer = cfg["pre_tokenizer"]
|
|
272
|
+
logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
|
|
273
|
+
if "ignore_merges" in cfg["model"]:
|
|
274
|
+
logger.info("ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4))
|
|
275
|
+
|
|
276
|
+
logger.info("")
|
|
227
277
|
|
|
228
278
|
src_ifs += f" if chkhsh == \"{chkhsh}\":\n"
|
|
229
279
|
src_ifs += f" # ref: {model['repo']}\n"
|
|
@@ -271,8 +321,6 @@ src_func = f"""
|
|
|
271
321
|
return res
|
|
272
322
|
"""
|
|
273
323
|
|
|
274
|
-
convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
|
|
275
|
-
convert_py = convert_py_pth.read_text(encoding="utf-8")
|
|
276
324
|
convert_py = re.sub(
|
|
277
325
|
r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
|
|
278
326
|
lambda m: m.group(1) + src_func + m.group(3),
|
|
@@ -288,7 +336,7 @@ logger.info("+++ convert_hf_to_gguf.py was updated")
|
|
|
288
336
|
|
|
289
337
|
tests = [
|
|
290
338
|
"ied 4 ½ months",
|
|
291
|
-
"
|
|
339
|
+
"Äpfel",
|
|
292
340
|
"",
|
|
293
341
|
" ",
|
|
294
342
|
" ",
|
|
@@ -367,6 +415,10 @@ for model in models:
|
|
|
367
415
|
logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
|
|
368
416
|
continue # Skip this model and continue with the next one in the loop
|
|
369
417
|
|
|
418
|
+
if not os.path.exists(f"models/ggml-vocab-{name}.gguf"):
|
|
419
|
+
logger.info(f"Skip vocab files for model {name}, no GGUF file found")
|
|
420
|
+
continue
|
|
421
|
+
|
|
370
422
|
with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
|
|
371
423
|
for text in tests:
|
|
372
424
|
f.write(f"{text}")
|
|
@@ -129,6 +129,7 @@ option(GGML_LASX "ggml: enable lasx" ON)
|
|
|
129
129
|
option(GGML_LSX "ggml: enable lsx" ON)
|
|
130
130
|
option(GGML_RVV "ggml: enable rvv" ON)
|
|
131
131
|
option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF)
|
|
132
|
+
option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF)
|
|
132
133
|
option(GGML_VXE "ggml: enable vxe" ON)
|
|
133
134
|
|
|
134
135
|
option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF)
|
|
@@ -176,7 +177,6 @@ option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks"
|
|
|
176
177
|
option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF)
|
|
177
178
|
option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug output" OFF)
|
|
178
179
|
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF)
|
|
179
|
-
option(GGML_VULKAN_PERF "ggml: enable Vulkan perf output" OFF)
|
|
180
180
|
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
|
|
181
181
|
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
|
|
182
182
|
option(GGML_KOMPUTE "ggml: use Kompute" OFF)
|
|
@@ -193,6 +193,7 @@ option(GGML_RPC "ggml: use RPC"
|
|
|
193
193
|
option(GGML_SYCL "ggml: use SYCL" OFF)
|
|
194
194
|
option(GGML_SYCL_F16 "ggml: use 16 bit floats for sycl calculations" OFF)
|
|
195
195
|
option(GGML_SYCL_GRAPH "ggml: enable graphs in the SYCL backend" ON)
|
|
196
|
+
option(GGML_SYCL_DNN "ggml: enable oneDNN in the SYCL backend" ON)
|
|
196
197
|
set (GGML_SYCL_TARGET "INTEL" CACHE STRING
|
|
197
198
|
"ggml: sycl target device")
|
|
198
199
|
set (GGML_SYCL_DEVICE_ARCH "" CACHE STRING
|
|
@@ -24,3 +24,28 @@ function(ggml_get_flags CCID CCVER)
|
|
|
24
24
|
set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE)
|
|
25
25
|
set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE)
|
|
26
26
|
endfunction()
|
|
27
|
+
|
|
28
|
+
function(ggml_get_system_arch)
|
|
29
|
+
if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR
|
|
30
|
+
CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR
|
|
31
|
+
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
32
|
+
CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$"))
|
|
33
|
+
set(GGML_SYSTEM_ARCH "ARM" PARENT_SCOPE)
|
|
34
|
+
elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR
|
|
35
|
+
CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR
|
|
36
|
+
(NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND
|
|
37
|
+
CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$"))
|
|
38
|
+
set(GGML_SYSTEM_ARCH "x86" PARENT_SCOPE)
|
|
39
|
+
elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR
|
|
40
|
+
"${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ")
|
|
41
|
+
set(GGML_SYSTEM_ARCH "PowerPC" PARENT_SCOPE)
|
|
42
|
+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
|
|
43
|
+
set(GGML_SYSTEM_ARCH "loongarch64" PARENT_SCOPE)
|
|
44
|
+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "riscv64")
|
|
45
|
+
set(GGML_SYSTEM_ARCH "riscv64" PARENT_SCOPE)
|
|
46
|
+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
|
|
47
|
+
set(GGML_SYSTEM_ARCH "s390x" PARENT_SCOPE)
|
|
48
|
+
else()
|
|
49
|
+
set(GGML_SYSTEM_ARCH "UNKNOWN" PARENT_SCOPE)
|
|
50
|
+
endif()
|
|
51
|
+
endfunction()
|
|
@@ -37,13 +37,16 @@ extern "C" {
|
|
|
37
37
|
// ====== Dataset ======
|
|
38
38
|
|
|
39
39
|
GGML_API ggml_opt_dataset_t ggml_opt_dataset_init(
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
int64_t
|
|
43
|
-
int64_t
|
|
40
|
+
enum ggml_type type_data, // the type for the internal data tensor
|
|
41
|
+
enum ggml_type type_label, // the type for the internal labels tensor
|
|
42
|
+
int64_t ne_datapoint, // number of elements per datapoint
|
|
43
|
+
int64_t ne_label, // number of elements per label
|
|
44
|
+
int64_t ndata, // total number of datapoints/labels
|
|
45
|
+
int64_t ndata_shard); // number of datapoints/labels per shard (unit at which the dataset is shuffled/copied)
|
|
44
46
|
GGML_API void ggml_opt_dataset_free(ggml_opt_dataset_t dataset);
|
|
45
47
|
|
|
46
48
|
// get underlying tensors that store the data
|
|
49
|
+
GGML_API int64_t ggml_opt_dataset_ndata (ggml_opt_dataset_t dataset);
|
|
47
50
|
GGML_API struct ggml_tensor * ggml_opt_dataset_data (ggml_opt_dataset_t dataset); // shape = [ne_datapoint, ndata]
|
|
48
51
|
GGML_API struct ggml_tensor * ggml_opt_dataset_labels(ggml_opt_dataset_t dataset); // shape = [nd_label, ndata]
|
|
49
52
|
|
|
@@ -56,13 +59,19 @@ extern "C" {
|
|
|
56
59
|
struct ggml_tensor * data_batch, // shape = [ne_datapoint, ndata_batch]
|
|
57
60
|
struct ggml_tensor * labels_batch, // shape = [ne_label, ndata_batch]
|
|
58
61
|
int64_t ibatch);
|
|
62
|
+
GGML_API void ggml_opt_dataset_get_batch_host(
|
|
63
|
+
ggml_opt_dataset_t dataset,
|
|
64
|
+
void * data_batch,
|
|
65
|
+
size_t nb_data_batch,
|
|
66
|
+
void * labels_batch,
|
|
67
|
+
int64_t ibatch);
|
|
59
68
|
|
|
60
69
|
// ====== Model / Context ======
|
|
61
70
|
|
|
62
71
|
enum ggml_opt_build_type {
|
|
63
|
-
GGML_OPT_BUILD_TYPE_FORWARD,
|
|
64
|
-
GGML_OPT_BUILD_TYPE_GRAD,
|
|
65
|
-
GGML_OPT_BUILD_TYPE_OPT,
|
|
72
|
+
GGML_OPT_BUILD_TYPE_FORWARD = 10,
|
|
73
|
+
GGML_OPT_BUILD_TYPE_GRAD = 20,
|
|
74
|
+
GGML_OPT_BUILD_TYPE_OPT = 30,
|
|
66
75
|
};
|
|
67
76
|
|
|
68
77
|
// parameters that control which optimizer is used and how said optimizer tries to find the minimal loss
|
|
@@ -81,20 +90,22 @@ extern "C" {
|
|
|
81
90
|
// userdata can be used to pass arbitrary data
|
|
82
91
|
typedef struct ggml_opt_optimizer_params (*ggml_opt_get_optimizer_params)(void * userdata);
|
|
83
92
|
|
|
84
|
-
// returns the default optimizer params (constant)
|
|
93
|
+
// returns the default optimizer params (constant, hard-coded values)
|
|
85
94
|
// userdata is not used
|
|
86
95
|
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_default_optimizer_params(void * userdata);
|
|
87
96
|
|
|
97
|
+
// casts userdata to ggml_opt_optimizer_params and returns it
|
|
98
|
+
GGML_API struct ggml_opt_optimizer_params ggml_opt_get_constant_optimizer_params(void * userdata);
|
|
99
|
+
|
|
88
100
|
// parameters for initializing a new optimization context
|
|
89
101
|
struct ggml_opt_params {
|
|
90
102
|
ggml_backend_sched_t backend_sched; // defines which backends are used to construct the compute graphs
|
|
91
103
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
struct ggml_tensor
|
|
97
|
-
struct ggml_tensor * outputs;
|
|
104
|
+
// by default the forward graph needs to be reconstructed for each eval
|
|
105
|
+
// if ctx_compute, inputs, and outputs are set the graphs are instead allocated statically
|
|
106
|
+
struct ggml_context * ctx_compute;
|
|
107
|
+
struct ggml_tensor * inputs;
|
|
108
|
+
struct ggml_tensor * outputs;
|
|
98
109
|
|
|
99
110
|
enum ggml_opt_loss_type loss_type;
|
|
100
111
|
enum ggml_opt_build_type build_type;
|
|
@@ -107,12 +118,9 @@ extern "C" {
|
|
|
107
118
|
|
|
108
119
|
// get parameters for an optimization context with defaults set where possible
|
|
109
120
|
// parameters for which no sensible defaults exist are supplied as arguments to this function
|
|
110
|
-
GGML_API ggml_opt_params ggml_opt_default_params(
|
|
111
|
-
ggml_backend_sched_t
|
|
112
|
-
|
|
113
|
-
struct ggml_tensor * inputs,
|
|
114
|
-
struct ggml_tensor * outputs,
|
|
115
|
-
enum ggml_opt_loss_type loss_type);
|
|
121
|
+
GGML_API struct ggml_opt_params ggml_opt_default_params(
|
|
122
|
+
ggml_backend_sched_t backend_sched,
|
|
123
|
+
enum ggml_opt_loss_type loss_type);
|
|
116
124
|
|
|
117
125
|
GGML_API ggml_opt_context_t ggml_opt_init(struct ggml_opt_params params);
|
|
118
126
|
GGML_API void ggml_opt_free(ggml_opt_context_t opt_ctx);
|
|
@@ -120,7 +128,10 @@ extern "C" {
|
|
|
120
128
|
// set gradients to zero, initilize loss, and optionally reset the optimizer
|
|
121
129
|
GGML_API void ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer);
|
|
122
130
|
|
|
131
|
+
GGML_API bool ggml_opt_static_graphs(ggml_opt_context_t opt_ctx); // whether the graphs are allocated_statically
|
|
132
|
+
|
|
123
133
|
// get underlying tensors that store data
|
|
134
|
+
// if not using static graphs these pointers become invalid with the next call to ggml_opt_alloc
|
|
124
135
|
GGML_API struct ggml_tensor * ggml_opt_inputs( ggml_opt_context_t opt_ctx); // forward graph input tensor
|
|
125
136
|
GGML_API struct ggml_tensor * ggml_opt_outputs( ggml_opt_context_t opt_ctx); // forward graph output tensor
|
|
126
137
|
GGML_API struct ggml_tensor * ggml_opt_labels( ggml_opt_context_t opt_ctx); // labels to compare outputs against
|
|
@@ -128,11 +139,12 @@ extern "C" {
|
|
|
128
139
|
GGML_API struct ggml_tensor * ggml_opt_pred( ggml_opt_context_t opt_ctx); // predictions made by outputs
|
|
129
140
|
GGML_API struct ggml_tensor * ggml_opt_ncorrect(ggml_opt_context_t opt_ctx); // number of matching predictions between outputs and labels
|
|
130
141
|
|
|
142
|
+
// get the gradient accumulator for a node from the forward graph
|
|
131
143
|
GGML_API struct ggml_tensor * ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, struct ggml_tensor * node);
|
|
132
144
|
|
|
133
145
|
// ====== Optimization Result ======
|
|
134
146
|
|
|
135
|
-
GGML_API ggml_opt_result_t ggml_opt_result_init();
|
|
147
|
+
GGML_API ggml_opt_result_t ggml_opt_result_init(void);
|
|
136
148
|
GGML_API void ggml_opt_result_free(ggml_opt_result_t result);
|
|
137
149
|
GGML_API void ggml_opt_result_reset(ggml_opt_result_t result);
|
|
138
150
|
|
|
@@ -144,11 +156,20 @@ extern "C" {
|
|
|
144
156
|
|
|
145
157
|
// ====== Computation ======
|
|
146
158
|
|
|
147
|
-
//
|
|
148
|
-
GGML_API void
|
|
159
|
+
// if not using static graphs, this function must be called prior to ggml_opt_alloc
|
|
160
|
+
GGML_API void ggml_opt_prepare_alloc(
|
|
161
|
+
ggml_opt_context_t opt_ctx,
|
|
162
|
+
struct ggml_context * ctx_compute,
|
|
163
|
+
struct ggml_cgraph * gf,
|
|
164
|
+
struct ggml_tensor * inputs,
|
|
165
|
+
struct ggml_tensor * outputs);
|
|
166
|
+
|
|
167
|
+
// allocate the next graph for evaluation, either forward or forward + backward
|
|
168
|
+
// must be called exactly once prior to calling ggml_opt_eval
|
|
169
|
+
GGML_API void ggml_opt_alloc(ggml_opt_context_t opt_ctx, bool backward);
|
|
149
170
|
|
|
150
|
-
// do forward pass, increment result if not NULL, do backward pass
|
|
151
|
-
GGML_API void
|
|
171
|
+
// do forward pass, increment result if not NULL, do backward pass if allocated
|
|
172
|
+
GGML_API void ggml_opt_eval(ggml_opt_context_t opt_ctx, ggml_opt_result_t result);
|
|
152
173
|
|
|
153
174
|
// ############################################################################
|
|
154
175
|
// ## The high-level functions start here. They do not depend on any private ##
|
|
@@ -200,9 +221,9 @@ extern "C" {
|
|
|
200
221
|
// fit model defined by inputs and outputs to dataset
|
|
201
222
|
GGML_API void ggml_opt_fit(
|
|
202
223
|
ggml_backend_sched_t backend_sched, // backend scheduler for constructing the compute graphs
|
|
203
|
-
ggml_context
|
|
204
|
-
ggml_tensor
|
|
205
|
-
ggml_tensor
|
|
224
|
+
struct ggml_context * ctx_compute, // context with temporarily allocated tensors to calculate the outputs
|
|
225
|
+
struct ggml_tensor * inputs, // input tensor with shape [ne_datapoint, ndata_batch]
|
|
226
|
+
struct ggml_tensor * outputs, // output tensor, must have shape [ne_label, ndata_batch] if labels are used
|
|
206
227
|
ggml_opt_dataset_t dataset, // dataset with data and optionally also labels
|
|
207
228
|
enum ggml_opt_loss_type loss_type, // loss to minimize
|
|
208
229
|
ggml_opt_get_optimizer_params get_opt_pars, // callback to get optimizer params, userdata is pointer to epoch (of type int64_t)
|
|
@@ -536,6 +536,7 @@ extern "C" {
|
|
|
536
536
|
GGML_UNARY_OP_HARDSWISH,
|
|
537
537
|
GGML_UNARY_OP_HARDSIGMOID,
|
|
538
538
|
GGML_UNARY_OP_EXP,
|
|
539
|
+
GGML_UNARY_OP_GELU_ERF,
|
|
539
540
|
|
|
540
541
|
GGML_UNARY_OP_COUNT,
|
|
541
542
|
};
|
|
@@ -768,7 +769,7 @@ extern "C" {
|
|
|
768
769
|
// Tensor flags
|
|
769
770
|
GGML_API void ggml_set_input(struct ggml_tensor * tensor);
|
|
770
771
|
GGML_API void ggml_set_output(struct ggml_tensor * tensor);
|
|
771
|
-
GGML_API void ggml_set_param(struct
|
|
772
|
+
GGML_API void ggml_set_param(struct ggml_tensor * tensor);
|
|
772
773
|
GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
|
|
773
774
|
|
|
774
775
|
//
|
|
@@ -934,11 +935,20 @@ extern "C" {
|
|
|
934
935
|
struct ggml_tensor * a,
|
|
935
936
|
struct ggml_tensor * b);
|
|
936
937
|
|
|
938
|
+
// repeat a to the specified shape
|
|
939
|
+
GGML_API struct ggml_tensor * ggml_repeat_4d(
|
|
940
|
+
struct ggml_context * ctx,
|
|
941
|
+
struct ggml_tensor * a,
|
|
942
|
+
int64_t ne0,
|
|
943
|
+
int64_t ne1,
|
|
944
|
+
int64_t ne2,
|
|
945
|
+
int64_t ne3);
|
|
946
|
+
|
|
937
947
|
// sums repetitions in a into shape of b
|
|
938
948
|
GGML_API struct ggml_tensor * ggml_repeat_back(
|
|
939
949
|
struct ggml_context * ctx,
|
|
940
950
|
struct ggml_tensor * a,
|
|
941
|
-
struct ggml_tensor * b);
|
|
951
|
+
struct ggml_tensor * b); // sum up values that are adjacent in dims > 0 instead of repeated with same stride
|
|
942
952
|
|
|
943
953
|
// concat a and b along dim
|
|
944
954
|
// used in stable-diffusion
|
|
@@ -1024,6 +1034,16 @@ extern "C" {
|
|
|
1024
1034
|
struct ggml_context * ctx,
|
|
1025
1035
|
struct ggml_tensor * a);
|
|
1026
1036
|
|
|
1037
|
+
// GELU using erf (error function) when possible
|
|
1038
|
+
// some backends may fallback to approximation based on Abramowitz and Stegun formula
|
|
1039
|
+
GGML_API struct ggml_tensor * ggml_gelu_erf(
|
|
1040
|
+
struct ggml_context * ctx,
|
|
1041
|
+
struct ggml_tensor * a);
|
|
1042
|
+
|
|
1043
|
+
GGML_API struct ggml_tensor * ggml_gelu_erf_inplace(
|
|
1044
|
+
struct ggml_context * ctx,
|
|
1045
|
+
struct ggml_tensor * a);
|
|
1046
|
+
|
|
1027
1047
|
GGML_API struct ggml_tensor * ggml_gelu_quick(
|
|
1028
1048
|
struct ggml_context * ctx,
|
|
1029
1049
|
struct ggml_tensor * a);
|
|
@@ -2049,15 +2069,14 @@ extern "C" {
|
|
|
2049
2069
|
|
|
2050
2070
|
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
|
|
2051
2071
|
GGML_API void ggml_build_backward_expand(
|
|
2052
|
-
struct ggml_context *
|
|
2053
|
-
struct
|
|
2054
|
-
struct
|
|
2055
|
-
bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
|
|
2072
|
+
struct ggml_context * ctx, // context for gradient computation
|
|
2073
|
+
struct ggml_cgraph * cgraph,
|
|
2074
|
+
struct ggml_tensor ** grad_accs);
|
|
2056
2075
|
|
|
2057
2076
|
// graph allocation in a context
|
|
2058
2077
|
GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
|
|
2059
2078
|
GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
|
|
2060
|
-
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
|
|
2079
|
+
GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads);
|
|
2061
2080
|
GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
|
|
2062
2081
|
GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
|
|
2063
2082
|
GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
|
|
@@ -109,6 +109,8 @@ if (MSVC)
|
|
|
109
109
|
else ()
|
|
110
110
|
set(CMAKE_GENERATOR_PLATFORM_LWR "")
|
|
111
111
|
endif ()
|
|
112
|
+
ggml_get_system_arch()
|
|
113
|
+
message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")
|
|
112
114
|
|
|
113
115
|
if (NOT MSVC)
|
|
114
116
|
if (GGML_STATIC)
|
|
@@ -287,16 +289,20 @@ if (GGML_CPU_ALL_VARIANTS)
|
|
|
287
289
|
if (NOT GGML_BACKEND_DL)
|
|
288
290
|
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
|
289
291
|
endif()
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
292
|
+
if (GGML_SYSTEM_ARCH STREQUAL "x86")
|
|
293
|
+
ggml_add_cpu_backend_variant(x64)
|
|
294
|
+
ggml_add_cpu_backend_variant(sse42 SSE42)
|
|
295
|
+
ggml_add_cpu_backend_variant(sandybridge SSE42 AVX)
|
|
296
|
+
ggml_add_cpu_backend_variant(haswell SSE42 AVX F16C AVX2 BMI2 FMA)
|
|
297
|
+
ggml_add_cpu_backend_variant(skylakex SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
|
|
298
|
+
ggml_add_cpu_backend_variant(icelake SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
|
|
299
|
+
ggml_add_cpu_backend_variant(alderlake SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
|
|
300
|
+
if (NOT MSVC)
|
|
301
|
+
# MSVC doesn't support AMX
|
|
302
|
+
ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
|
|
303
|
+
endif()
|
|
304
|
+
else()
|
|
305
|
+
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported on ${GGML_SYSTEM_ARCH}")
|
|
300
306
|
endif()
|
|
301
307
|
elseif (GGML_CPU)
|
|
302
308
|
ggml_add_cpu_backend_variant_impl("")
|
|
@@ -1111,7 +1111,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
|
|
1111
1111
|
|
|
1112
1112
|
const int node_backend_id = tensor_backend_id(node);
|
|
1113
1113
|
|
|
1114
|
-
assert(node_backend_id != -1); // all nodes should be assigned by now
|
|
1114
|
+
assert(node_backend_id != -1); // all nodes should be assigned by now, this can happen if there is no CPU fallback
|
|
1115
1115
|
|
|
1116
1116
|
// check if we should start a new split based on the sources of the current node
|
|
1117
1117
|
bool need_new_split = false;
|
|
@@ -1598,6 +1598,9 @@ void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
|
|
1598
1598
|
for (int i = 0; i < sched->n_backends; i++) {
|
|
1599
1599
|
ggml_backend_synchronize(sched->backends[i]);
|
|
1600
1600
|
}
|
|
1601
|
+
// reset the current copy to 0 so that the graphs will be similar during generation
|
|
1602
|
+
// necessary for CUDA graphs
|
|
1603
|
+
sched->cur_copy = 0;
|
|
1601
1604
|
}
|
|
1602
1605
|
|
|
1603
1606
|
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|
|
@@ -30,6 +30,7 @@ string(TOLOWER ${SOC_TYPE} SOC_VERSION) # SOC_VERSION need lower
|
|
|
30
30
|
string(REGEX MATCH "[0-9]+[a-zA-Z]" SOC_TYPE_MAJOR_SN "${SOC_VERSION}")
|
|
31
31
|
set(SOC_TYPE_COMPILE_OPTION "ASCEND_${SOC_TYPE_MAJOR_SN}")
|
|
32
32
|
string(TOUPPER ${SOC_TYPE_COMPILE_OPTION} SOC_TYPE_COMPILE_OPTION)
|
|
33
|
+
message(STATUS "CANN: SOC_VERSION = ${SOC_VERSION}")
|
|
33
34
|
|
|
34
35
|
if (CANN_INSTALL_DIR)
|
|
35
36
|
# Only Support Linux.
|