@novastera-oss/llamarn 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -14
- package/RNLlamaCpp.podspec +10 -3
- package/android/CMakeLists.txt +8 -0
- package/android/src/main/cpp/include/llama.h +62 -125
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/PureCppImpl.cpp +9 -27
- package/cpp/SystemUtils.h +2 -2
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/README.md +11 -3
- package/cpp/llama.cpp/build-xcframework.sh +1 -0
- package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/common/arg.cpp +153 -113
- package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
- package/cpp/llama.cpp/common/chat-parser.h +117 -0
- package/cpp/llama.cpp/common/chat.cpp +847 -699
- package/cpp/llama.cpp/common/chat.h +73 -6
- package/cpp/llama.cpp/common/common.cpp +50 -82
- package/cpp/llama.cpp/common/common.h +21 -17
- package/cpp/llama.cpp/common/json-partial.cpp +255 -0
- package/cpp/llama.cpp/common/json-partial.h +37 -0
- package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
- package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
- package/cpp/llama.cpp/common/regex-partial.h +56 -0
- package/cpp/llama.cpp/common/sampling.cpp +7 -8
- package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
- package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
- package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
- package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
- package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
- package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
- package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
- package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
- package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
- package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
- package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
- package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
- package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
- package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
- package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
- package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
- package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
- package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
- package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
- package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
- package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
- package/cpp/llama.cpp/include/llama.h +62 -125
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
- package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
- package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
- package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
- package/cpp/llama.cpp/models/templates/README.md +2 -0
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
- package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
- package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
- package/cpp/llama.cpp/src/llama-arch.h +2 -0
- package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
- package/cpp/llama.cpp/src/llama-context.cpp +340 -123
- package/cpp/llama.cpp/src/llama-context.h +30 -0
- package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
- package/cpp/llama.cpp/src/llama-cparams.h +2 -0
- package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
- package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
- package/cpp/llama.cpp/src/llama-graph.h +52 -7
- package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
- package/cpp/llama.cpp/src/llama-hparams.h +37 -5
- package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
- package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
- package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
- package/cpp/llama.cpp/src/llama-memory.h +4 -3
- package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
- package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
- package/cpp/llama.cpp/src/llama-model.cpp +529 -172
- package/cpp/llama.cpp/src/llama-model.h +6 -1
- package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
- package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
- package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
- package/cpp/llama.cpp/src/llama-vocab.h +6 -0
- package/cpp/llama.cpp/src/llama.cpp +14 -0
- package/cpp/rn-completion.cpp +60 -5
- package/ios/include/chat.h +73 -6
- package/ios/include/common/minja/chat-template.hpp +9 -5
- package/ios/include/common/minja/minja.hpp +69 -36
- package/ios/include/common.h +21 -17
- package/ios/include/llama.h +62 -125
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/common/stb_image.h +0 -7988
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
- package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
|
@@ -64,12 +64,17 @@
|
|
|
64
64
|
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
|
65
65
|
float ggml_table_f32_f16[1 << 16];
|
|
66
66
|
|
|
67
|
-
#if
|
|
68
|
-
(
|
|
67
|
+
#if defined(__linux__) || \
|
|
68
|
+
defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
|
|
69
|
+
(defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
|
|
70
|
+
|
|
69
71
|
#include <unistd.h>
|
|
70
72
|
#include <sys/types.h>
|
|
71
73
|
#include <sys/stat.h>
|
|
72
74
|
#include <sys/wait.h>
|
|
75
|
+
#if defined(__linux__)
|
|
76
|
+
#include <sys/prctl.h>
|
|
77
|
+
#endif
|
|
73
78
|
|
|
74
79
|
#if defined(__ANDROID__)
|
|
75
80
|
#include <unwind.h>
|
|
@@ -133,10 +138,36 @@ static void ggml_print_backtrace(void) {
|
|
|
133
138
|
if (GGML_NO_BACKTRACE) {
|
|
134
139
|
return;
|
|
135
140
|
}
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
141
|
+
#if defined(__linux__)
|
|
142
|
+
FILE * f = fopen("/proc/self/status", "r");
|
|
143
|
+
size_t size = 0;
|
|
144
|
+
char * line = NULL;
|
|
145
|
+
ssize_t length = 0;
|
|
146
|
+
while ((length = getline(&line, &size, f)) > 0) {
|
|
147
|
+
if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
|
|
148
|
+
(length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
|
|
149
|
+
// Already being debugged, and the breakpoint is the later abort()
|
|
150
|
+
free(line);
|
|
151
|
+
fclose(f);
|
|
152
|
+
return;
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
free(line);
|
|
156
|
+
fclose(f);
|
|
157
|
+
int lock[2] = { -1, -1 };
|
|
158
|
+
(void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
|
|
159
|
+
#endif
|
|
160
|
+
const int parent_pid = getpid();
|
|
161
|
+
const int child_pid = fork();
|
|
162
|
+
if (child_pid < 0) { // error
|
|
163
|
+
return;
|
|
164
|
+
} else if (child_pid == 0) { // child
|
|
165
|
+
char attach[32];
|
|
166
|
+
snprintf(attach, sizeof(attach), "attach %d", parent_pid);
|
|
167
|
+
#if defined(__linux__)
|
|
168
|
+
close(lock[1]);
|
|
169
|
+
(void) !read(lock[0], lock, 1);
|
|
170
|
+
#endif
|
|
140
171
|
// try gdb
|
|
141
172
|
execlp("gdb", "gdb", "--batch",
|
|
142
173
|
"-ex", "set style enabled on",
|
|
@@ -149,18 +180,18 @@ static void ggml_print_backtrace(void) {
|
|
|
149
180
|
execlp("lldb", "lldb", "--batch",
|
|
150
181
|
"-o", "bt",
|
|
151
182
|
"-o", "quit",
|
|
152
|
-
"-p", attach,
|
|
183
|
+
"-p", &attach[sizeof("attach ") - 1],
|
|
153
184
|
(char *) NULL);
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
185
|
+
// gdb failed, fallback to backtrace_symbols
|
|
186
|
+
ggml_print_backtrace_symbols();
|
|
187
|
+
_Exit(0);
|
|
188
|
+
} else { // parent
|
|
189
|
+
#if defined(__linux__)
|
|
190
|
+
prctl(PR_SET_PTRACER, child_pid);
|
|
191
|
+
close(lock[1]);
|
|
192
|
+
close(lock[0]);
|
|
193
|
+
#endif
|
|
194
|
+
waitpid(child_pid, NULL, 0);
|
|
164
195
|
}
|
|
165
196
|
}
|
|
166
197
|
#else
|
|
@@ -1068,9 +1099,10 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|
|
1068
1099
|
"HARDSWISH",
|
|
1069
1100
|
"HARDSIGMOID",
|
|
1070
1101
|
"EXP",
|
|
1102
|
+
"GELU_ERF",
|
|
1071
1103
|
};
|
|
1072
1104
|
|
|
1073
|
-
static_assert(GGML_UNARY_OP_COUNT ==
|
|
1105
|
+
static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
|
|
1074
1106
|
|
|
1075
1107
|
|
|
1076
1108
|
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
|
@@ -2280,6 +2312,26 @@ struct ggml_tensor * ggml_repeat(
|
|
|
2280
2312
|
return result;
|
|
2281
2313
|
}
|
|
2282
2314
|
|
|
2315
|
+
struct ggml_tensor * ggml_repeat_4d(
|
|
2316
|
+
struct ggml_context * ctx,
|
|
2317
|
+
struct ggml_tensor * a,
|
|
2318
|
+
int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
|
|
2319
|
+
const bool can_repeat = ggml_is_empty(a) || (
|
|
2320
|
+
(ne0 % a->ne[0] == 0) &&
|
|
2321
|
+
(ne1 % a->ne[1] == 0) &&
|
|
2322
|
+
(ne2 % a->ne[2] == 0) &&
|
|
2323
|
+
(ne3 % a->ne[3] == 0)
|
|
2324
|
+
);
|
|
2325
|
+
GGML_ASSERT(can_repeat);
|
|
2326
|
+
|
|
2327
|
+
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
|
2328
|
+
|
|
2329
|
+
result->op = GGML_OP_REPEAT;
|
|
2330
|
+
result->src[0] = a;
|
|
2331
|
+
|
|
2332
|
+
return result;
|
|
2333
|
+
}
|
|
2334
|
+
|
|
2283
2335
|
// ggml_repeat_back
|
|
2284
2336
|
|
|
2285
2337
|
struct ggml_tensor * ggml_repeat_back(
|
|
@@ -2470,6 +2522,20 @@ struct ggml_tensor * ggml_gelu_inplace(
|
|
|
2470
2522
|
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
|
|
2471
2523
|
}
|
|
2472
2524
|
|
|
2525
|
+
// ggml_gelu_erf
|
|
2526
|
+
|
|
2527
|
+
struct ggml_tensor * ggml_gelu_erf(
|
|
2528
|
+
struct ggml_context * ctx,
|
|
2529
|
+
struct ggml_tensor * a) {
|
|
2530
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
|
|
2531
|
+
}
|
|
2532
|
+
|
|
2533
|
+
struct ggml_tensor * ggml_gelu_erf_inplace(
|
|
2534
|
+
struct ggml_context * ctx,
|
|
2535
|
+
struct ggml_tensor * a) {
|
|
2536
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
|
|
2537
|
+
}
|
|
2538
|
+
|
|
2473
2539
|
// ggml_gelu_quick
|
|
2474
2540
|
|
|
2475
2541
|
struct ggml_tensor * ggml_gelu_quick(
|
|
@@ -5499,7 +5565,7 @@ static void ggml_compute_backward(
|
|
|
5499
5565
|
// tensor = src0 * 1 + src1 * 0
|
|
5500
5566
|
if (src0_needs_grads) {
|
|
5501
5567
|
// dsrc0 = dtensor * 1
|
|
5502
|
-
ggml_add_or_set(ctx, cgraph, isrc0, grad);
|
|
5568
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
|
|
5503
5569
|
}
|
|
5504
5570
|
if (src1_needs_grads) {
|
|
5505
5571
|
// dsrc1 = dtensor * 0 -> noop
|
|
@@ -5780,10 +5846,9 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
|
5780
5846
|
}
|
|
5781
5847
|
|
|
5782
5848
|
void ggml_build_backward_expand(
|
|
5783
|
-
struct ggml_context *
|
|
5784
|
-
struct
|
|
5785
|
-
struct
|
|
5786
|
-
bool accumulate) {
|
|
5849
|
+
struct ggml_context * ctx,
|
|
5850
|
+
struct ggml_cgraph * cgraph,
|
|
5851
|
+
struct ggml_tensor ** grad_accs) {
|
|
5787
5852
|
GGML_ASSERT(cgraph->n_nodes > 0);
|
|
5788
5853
|
GGML_ASSERT(cgraph->grads);
|
|
5789
5854
|
GGML_ASSERT(cgraph->grad_accs);
|
|
@@ -5856,21 +5921,24 @@ void ggml_build_backward_expand(
|
|
|
5856
5921
|
GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
|
|
5857
5922
|
node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
|
|
5858
5923
|
|
|
5859
|
-
const size_t
|
|
5860
|
-
GGML_ASSERT(
|
|
5861
|
-
GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used,
|
|
5862
|
-
if (
|
|
5863
|
-
cgraph->grad_accs[
|
|
5864
|
-
cgraph->grads[
|
|
5865
|
-
|
|
5924
|
+
const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
|
|
5925
|
+
GGML_ASSERT(ihash != GGML_HASHSET_FULL);
|
|
5926
|
+
GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
|
|
5927
|
+
if (grad_accs && grad_accs[i]) {
|
|
5928
|
+
cgraph->grad_accs[ihash] = grad_accs[i];
|
|
5929
|
+
cgraph->grads[ihash] = cgraph->grad_accs[ihash];
|
|
5930
|
+
} else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
|
|
5931
|
+
// loss tensors always need a gradient accumulator
|
|
5932
|
+
cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
|
|
5933
|
+
cgraph->grads[ihash] = cgraph->grad_accs[ihash];
|
|
5866
5934
|
}
|
|
5867
|
-
grads_needed[
|
|
5935
|
+
grads_needed[ihash] = true;
|
|
5868
5936
|
}
|
|
5869
5937
|
|
|
5870
5938
|
for (int i = n_nodes_f - 1; i >= 0; --i) {
|
|
5871
5939
|
// inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
|
|
5872
5940
|
// use allocator to automatically make inplace operations
|
|
5873
|
-
ggml_compute_backward(
|
|
5941
|
+
ggml_compute_backward(ctx, cgraph, i, grads_needed);
|
|
5874
5942
|
}
|
|
5875
5943
|
|
|
5876
5944
|
free(grads_needed);
|
|
@@ -6016,8 +6084,8 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
|
|
|
6016
6084
|
}
|
|
6017
6085
|
}
|
|
6018
6086
|
|
|
6019
|
-
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
|
|
6020
|
-
struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads
|
|
6087
|
+
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
|
|
6088
|
+
struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
|
|
6021
6089
|
ggml_graph_cpy(cgraph, result);
|
|
6022
6090
|
return result;
|
|
6023
6091
|
}
|
|
@@ -6036,6 +6104,9 @@ struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
|
|
|
6036
6104
|
}
|
|
6037
6105
|
|
|
6038
6106
|
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
6107
|
+
if (!cgraph) {
|
|
6108
|
+
return;
|
|
6109
|
+
}
|
|
6039
6110
|
GGML_ASSERT(cgraph->grads != NULL);
|
|
6040
6111
|
|
|
6041
6112
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
|
@@ -6345,8 +6416,8 @@ void ggml_set_output(struct ggml_tensor * tensor) {
|
|
|
6345
6416
|
tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
|
|
6346
6417
|
}
|
|
6347
6418
|
|
|
6348
|
-
void ggml_set_param(struct
|
|
6349
|
-
|
|
6419
|
+
void ggml_set_param(struct ggml_tensor * tensor) {
|
|
6420
|
+
GGML_ASSERT(tensor->op == GGML_OP_NONE);
|
|
6350
6421
|
tensor->flags |= GGML_TENSOR_FLAG_PARAM;
|
|
6351
6422
|
}
|
|
6352
6423
|
|
|
@@ -299,10 +299,10 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
|
|
|
299
299
|
return false;
|
|
300
300
|
}
|
|
301
301
|
} catch (std::length_error &) {
|
|
302
|
-
|
|
302
|
+
GGML_LOG_ERROR("%s: encountered length_error while reading value for key '%s'\n", __func__, key.c_str());
|
|
303
303
|
return false;
|
|
304
304
|
} catch (std::bad_alloc &) {
|
|
305
|
-
|
|
305
|
+
GGML_LOG_ERROR("%s: encountered bad_alloc error while reading value for key '%s'\n", __func__, key.c_str());
|
|
306
306
|
return false;
|
|
307
307
|
}
|
|
308
308
|
kv.emplace_back(key, value);
|
|
@@ -328,14 +328,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
328
328
|
ok = ok && gr.read(magic, 4);
|
|
329
329
|
|
|
330
330
|
if (!ok) {
|
|
331
|
-
|
|
331
|
+
GGML_LOG_ERROR("%s: failed to read magic\n", __func__);
|
|
332
332
|
gguf_free(ctx);
|
|
333
333
|
return nullptr;
|
|
334
334
|
}
|
|
335
335
|
|
|
336
336
|
for (uint32_t i = 0; i < magic.size(); i++) {
|
|
337
337
|
if (magic[i] != GGUF_MAGIC[i]) {
|
|
338
|
-
|
|
338
|
+
GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
|
|
339
339
|
gguf_free(ctx);
|
|
340
340
|
return nullptr;
|
|
341
341
|
}
|
|
@@ -348,11 +348,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
348
348
|
|
|
349
349
|
if (ok && gr.read(ctx->version)) {
|
|
350
350
|
if (ctx->version == 1) {
|
|
351
|
-
|
|
351
|
+
GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
|
|
352
352
|
ok = false;
|
|
353
353
|
}
|
|
354
354
|
if (ctx->version > GGUF_VERSION) {
|
|
355
|
-
|
|
355
|
+
GGML_LOG_ERROR("%s: this GGUF file is version %" PRIu32 " but this software only supports up to version %d\n",
|
|
356
356
|
__func__, ctx->version, GGUF_VERSION);
|
|
357
357
|
ok = false;
|
|
358
358
|
}
|
|
@@ -363,7 +363,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
363
363
|
if (ok && gr.read(n_tensors)) {
|
|
364
364
|
static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
|
|
365
365
|
if (n_tensors < 0 || n_tensors > int64_t(SIZE_MAX/sizeof(gguf_tensor_info))) {
|
|
366
|
-
|
|
366
|
+
GGML_LOG_ERROR("%s: number of tensors is %" PRIi64 " but must be in [0, %zu]\n",
|
|
367
367
|
__func__, n_tensors, SIZE_MAX/sizeof(gguf_tensor_info));
|
|
368
368
|
ok = false;
|
|
369
369
|
}
|
|
@@ -374,7 +374,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
374
374
|
if (ok && gr.read(n_kv)) {
|
|
375
375
|
static_assert(sizeof(size_t) <= 8 && sizeof(gguf_tensor_info) >= 2, "int64_t insufficient for indexing");
|
|
376
376
|
if (n_kv < 0 || n_kv > int64_t(SIZE_MAX/sizeof(gguf_kv))) {
|
|
377
|
-
|
|
377
|
+
GGML_LOG_ERROR("%s: number of key value pairs is %" PRIi64 " but must be in [0, %zu]\n",
|
|
378
378
|
__func__, n_kv, SIZE_MAX/sizeof(gguf_kv));
|
|
379
379
|
ok = false;
|
|
380
380
|
}
|
|
@@ -383,7 +383,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
383
383
|
}
|
|
384
384
|
|
|
385
385
|
if (!ok) {
|
|
386
|
-
|
|
386
|
+
GGML_LOG_ERROR("%s: failed to read header\n", __func__);
|
|
387
387
|
gguf_free(ctx);
|
|
388
388
|
return nullptr;
|
|
389
389
|
}
|
|
@@ -399,15 +399,15 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
399
399
|
try {
|
|
400
400
|
ok = ok && gr.read(key);
|
|
401
401
|
} catch (std::length_error &) {
|
|
402
|
-
|
|
402
|
+
GGML_LOG_ERROR("%s: encountered length_error while reading key %" PRIi64 "\n", __func__, i);
|
|
403
403
|
ok = false;
|
|
404
404
|
} catch (std::bad_alloc &) {
|
|
405
|
-
|
|
405
|
+
GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i);
|
|
406
406
|
ok = false;
|
|
407
407
|
}
|
|
408
408
|
for (size_t j = 0; ok && j < ctx->kv.size(); ++j) {
|
|
409
409
|
if (key == ctx->kv[j].key) {
|
|
410
|
-
|
|
410
|
+
GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i);
|
|
411
411
|
ok = false;
|
|
412
412
|
}
|
|
413
413
|
}
|
|
@@ -441,14 +441,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
441
441
|
case GGUF_TYPE_ARRAY:
|
|
442
442
|
default:
|
|
443
443
|
{
|
|
444
|
-
|
|
444
|
+
GGML_LOG_ERROR("%s: key '%s' has invalid GGUF type %d\n", __func__, key.c_str(), type);
|
|
445
445
|
ok = false;
|
|
446
446
|
} break;
|
|
447
447
|
}
|
|
448
448
|
}
|
|
449
449
|
|
|
450
450
|
if (!ok) {
|
|
451
|
-
|
|
451
|
+
GGML_LOG_ERROR("%s: failed to read key-value pairs\n", __func__);
|
|
452
452
|
gguf_free(ctx);
|
|
453
453
|
return nullptr;
|
|
454
454
|
}
|
|
@@ -458,7 +458,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
458
458
|
ctx->alignment = alignment_idx == -1 ? GGUF_DEFAULT_ALIGNMENT : gguf_get_val_u32(ctx, alignment_idx);
|
|
459
459
|
|
|
460
460
|
if (ctx->alignment == 0 || (ctx->alignment & (ctx->alignment - 1)) != 0) {
|
|
461
|
-
|
|
461
|
+
GGML_LOG_ERROR("%s: alignment %zu is not a power of 2\n", __func__, ctx->alignment);
|
|
462
462
|
gguf_free(ctx);
|
|
463
463
|
return nullptr;
|
|
464
464
|
}
|
|
@@ -474,14 +474,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
474
474
|
try {
|
|
475
475
|
ok = ok && gr.read(name);
|
|
476
476
|
} catch (std::length_error &) {
|
|
477
|
-
|
|
477
|
+
GGML_LOG_ERROR("%s: encountered length_error while reading tensor name %" PRIi64 "\n", __func__, i);
|
|
478
478
|
ok = false;
|
|
479
479
|
} catch (std::bad_alloc &) {
|
|
480
|
-
|
|
480
|
+
GGML_LOG_ERROR("%s: encountered bad_alloc error while reading tensor name %" PRIi64 "\n", __func__, i);
|
|
481
481
|
ok = false;
|
|
482
482
|
}
|
|
483
483
|
if (name.length() >= GGML_MAX_NAME) {
|
|
484
|
-
|
|
484
|
+
GGML_LOG_ERROR("%s: tensor name %" PRIi64 " is too long: %zu >= %d\n", __func__, i, name.length(), GGML_MAX_NAME);
|
|
485
485
|
ok = false;
|
|
486
486
|
break;
|
|
487
487
|
}
|
|
@@ -490,7 +490,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
490
490
|
// make sure there are no duplicate tensor names
|
|
491
491
|
for (int64_t j = 0; ok && j < i; ++j) {
|
|
492
492
|
if (strcmp(info.t.name, ctx->info[j].t.name) == 0) {
|
|
493
|
-
|
|
493
|
+
GGML_LOG_ERROR("%s: duplicate tensor name '%s' for tensors %" PRIi64 " and %" PRIi64 "\n", __func__, info.t.name, j, i);
|
|
494
494
|
ok = false;
|
|
495
495
|
break;
|
|
496
496
|
}
|
|
@@ -505,7 +505,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
505
505
|
uint32_t n_dims = -1;
|
|
506
506
|
ok = ok && gr.read(n_dims);
|
|
507
507
|
if (n_dims > GGML_MAX_DIMS) {
|
|
508
|
-
|
|
508
|
+
GGML_LOG_ERROR("%s: tensor '%s' has invalid number of dimensions: %" PRIu32 " > %" PRIu32 "\n",
|
|
509
509
|
__func__, info.t.name, n_dims, GGML_MAX_DIMS);
|
|
510
510
|
ok = false;
|
|
511
511
|
break;
|
|
@@ -518,7 +518,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
518
518
|
|
|
519
519
|
// check that all ne are non-negative
|
|
520
520
|
if (info.t.ne[j] < 0) {
|
|
521
|
-
|
|
521
|
+
GGML_LOG_ERROR("%s: tensor '%s' dimension %" PRIu32 " has invalid number of elements: %" PRIi64 " < 0\n",
|
|
522
522
|
__func__, info.t.name, j, info.t.ne[j]);
|
|
523
523
|
ok = false;
|
|
524
524
|
break;
|
|
@@ -530,7 +530,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
530
530
|
(INT64_MAX/info.t.ne[2] <= info.t.ne[0]*info.t.ne[1]) ||
|
|
531
531
|
(INT64_MAX/info.t.ne[3] <= info.t.ne[0]*info.t.ne[1]*info.t.ne[2]))) {
|
|
532
532
|
|
|
533
|
-
|
|
533
|
+
GGML_LOG_ERROR("%s: total number of elements in tensor '%s' with shape "
|
|
534
534
|
"(%" PRIi64 ", %" PRIi64 ", %" PRIi64 ", %" PRIi64 ") is >= %" PRIi64 "\n",
|
|
535
535
|
__func__, info.t.name, info.t.ne[0], info.t.ne[1], info.t.ne[2], info.t.ne[3], INT64_MAX);
|
|
536
536
|
ok = false;
|
|
@@ -547,7 +547,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
547
547
|
|
|
548
548
|
// check that tensor type is within defined range
|
|
549
549
|
if (info.t.type < 0 || info.t.type >= GGML_TYPE_COUNT) {
|
|
550
|
-
|
|
550
|
+
GGML_LOG_ERROR("%s: tensor '%s' has invalid ggml type %d (%s)\n",
|
|
551
551
|
__func__, info.t.name, info.t.type, ggml_type_name(info.t.type));
|
|
552
552
|
ok = false;
|
|
553
553
|
break;
|
|
@@ -557,7 +557,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
557
557
|
|
|
558
558
|
// check that row size is divisible by block size
|
|
559
559
|
if (blck_size == 0 || info.t.ne[0] % blck_size != 0) {
|
|
560
|
-
|
|
560
|
+
GGML_LOG_ERROR("%s: tensor '%s' of type %d (%s) has %" PRId64 " elements per row, "
|
|
561
561
|
"not a multiple of block size (%" PRId64 ")\n",
|
|
562
562
|
__func__, info.t.name, (int) info.t.type, ggml_type_name(info.t.type), info.t.ne[0], blck_size);
|
|
563
563
|
ok = false;
|
|
@@ -582,7 +582,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
582
582
|
}
|
|
583
583
|
|
|
584
584
|
if (!ok) {
|
|
585
|
-
|
|
585
|
+
GGML_LOG_ERROR("%s: failed to read tensor info\n", __func__);
|
|
586
586
|
gguf_free(ctx);
|
|
587
587
|
return nullptr;
|
|
588
588
|
}
|
|
@@ -590,7 +590,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
590
590
|
|
|
591
591
|
// we require the data section to be aligned, so take into account any padding
|
|
592
592
|
if (fseek(file, GGML_PAD(ftell(file), ctx->alignment), SEEK_SET) != 0) {
|
|
593
|
-
|
|
593
|
+
GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
|
|
594
594
|
gguf_free(ctx);
|
|
595
595
|
return nullptr;
|
|
596
596
|
}
|
|
@@ -604,9 +604,9 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
604
604
|
for (size_t i = 0; i < ctx->info.size(); ++i) {
|
|
605
605
|
const gguf_tensor_info & ti = ctx->info[i];
|
|
606
606
|
if (ti.offset != ctx->size) {
|
|
607
|
-
|
|
607
|
+
GGML_LOG_ERROR("%s: tensor '%s' has offset %" PRIu64 ", expected %zu\n",
|
|
608
608
|
__func__, ti.t.name, ti.offset, ctx->size);
|
|
609
|
-
|
|
609
|
+
GGML_LOG_ERROR("%s: failed to read tensor data\n", __func__);
|
|
610
610
|
gguf_free(ctx);
|
|
611
611
|
return nullptr;
|
|
612
612
|
}
|
|
@@ -634,7 +634,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
634
634
|
|
|
635
635
|
*params.ctx = ggml_init(pdata);
|
|
636
636
|
if (*params.ctx == nullptr) {
|
|
637
|
-
|
|
637
|
+
GGML_LOG_ERROR("%s: failed to initialize ggml context for storing tensors\n", __func__);
|
|
638
638
|
gguf_free(ctx);
|
|
639
639
|
return nullptr;
|
|
640
640
|
}
|
|
@@ -656,7 +656,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
656
656
|
ok = ok && gr.read(data->data, ctx->size);
|
|
657
657
|
|
|
658
658
|
if (!ok) {
|
|
659
|
-
|
|
659
|
+
GGML_LOG_ERROR("%s: failed to read tensor data binary blob\n", __func__);
|
|
660
660
|
ggml_free(ctx_data);
|
|
661
661
|
*params.ctx = nullptr;
|
|
662
662
|
gguf_free(ctx);
|
|
@@ -689,7 +689,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
|
|
|
689
689
|
}
|
|
690
690
|
|
|
691
691
|
if (!ok) {
|
|
692
|
-
|
|
692
|
+
GGML_LOG_ERROR("%s: failed to create tensors\n", __func__);
|
|
693
693
|
ggml_free(ctx_data);
|
|
694
694
|
*params.ctx = nullptr;
|
|
695
695
|
gguf_free(ctx);
|
|
@@ -706,7 +706,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
|
706
706
|
FILE * file = ggml_fopen(fname, "rb");
|
|
707
707
|
|
|
708
708
|
if (!file) {
|
|
709
|
-
|
|
709
|
+
GGML_LOG_ERROR("%s: failed to open GGUF file '%s'\n", __func__, fname);
|
|
710
710
|
return nullptr;
|
|
711
711
|
}
|
|
712
712
|
|
|
@@ -1305,7 +1305,7 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
|
|
|
1305
1305
|
FILE * file = ggml_fopen(fname, "wb");
|
|
1306
1306
|
|
|
1307
1307
|
if (!file) {
|
|
1308
|
-
|
|
1308
|
+
GGML_LOG_ERROR("%s: failed to open file '%s' for writing GGUF data\n", __func__, fname);
|
|
1309
1309
|
return false;
|
|
1310
1310
|
}
|
|
1311
1311
|
|