@novastera-oss/llamarn 0.2.9 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
- package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
- package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86/libggml.so +0 -0
- package/android/src/main/jniLibs/x86/libllama.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
- package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
- package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
- package/cpp/build-info.cpp +2 -2
- package/cpp/llama.cpp/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/README.md +4 -5
- package/cpp/llama.cpp/build-xcframework.sh +1 -1
- package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
- package/cpp/llama.cpp/common/arg.cpp +17 -0
- package/cpp/llama.cpp/common/chat.cpp +37 -20
- package/cpp/llama.cpp/common/chat.h +2 -0
- package/cpp/llama.cpp/common/common.h +4 -0
- package/cpp/llama.cpp/convert_hf_to_gguf.py +745 -6
- package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
- package/cpp/llama.cpp/ggml/CMakeLists.txt +7 -2
- package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
- package/cpp/llama.cpp/ggml/include/ggml.h +173 -10
- package/cpp/llama.cpp/ggml/src/CMakeLists.txt +0 -1
- package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
- package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
- package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
- package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
- package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +6 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +28 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1203 -163
- package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +6 -0
- package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +33 -9
- package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +142 -9
- package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +17 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
- package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +185 -79
- package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
- package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
- package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
- package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
- package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
- package/cpp/llama.cpp/ggml/src/ggml-impl.h +64 -0
- package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +35 -9
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +167 -39
- package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +254 -57
- package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +505 -40
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
- package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
- package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
- package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +693 -1034
- package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +60 -9
- package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +15 -18
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
- package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +711 -292
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
- package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
- package/cpp/llama.cpp/ggml/src/ggml.c +382 -61
- package/cpp/llama.cpp/ggml/src/gguf.cpp +8 -1
- package/cpp/llama.cpp/gguf-py/gguf/constants.py +209 -0
- package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +8 -2
- package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +73 -21
- package/cpp/llama.cpp/gguf-py/gguf/vocab.py +12 -3
- package/cpp/llama.cpp/include/llama.h +0 -40
- package/cpp/llama.cpp/src/llama-arch.cpp +210 -3
- package/cpp/llama.cpp/src/llama-arch.h +18 -1
- package/cpp/llama.cpp/src/llama-batch.cpp +27 -1
- package/cpp/llama.cpp/src/llama-batch.h +8 -1
- package/cpp/llama.cpp/src/llama-chat.cpp +15 -0
- package/cpp/llama.cpp/src/llama-chat.h +1 -0
- package/cpp/llama.cpp/src/llama-graph.cpp +119 -184
- package/cpp/llama.cpp/src/llama-graph.h +47 -60
- package/cpp/llama.cpp/src/llama-hparams.cpp +7 -1
- package/cpp/llama.cpp/src/llama-hparams.h +3 -0
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +28 -18
- package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +4 -2
- package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +214 -65
- package/cpp/llama.cpp/src/llama-kv-cache-unified.h +62 -24
- package/cpp/llama.cpp/src/llama-kv-cells.h +62 -10
- package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +9 -4
- package/cpp/llama.cpp/src/llama-memory-hybrid.h +3 -1
- package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +20 -10
- package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
- package/cpp/llama.cpp/src/llama-memory.h +3 -0
- package/cpp/llama.cpp/src/llama-model.cpp +2530 -685
- package/cpp/llama.cpp/src/llama-model.h +18 -0
- package/cpp/llama.cpp/src/llama-quant.cpp +1 -0
- package/cpp/llama.cpp/src/llama-vocab.cpp +13 -2
- package/cpp/llama.cpp/src/llama-vocab.h +41 -0
- package/ios/include/chat.h +2 -0
- package/ios/include/common.h +4 -0
- package/ios/include/llama.h +0 -40
- package/ios/libs/llama.xcframework/Info.plist +19 -19
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5055 -4886
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3766
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
- package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4890
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4861
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3764
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5091 -4922
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4897
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3794
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +173 -10
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +0 -40
- package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
- package/package.json +1 -1
- package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
- package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
- package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
- package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
|
@@ -202,19 +202,34 @@ void ggml_print_backtrace(void) {
|
|
|
202
202
|
}
|
|
203
203
|
#endif
|
|
204
204
|
|
|
205
|
+
static ggml_abort_callback_t g_abort_callback = NULL;
|
|
206
|
+
|
|
207
|
+
// Set the abort callback (passing null will restore original abort functionality: printing the message and a backtrace to stderr)
|
|
208
|
+
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
|
|
209
|
+
ggml_abort_callback_t ret_val = g_abort_callback;
|
|
210
|
+
g_abort_callback = callback;
|
|
211
|
+
return ret_val;
|
|
212
|
+
}
|
|
213
|
+
|
|
205
214
|
void ggml_abort(const char * file, int line, const char * fmt, ...) {
|
|
206
215
|
fflush(stdout);
|
|
207
216
|
|
|
208
|
-
|
|
217
|
+
char message[2048];
|
|
218
|
+
int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
|
|
209
219
|
|
|
210
220
|
va_list args;
|
|
211
221
|
va_start(args, fmt);
|
|
212
|
-
|
|
222
|
+
vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
|
|
213
223
|
va_end(args);
|
|
214
224
|
|
|
215
|
-
|
|
225
|
+
if (g_abort_callback) {
|
|
226
|
+
g_abort_callback(message);
|
|
227
|
+
} else {
|
|
228
|
+
// default: print error and backtrace to stderr
|
|
229
|
+
fprintf(stderr, "%s\n", message);
|
|
230
|
+
ggml_print_backtrace();
|
|
231
|
+
}
|
|
216
232
|
|
|
217
|
-
ggml_print_backtrace();
|
|
218
233
|
abort();
|
|
219
234
|
}
|
|
220
235
|
|
|
@@ -458,6 +473,14 @@ bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
|
|
|
458
473
|
return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
|
|
459
474
|
}
|
|
460
475
|
|
|
476
|
+
const char * ggml_version(void) {
|
|
477
|
+
return GGML_VERSION;
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
const char * ggml_commit(void) {
|
|
481
|
+
return GGML_COMMIT;
|
|
482
|
+
}
|
|
483
|
+
|
|
461
484
|
//
|
|
462
485
|
// timing
|
|
463
486
|
//
|
|
@@ -945,6 +968,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
945
968
|
"CONV_TRANSPOSE_1D",
|
|
946
969
|
"IM2COL",
|
|
947
970
|
"IM2COL_BACK",
|
|
971
|
+
"CONV_2D",
|
|
948
972
|
"CONV_2D_DW",
|
|
949
973
|
"CONV_TRANSPOSE_2D",
|
|
950
974
|
"POOL_1D",
|
|
@@ -982,9 +1006,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
|
982
1006
|
"CROSS_ENTROPY_LOSS",
|
|
983
1007
|
"CROSS_ENTROPY_LOSS_BACK",
|
|
984
1008
|
"OPT_STEP_ADAMW",
|
|
1009
|
+
|
|
1010
|
+
"GLU",
|
|
985
1011
|
};
|
|
986
1012
|
|
|
987
|
-
static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
|
|
1013
|
+
static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
|
|
988
1014
|
|
|
989
1015
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
990
1016
|
"none",
|
|
@@ -1042,6 +1068,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1042
1068
|
"conv_transpose_1d(x)",
|
|
1043
1069
|
"im2col(x)",
|
|
1044
1070
|
"im2col_back(x)",
|
|
1071
|
+
"conv_2d(x)",
|
|
1045
1072
|
"conv_2d_dw(x)",
|
|
1046
1073
|
"conv_transpose_2d(x)",
|
|
1047
1074
|
"pool_1d(x)",
|
|
@@ -1079,9 +1106,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
|
1079
1106
|
"cross_entropy_loss(x,y)",
|
|
1080
1107
|
"cross_entropy_loss_back(x,y)",
|
|
1081
1108
|
"adamw(x)",
|
|
1109
|
+
|
|
1110
|
+
"glu(x)",
|
|
1082
1111
|
};
|
|
1083
1112
|
|
|
1084
|
-
static_assert(GGML_OP_COUNT == 84, "GGML_OP_COUNT != 84");
|
|
1113
|
+
static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86");
|
|
1085
1114
|
|
|
1086
1115
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
|
1087
1116
|
|
|
@@ -1107,6 +1136,17 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
|
|
|
1107
1136
|
static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15");
|
|
1108
1137
|
|
|
1109
1138
|
|
|
1139
|
+
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
|
|
1140
|
+
"REGLU",
|
|
1141
|
+
"GEGLU",
|
|
1142
|
+
"SWIGLU",
|
|
1143
|
+
"GEGLU_ERF",
|
|
1144
|
+
"GEGLU_QUICK",
|
|
1145
|
+
};
|
|
1146
|
+
|
|
1147
|
+
static_assert(GGML_GLU_OP_COUNT == 5, "GGML_GLU_OP_COUNT != 5");
|
|
1148
|
+
|
|
1149
|
+
|
|
1110
1150
|
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
|
|
1111
1151
|
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
|
|
1112
1152
|
|
|
@@ -1209,11 +1249,19 @@ const char * ggml_unary_op_name(enum ggml_unary_op op) {
|
|
|
1209
1249
|
return GGML_UNARY_OP_NAME[op];
|
|
1210
1250
|
}
|
|
1211
1251
|
|
|
1252
|
+
const char * ggml_glu_op_name(enum ggml_glu_op op) {
|
|
1253
|
+
return GGML_GLU_OP_NAME[op];
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1212
1256
|
const char * ggml_op_desc(const struct ggml_tensor * t) {
|
|
1213
1257
|
if (t->op == GGML_OP_UNARY) {
|
|
1214
1258
|
enum ggml_unary_op uop = ggml_get_unary_op(t);
|
|
1215
1259
|
return ggml_unary_op_name(uop);
|
|
1216
1260
|
}
|
|
1261
|
+
if (t->op == GGML_OP_GLU) {
|
|
1262
|
+
enum ggml_glu_op gop = ggml_get_glu_op(t);
|
|
1263
|
+
return ggml_glu_op_name(gop);
|
|
1264
|
+
}
|
|
1217
1265
|
return ggml_op_name(t->op);
|
|
1218
1266
|
}
|
|
1219
1267
|
|
|
@@ -1730,6 +1778,11 @@ enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
|
|
|
1730
1778
|
return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
|
|
1731
1779
|
}
|
|
1732
1780
|
|
|
1781
|
+
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
|
|
1782
|
+
GGML_ASSERT(tensor->op == GGML_OP_GLU);
|
|
1783
|
+
return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
|
|
1784
|
+
}
|
|
1785
|
+
|
|
1733
1786
|
const char * ggml_get_name(const struct ggml_tensor * tensor) {
|
|
1734
1787
|
return tensor->name;
|
|
1735
1788
|
}
|
|
@@ -2609,6 +2662,156 @@ struct ggml_tensor * ggml_exp_inplace(
|
|
|
2609
2662
|
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
|
|
2610
2663
|
}
|
|
2611
2664
|
|
|
2665
|
+
// ggml_glu
|
|
2666
|
+
|
|
2667
|
+
static struct ggml_tensor * ggml_glu_impl(
|
|
2668
|
+
struct ggml_context * ctx,
|
|
2669
|
+
struct ggml_tensor * a,
|
|
2670
|
+
struct ggml_tensor * b,
|
|
2671
|
+
enum ggml_glu_op op,
|
|
2672
|
+
bool swapped) {
|
|
2673
|
+
GGML_ASSERT(ggml_is_contiguous_1(a));
|
|
2674
|
+
|
|
2675
|
+
if (b) {
|
|
2676
|
+
GGML_ASSERT(ggml_is_contiguous_1(b));
|
|
2677
|
+
GGML_ASSERT(ggml_are_same_shape(a, b));
|
|
2678
|
+
GGML_ASSERT(a->type == b->type);
|
|
2679
|
+
}
|
|
2680
|
+
|
|
2681
|
+
int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
|
|
2682
|
+
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
|
|
2683
|
+
|
|
2684
|
+
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
|
2685
|
+
ggml_set_op_params_i32(result, 1, (int32_t) swapped);
|
|
2686
|
+
|
|
2687
|
+
result->op = GGML_OP_GLU;
|
|
2688
|
+
result->src[0] = a;
|
|
2689
|
+
result->src[1] = b;
|
|
2690
|
+
|
|
2691
|
+
return result;
|
|
2692
|
+
}
|
|
2693
|
+
|
|
2694
|
+
struct ggml_tensor * ggml_glu(
|
|
2695
|
+
struct ggml_context * ctx,
|
|
2696
|
+
struct ggml_tensor * a,
|
|
2697
|
+
enum ggml_glu_op op,
|
|
2698
|
+
bool swapped) {
|
|
2699
|
+
return ggml_glu_impl(ctx, a, NULL, op, swapped);
|
|
2700
|
+
}
|
|
2701
|
+
|
|
2702
|
+
struct ggml_tensor * ggml_glu_split(
|
|
2703
|
+
struct ggml_context * ctx,
|
|
2704
|
+
struct ggml_tensor * a,
|
|
2705
|
+
struct ggml_tensor * b,
|
|
2706
|
+
enum ggml_glu_op op) {
|
|
2707
|
+
return ggml_glu_impl(ctx, a, b, op, false);
|
|
2708
|
+
}
|
|
2709
|
+
|
|
2710
|
+
// ggml_reglu
|
|
2711
|
+
|
|
2712
|
+
struct ggml_tensor * ggml_reglu(
|
|
2713
|
+
struct ggml_context * ctx,
|
|
2714
|
+
struct ggml_tensor * a) {
|
|
2715
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
|
|
2716
|
+
}
|
|
2717
|
+
|
|
2718
|
+
struct ggml_tensor * ggml_reglu_swapped(
|
|
2719
|
+
struct ggml_context * ctx,
|
|
2720
|
+
struct ggml_tensor * a) {
|
|
2721
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
|
|
2722
|
+
}
|
|
2723
|
+
|
|
2724
|
+
struct ggml_tensor * ggml_reglu_split(
|
|
2725
|
+
struct ggml_context * ctx,
|
|
2726
|
+
struct ggml_tensor * a,
|
|
2727
|
+
struct ggml_tensor * b) {
|
|
2728
|
+
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
|
|
2729
|
+
}
|
|
2730
|
+
|
|
2731
|
+
// ggml_geglu
|
|
2732
|
+
|
|
2733
|
+
struct ggml_tensor * ggml_geglu(
|
|
2734
|
+
struct ggml_context * ctx,
|
|
2735
|
+
struct ggml_tensor * a) {
|
|
2736
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
|
|
2737
|
+
}
|
|
2738
|
+
|
|
2739
|
+
struct ggml_tensor * ggml_geglu_swapped(
|
|
2740
|
+
struct ggml_context * ctx,
|
|
2741
|
+
struct ggml_tensor * a) {
|
|
2742
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
|
|
2743
|
+
}
|
|
2744
|
+
|
|
2745
|
+
struct ggml_tensor * ggml_geglu_split(
|
|
2746
|
+
struct ggml_context * ctx,
|
|
2747
|
+
struct ggml_tensor * a,
|
|
2748
|
+
struct ggml_tensor * b) {
|
|
2749
|
+
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
|
|
2750
|
+
}
|
|
2751
|
+
|
|
2752
|
+
// ggml_swiglu
|
|
2753
|
+
|
|
2754
|
+
struct ggml_tensor * ggml_swiglu(
|
|
2755
|
+
struct ggml_context * ctx,
|
|
2756
|
+
struct ggml_tensor * a) {
|
|
2757
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
|
|
2758
|
+
}
|
|
2759
|
+
|
|
2760
|
+
struct ggml_tensor * ggml_swiglu_swapped(
|
|
2761
|
+
struct ggml_context * ctx,
|
|
2762
|
+
struct ggml_tensor * a) {
|
|
2763
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
|
|
2764
|
+
}
|
|
2765
|
+
|
|
2766
|
+
struct ggml_tensor * ggml_swiglu_split(
|
|
2767
|
+
struct ggml_context * ctx,
|
|
2768
|
+
struct ggml_tensor * a,
|
|
2769
|
+
struct ggml_tensor * b) {
|
|
2770
|
+
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
|
|
2771
|
+
}
|
|
2772
|
+
|
|
2773
|
+
// ggml_geglu_erf
|
|
2774
|
+
|
|
2775
|
+
struct ggml_tensor * ggml_geglu_erf(
|
|
2776
|
+
struct ggml_context * ctx,
|
|
2777
|
+
struct ggml_tensor * a) {
|
|
2778
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
|
|
2779
|
+
}
|
|
2780
|
+
|
|
2781
|
+
struct ggml_tensor * ggml_geglu_erf_swapped(
|
|
2782
|
+
struct ggml_context * ctx,
|
|
2783
|
+
struct ggml_tensor * a) {
|
|
2784
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
|
|
2785
|
+
}
|
|
2786
|
+
|
|
2787
|
+
struct ggml_tensor * ggml_geglu_erf_split(
|
|
2788
|
+
struct ggml_context * ctx,
|
|
2789
|
+
struct ggml_tensor * a,
|
|
2790
|
+
struct ggml_tensor * b) {
|
|
2791
|
+
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
|
|
2792
|
+
}
|
|
2793
|
+
|
|
2794
|
+
// ggml_geglu_quick
|
|
2795
|
+
|
|
2796
|
+
struct ggml_tensor * ggml_geglu_quick(
|
|
2797
|
+
struct ggml_context * ctx,
|
|
2798
|
+
struct ggml_tensor * a) {
|
|
2799
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
|
|
2800
|
+
}
|
|
2801
|
+
|
|
2802
|
+
struct ggml_tensor * ggml_geglu_quick_swapped(
|
|
2803
|
+
struct ggml_context * ctx,
|
|
2804
|
+
struct ggml_tensor * a) {
|
|
2805
|
+
return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
|
|
2806
|
+
}
|
|
2807
|
+
|
|
2808
|
+
struct ggml_tensor * ggml_geglu_quick_split(
|
|
2809
|
+
struct ggml_context * ctx,
|
|
2810
|
+
struct ggml_tensor * a,
|
|
2811
|
+
struct ggml_tensor * b) {
|
|
2812
|
+
return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
|
|
2813
|
+
}
|
|
2814
|
+
|
|
2612
2815
|
// ggml_norm
|
|
2613
2816
|
|
|
2614
2817
|
static struct ggml_tensor * ggml_norm_impl(
|
|
@@ -2866,12 +3069,14 @@ static struct ggml_tensor * ggml_scale_impl(
|
|
|
2866
3069
|
struct ggml_context * ctx,
|
|
2867
3070
|
struct ggml_tensor * a,
|
|
2868
3071
|
float s,
|
|
3072
|
+
float b,
|
|
2869
3073
|
bool inplace) {
|
|
2870
3074
|
GGML_ASSERT(ggml_is_padded_1d(a));
|
|
2871
3075
|
|
|
2872
3076
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
|
2873
3077
|
|
|
2874
|
-
|
|
3078
|
+
float params[2] = { s, b };
|
|
3079
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
|
2875
3080
|
|
|
2876
3081
|
result->op = GGML_OP_SCALE;
|
|
2877
3082
|
result->src[0] = a;
|
|
@@ -2883,14 +3088,30 @@ struct ggml_tensor * ggml_scale(
|
|
|
2883
3088
|
struct ggml_context * ctx,
|
|
2884
3089
|
struct ggml_tensor * a,
|
|
2885
3090
|
float s) {
|
|
2886
|
-
return ggml_scale_impl(ctx, a, s, false);
|
|
3091
|
+
return ggml_scale_impl(ctx, a, s, 0.0, false);
|
|
2887
3092
|
}
|
|
2888
3093
|
|
|
2889
3094
|
struct ggml_tensor * ggml_scale_inplace(
|
|
2890
3095
|
struct ggml_context * ctx,
|
|
2891
3096
|
struct ggml_tensor * a,
|
|
2892
3097
|
float s) {
|
|
2893
|
-
return ggml_scale_impl(ctx, a, s, true);
|
|
3098
|
+
return ggml_scale_impl(ctx, a, s, 0.0, true);
|
|
3099
|
+
}
|
|
3100
|
+
|
|
3101
|
+
// Out-of-place scale-with-bias: forwards to ggml_scale_impl with the scale
// factor `s`, the bias `b` and inplace == false. ggml_scale_impl stores
// { s, b } as the node's op params and tags the node GGML_OP_SCALE.
// NOTE(review): presumably evaluates to a*s + b -- confirm against the
// backend kernels.
struct ggml_tensor * ggml_scale_bias(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b) {
    const bool inplace = false;

    struct ggml_tensor * result = ggml_scale_impl(ctx, a, s, b, inplace);

    return result;
}
|
|
3108
|
+
|
|
3109
|
+
// In-place scale-with-bias: forwards to ggml_scale_impl with the scale
// factor `s`, the bias `b` and inplace == true, so the result is created as
// a view of `a` rather than a duplicate. ggml_scale_impl stores { s, b } as
// the node's op params and tags the node GGML_OP_SCALE.
// NOTE(review): presumably evaluates to a*s + b -- confirm against the
// backend kernels.
struct ggml_tensor * ggml_scale_bias_inplace(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        float                 s,
        float                 b) {
    const bool inplace = true;

    struct ggml_tensor * result = ggml_scale_impl(ctx, a, s, b, inplace);

    return result;
}
|
|
2895
3116
|
|
|
2896
3117
|
// ggml_set
|
|
@@ -3515,9 +3736,10 @@ static struct ggml_tensor * ggml_soft_max_impl(
|
|
|
3515
3736
|
if (mask) {
|
|
3516
3737
|
GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
|
|
3517
3738
|
GGML_ASSERT(ggml_is_contiguous(mask));
|
|
3518
|
-
GGML_ASSERT(ggml_is_matrix(mask));
|
|
3519
3739
|
GGML_ASSERT(mask->ne[0] == a->ne[0]);
|
|
3520
3740
|
GGML_ASSERT(mask->ne[1] >= a->ne[1]);
|
|
3741
|
+
GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
|
|
3742
|
+
GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
|
|
3521
3743
|
}
|
|
3522
3744
|
|
|
3523
3745
|
if (max_bias > 0.0f) {
|
|
@@ -4157,6 +4379,44 @@ struct ggml_tensor * ggml_conv_2d_dw_direct(
|
|
|
4157
4379
|
return result;
|
|
4158
4380
|
}
|
|
4159
4381
|
|
|
4382
|
+
// ggml_conv_2d_direct
|
|
4383
|
+
|
|
4384
|
+
struct ggml_tensor * ggml_conv_2d_direct(
|
|
4385
|
+
struct ggml_context * ctx,
|
|
4386
|
+
struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
|
|
4387
|
+
struct ggml_tensor * b, // input data [W, H, C, N]
|
|
4388
|
+
int s0, // stride dimension 0
|
|
4389
|
+
int s1, // stride dimension 1
|
|
4390
|
+
int p0, // padding dimension 0
|
|
4391
|
+
int p1, // padding dimension 1
|
|
4392
|
+
int d0, // dilation dimension 0
|
|
4393
|
+
int d1) {// dilation dimension 1
|
|
4394
|
+
|
|
4395
|
+
GGML_ASSERT(a->ne[2] == b->ne[2]);
|
|
4396
|
+
//GGML_ASSERT(a->type == b->type);
|
|
4397
|
+
|
|
4398
|
+
int64_t ne[4];
|
|
4399
|
+
ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
|
4400
|
+
ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
|
|
4401
|
+
ne[2] = a->ne[3];
|
|
4402
|
+
ne[3] = b->ne[3];
|
|
4403
|
+
|
|
4404
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
|
|
4405
|
+
|
|
4406
|
+
ggml_set_op_params_i32(result, 0, s0);
|
|
4407
|
+
ggml_set_op_params_i32(result, 1, s1);
|
|
4408
|
+
ggml_set_op_params_i32(result, 2, p0);
|
|
4409
|
+
ggml_set_op_params_i32(result, 3, p1);
|
|
4410
|
+
ggml_set_op_params_i32(result, 4, d0);
|
|
4411
|
+
ggml_set_op_params_i32(result, 5, d1);
|
|
4412
|
+
|
|
4413
|
+
result->op = GGML_OP_CONV_2D;
|
|
4414
|
+
result->src[0] = a;
|
|
4415
|
+
result->src[1] = b;
|
|
4416
|
+
|
|
4417
|
+
return result;
|
|
4418
|
+
}
|
|
4419
|
+
|
|
4160
4420
|
// ggml_conv_transpose_2d_p0
|
|
4161
4421
|
|
|
4162
4422
|
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
|
|
@@ -4273,24 +4533,21 @@ struct ggml_tensor * ggml_pool_2d_back(
|
|
|
4273
4533
|
return result;
|
|
4274
4534
|
}
|
|
4275
4535
|
|
|
4276
|
-
// ggml_upscale
|
|
4536
|
+
// ggml_upscale / ggml_interpolate
|
|
4277
4537
|
|
|
4278
|
-
static struct ggml_tensor *
|
|
4538
|
+
static struct ggml_tensor * ggml_interpolate_impl(
|
|
4279
4539
|
struct ggml_context * ctx,
|
|
4280
4540
|
struct ggml_tensor * a,
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
|
|
4285
|
-
|
|
4286
|
-
GGML_ASSERT(
|
|
4287
|
-
GGML_ASSERT(a->ne[1] <= ne1);
|
|
4288
|
-
GGML_ASSERT(a->ne[2] <= ne2);
|
|
4289
|
-
GGML_ASSERT(a->ne[3] <= ne3);
|
|
4541
|
+
int64_t ne0,
|
|
4542
|
+
int64_t ne1,
|
|
4543
|
+
int64_t ne2,
|
|
4544
|
+
int64_t ne3,
|
|
4545
|
+
uint32_t mode) {
|
|
4546
|
+
GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
|
|
4290
4547
|
|
|
4291
4548
|
struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
|
|
4292
4549
|
|
|
4293
|
-
ggml_set_op_params_i32(result, 0, mode);
|
|
4550
|
+
ggml_set_op_params_i32(result, 0, (int32_t)mode);
|
|
4294
4551
|
|
|
4295
4552
|
result->op = GGML_OP_UPSCALE;
|
|
4296
4553
|
result->src[0] = a;
|
|
@@ -4303,7 +4560,8 @@ struct ggml_tensor * ggml_upscale(
|
|
|
4303
4560
|
struct ggml_tensor * a,
|
|
4304
4561
|
int scale_factor,
|
|
4305
4562
|
enum ggml_scale_mode mode) {
|
|
4306
|
-
|
|
4563
|
+
GGML_ASSERT(scale_factor > 1);
|
|
4564
|
+
return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
|
|
4307
4565
|
}
|
|
4308
4566
|
|
|
4309
4567
|
struct ggml_tensor * ggml_upscale_ext(
|
|
@@ -4314,7 +4572,18 @@ struct ggml_tensor * ggml_upscale_ext(
|
|
|
4314
4572
|
int ne2,
|
|
4315
4573
|
int ne3,
|
|
4316
4574
|
enum ggml_scale_mode mode) {
|
|
4317
|
-
return
|
|
4575
|
+
return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
|
|
4576
|
+
}
|
|
4577
|
+
|
|
4578
|
+
// Public entry point for resizing `a` to the explicit target shape
// [ne0, ne1, ne2, ne3]. All arguments, including `mode`, are passed through
// unchanged to ggml_interpolate_impl, which performs the validation and
// builds the node.
struct ggml_tensor * ggml_interpolate(
        struct ggml_context * ctx,
        struct ggml_tensor  * a,
        int64_t               ne0,
        int64_t               ne1,
        int64_t               ne2,
        int64_t               ne3,
        uint32_t              mode) {
    struct ggml_tensor * result = ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);

    return result;
}
|
|
4319
4588
|
|
|
4320
4589
|
// ggml_pad
|
|
@@ -4491,13 +4760,17 @@ struct ggml_tensor * ggml_flash_attn_ext(
|
|
|
4491
4760
|
GGML_ASSERT(ggml_can_mul_mat(k, q));
|
|
4492
4761
|
// TODO: check if vT can be multiplied by (k*qT)
|
|
4493
4762
|
|
|
4763
|
+
GGML_ASSERT(q->ne[3] == k->ne[3]);
|
|
4764
|
+
GGML_ASSERT(q->ne[3] == v->ne[3]);
|
|
4765
|
+
|
|
4494
4766
|
if (mask) {
|
|
4495
4767
|
GGML_ASSERT(ggml_is_contiguous(mask));
|
|
4496
|
-
GGML_ASSERT(mask->ne[2] == 1);
|
|
4497
|
-
GGML_ASSERT(mask->ne[3] == 1);
|
|
4498
4768
|
GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) &&
|
|
4499
4769
|
"the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big");
|
|
4500
4770
|
//GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
|
|
4771
|
+
|
|
4772
|
+
GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
|
|
4773
|
+
GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
|
|
4501
4774
|
}
|
|
4502
4775
|
|
|
4503
4776
|
if (max_bias > 0.0f) {
|
|
@@ -4625,7 +4898,6 @@ struct ggml_tensor * ggml_ssm_conv(
|
|
|
4625
4898
|
const int64_t n_s = sx->ne[2];
|
|
4626
4899
|
|
|
4627
4900
|
// TODO: maybe support other strides than 1?
|
|
4628
|
-
// FIXME: this is always true?
|
|
4629
4901
|
GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
|
|
4630
4902
|
GGML_ASSERT(sx->ne[1] == d_inner);
|
|
4631
4903
|
GGML_ASSERT(n_t >= 0);
|
|
@@ -4648,36 +4920,49 @@ struct ggml_tensor * ggml_ssm_scan(
|
|
|
4648
4920
|
struct ggml_tensor * dt,
|
|
4649
4921
|
struct ggml_tensor * A,
|
|
4650
4922
|
struct ggml_tensor * B,
|
|
4651
|
-
struct ggml_tensor * C
|
|
4923
|
+
struct ggml_tensor * C,
|
|
4924
|
+
struct ggml_tensor * ids) {
|
|
4652
4925
|
GGML_ASSERT(ggml_is_contiguous(s));
|
|
4653
|
-
GGML_ASSERT(ggml_is_contiguous(x));
|
|
4654
4926
|
GGML_ASSERT(ggml_is_contiguous(dt));
|
|
4655
4927
|
GGML_ASSERT(ggml_is_contiguous(A));
|
|
4656
|
-
GGML_ASSERT(
|
|
4657
|
-
GGML_ASSERT(ggml_is_3d(B));
|
|
4658
|
-
GGML_ASSERT(ggml_is_3d(s));
|
|
4928
|
+
GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
|
|
4659
4929
|
GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
|
|
4660
4930
|
GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
|
|
4661
|
-
GGML_ASSERT(
|
|
4931
|
+
GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
|
|
4932
|
+
GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
|
|
4933
|
+
GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
|
|
4662
4934
|
GGML_ASSERT(ggml_are_same_shape(B, C));
|
|
4935
|
+
GGML_ASSERT(ids->type == GGML_TYPE_I32);
|
|
4663
4936
|
|
|
4664
4937
|
{
|
|
4665
4938
|
const int64_t d_state = s->ne[0];
|
|
4666
|
-
const int64_t
|
|
4667
|
-
const int64_t
|
|
4668
|
-
const int64_t
|
|
4669
|
-
|
|
4670
|
-
|
|
4671
|
-
GGML_ASSERT(
|
|
4672
|
-
GGML_ASSERT(
|
|
4673
|
-
GGML_ASSERT(
|
|
4939
|
+
const int64_t head_dim = x->ne[0];
|
|
4940
|
+
const int64_t n_head = x->ne[1];
|
|
4941
|
+
const int64_t n_seq_tokens = x->ne[2];
|
|
4942
|
+
const int64_t n_seqs = x->ne[3];
|
|
4943
|
+
|
|
4944
|
+
GGML_ASSERT(dt->ne[0] == n_head);
|
|
4945
|
+
GGML_ASSERT(dt->ne[1] == n_seq_tokens);
|
|
4946
|
+
GGML_ASSERT(dt->ne[2] == n_seqs);
|
|
4947
|
+
GGML_ASSERT(ggml_is_3d(dt));
|
|
4948
|
+
GGML_ASSERT(s->ne[1] == head_dim);
|
|
4949
|
+
GGML_ASSERT(s->ne[2] == n_head);
|
|
4674
4950
|
GGML_ASSERT(B->ne[0] == d_state);
|
|
4675
|
-
GGML_ASSERT(B->ne[
|
|
4676
|
-
GGML_ASSERT(B->ne[
|
|
4951
|
+
GGML_ASSERT(B->ne[2] == n_seq_tokens);
|
|
4952
|
+
GGML_ASSERT(B->ne[3] == n_seqs);
|
|
4953
|
+
GGML_ASSERT(ids->ne[0] == n_seqs);
|
|
4954
|
+
GGML_ASSERT(ggml_is_vector(ids));
|
|
4955
|
+
GGML_ASSERT(A->ne[1] == n_head);
|
|
4956
|
+
GGML_ASSERT(ggml_is_matrix(A));
|
|
4957
|
+
|
|
4958
|
+
if (A->ne[0] != 1) {
|
|
4959
|
+
// Mamba-1 has more granular decay factors
|
|
4960
|
+
GGML_ASSERT(A->ne[0] == d_state);
|
|
4961
|
+
}
|
|
4677
4962
|
}
|
|
4678
4963
|
|
|
4679
4964
|
// concatenated y + ssm_states
|
|
4680
|
-
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) +
|
|
4965
|
+
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
|
|
4681
4966
|
|
|
4682
4967
|
result->op = GGML_OP_SSM_SCAN;
|
|
4683
4968
|
result->src[0] = s;
|
|
@@ -4686,6 +4971,7 @@ struct ggml_tensor * ggml_ssm_scan(
|
|
|
4686
4971
|
result->src[3] = A;
|
|
4687
4972
|
result->src[4] = B;
|
|
4688
4973
|
result->src[5] = C;
|
|
4974
|
+
result->src[6] = ids;
|
|
4689
4975
|
|
|
4690
4976
|
return result;
|
|
4691
4977
|
}
|
|
@@ -5509,7 +5795,7 @@ static void ggml_compute_backward(
|
|
|
5509
5795
|
} break;
|
|
5510
5796
|
case GGML_OP_MEAN: {
|
|
5511
5797
|
if (src0_needs_grads) {
|
|
5512
|
-
ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false));
|
|
5798
|
+
ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
|
|
5513
5799
|
}
|
|
5514
5800
|
} break;
|
|
5515
5801
|
case GGML_OP_REPEAT: {
|
|
@@ -5586,7 +5872,7 @@ static void ggml_compute_backward(
|
|
|
5586
5872
|
if (src0_needs_grads) {
|
|
5587
5873
|
float s;
|
|
5588
5874
|
memcpy(&s, tensor->op_params, sizeof(float));
|
|
5589
|
-
ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, false));
|
|
5875
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
|
|
5590
5876
|
}
|
|
5591
5877
|
} break;
|
|
5592
5878
|
case GGML_OP_SET: {
|
|
@@ -5826,13 +6112,28 @@ static void ggml_compute_backward(
|
|
|
5826
6112
|
}
|
|
5827
6113
|
GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
|
|
5828
6114
|
} break;
|
|
6115
|
+
case GGML_OP_GLU: {
|
|
6116
|
+
switch (ggml_get_glu_op(tensor)) {
|
|
6117
|
+
case GGML_GLU_OP_SWIGLU: {
|
|
6118
|
+
if (src0_needs_grads) {
|
|
6119
|
+
GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
|
|
6120
|
+
ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
|
|
6121
|
+
}
|
|
6122
|
+
if (src1_needs_grads) {
|
|
6123
|
+
ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
|
|
6124
|
+
}
|
|
6125
|
+
} break;
|
|
6126
|
+
default: {
|
|
6127
|
+
GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
|
|
6128
|
+
} //break;
|
|
6129
|
+
}
|
|
6130
|
+
} break;
|
|
5829
6131
|
case GGML_OP_NONE: {
|
|
5830
6132
|
// noop
|
|
5831
6133
|
} break;
|
|
5832
6134
|
case GGML_OP_COUNT:
|
|
5833
6135
|
default: {
|
|
5834
|
-
|
|
5835
|
-
GGML_ABORT("fatal error");
|
|
6136
|
+
GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
|
|
5836
6137
|
} //break;
|
|
5837
6138
|
}
|
|
5838
6139
|
|
|
@@ -5841,19 +6142,32 @@ static void ggml_compute_backward(
|
|
|
5841
6142
|
GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
|
|
5842
6143
|
}
|
|
5843
6144
|
|
|
5844
|
-
static
|
|
6145
|
+
static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
|
|
5845
6146
|
// check if already visited
|
|
5846
|
-
|
|
5847
|
-
|
|
6147
|
+
size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
|
|
6148
|
+
GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
|
|
6149
|
+
if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
|
|
6150
|
+
// This is the first time we see this node in the current graph.
|
|
6151
|
+
cgraph->visited_hash_set.keys[node_hash_pos] = node;
|
|
6152
|
+
ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
|
|
6153
|
+
cgraph->use_counts[node_hash_pos] = 0;
|
|
6154
|
+
} else {
|
|
6155
|
+
// already visited
|
|
6156
|
+
return node_hash_pos;
|
|
5848
6157
|
}
|
|
5849
6158
|
|
|
5850
6159
|
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
|
5851
6160
|
const int k =
|
|
5852
6161
|
(cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
|
|
5853
6162
|
(cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
|
|
5854
|
-
/* unknown order, just fall back to using i*/ i;
|
|
5855
|
-
|
|
5856
|
-
|
|
6163
|
+
/* unknown order, just fall back to using i */ i;
|
|
6164
|
+
|
|
6165
|
+
struct ggml_tensor * src = node->src[k];
|
|
6166
|
+
if (src) {
|
|
6167
|
+
size_t src_hash_pos = ggml_visit_parents(cgraph, src);
|
|
6168
|
+
|
|
6169
|
+
// Update the use count for this operand.
|
|
6170
|
+
cgraph->use_counts[src_hash_pos]++;
|
|
5857
6171
|
}
|
|
5858
6172
|
}
|
|
5859
6173
|
|
|
@@ -5877,6 +6191,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
|
5877
6191
|
cgraph->nodes[cgraph->n_nodes] = node;
|
|
5878
6192
|
cgraph->n_nodes++;
|
|
5879
6193
|
}
|
|
6194
|
+
|
|
6195
|
+
return node_hash_pos;
|
|
5880
6196
|
}
|
|
5881
6197
|
|
|
5882
6198
|
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
|
|
@@ -6014,6 +6330,7 @@ static size_t ggml_graph_nbytes(size_t size, bool grads) {
|
|
|
6014
6330
|
incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
|
|
6015
6331
|
incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
|
|
6016
6332
|
incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
|
|
6333
|
+
incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
|
|
6017
6334
|
incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
|
|
6018
6335
|
if (grads) {
|
|
6019
6336
|
incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
|
|
@@ -6043,11 +6360,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
|
|
|
6043
6360
|
|
|
6044
6361
|
void * p = cgraph + 1;
|
|
6045
6362
|
|
|
6046
|
-
struct ggml_tensor ** nodes_ptr
|
|
6047
|
-
struct ggml_tensor ** leafs_ptr
|
|
6048
|
-
|
|
6049
|
-
struct ggml_tensor **
|
|
6050
|
-
struct ggml_tensor **
|
|
6363
|
+
struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
|
|
6364
|
+
struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
|
|
6365
|
+
int32_t * use_counts_ptr = incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
|
|
6366
|
+
struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
|
|
6367
|
+
struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
|
|
6368
|
+
struct ggml_tensor ** grad_accs_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
|
|
6051
6369
|
|
|
6052
6370
|
ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
|
|
6053
6371
|
|
|
@@ -6062,6 +6380,7 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
|
|
|
6062
6380
|
/*.grads =*/ grads_ptr,
|
|
6063
6381
|
/*.grad_accs =*/ grad_accs_ptr,
|
|
6064
6382
|
/*.leafs =*/ leafs_ptr,
|
|
6383
|
+
/*.use_counts =*/ use_counts_ptr,
|
|
6065
6384
|
/*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
|
|
6066
6385
|
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
|
|
6067
6386
|
};
|
|
@@ -6088,7 +6407,8 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
|
|
|
6088
6407
|
/*.grads =*/ NULL, // gradients would need visited_hash_set
|
|
6089
6408
|
/*.grad_accs =*/ NULL,
|
|
6090
6409
|
/*.leafs =*/ NULL,
|
|
6091
|
-
/*.
|
|
6410
|
+
/*.use_counts =*/ cgraph0->use_counts,
|
|
6411
|
+
/*.visited_hash_set =*/ cgraph0->visited_hash_set,
|
|
6092
6412
|
/*.order =*/ cgraph0->order,
|
|
6093
6413
|
};
|
|
6094
6414
|
|
|
@@ -6115,7 +6435,8 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
|
|
|
6115
6435
|
for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
|
|
6116
6436
|
// copy all hashset keys (tensors) that are in use
|
|
6117
6437
|
if (ggml_bitset_get(src->visited_hash_set.used, i)) {
|
|
6118
|
-
ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
|
|
6438
|
+
size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
|
|
6439
|
+
dst->use_counts[new_hash_pos] = src->use_counts[i];
|
|
6119
6440
|
}
|
|
6120
6441
|
}
|
|
6121
6442
|
|