@novastera-oss/llamarn 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/android/src/main/cpp/include/llama.h +8 -3
  2. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  3. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  4. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  6. package/android/src/main/jniLibs/armeabi-v7a/libggml-base.so +0 -0
  7. package/android/src/main/jniLibs/armeabi-v7a/libggml-cpu.so +0 -0
  8. package/android/src/main/jniLibs/armeabi-v7a/libggml.so +0 -0
  9. package/android/src/main/jniLibs/armeabi-v7a/libllama.so +0 -0
  10. package/android/src/main/jniLibs/x86/libggml-base.so +0 -0
  11. package/android/src/main/jniLibs/x86/libggml-cpu.so +0 -0
  12. package/android/src/main/jniLibs/x86/libggml.so +0 -0
  13. package/android/src/main/jniLibs/x86/libllama.so +0 -0
  14. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  15. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  16. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  18. package/cpp/LlamaCppModel.cpp +56 -22
  19. package/cpp/build-info.cpp +2 -2
  20. package/cpp/llama.cpp/CMakeLists.txt +1 -2
  21. package/cpp/llama.cpp/README.md +4 -5
  22. package/cpp/llama.cpp/build-xcframework.sh +1 -1
  23. package/cpp/llama.cpp/common/CMakeLists.txt +4 -5
  24. package/cpp/llama.cpp/common/arg.cpp +24 -0
  25. package/cpp/llama.cpp/common/chat.cpp +37 -20
  26. package/cpp/llama.cpp/common/chat.h +2 -0
  27. package/cpp/llama.cpp/common/common.cpp +3 -0
  28. package/cpp/llama.cpp/common/common.h +5 -0
  29. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +3 -46
  30. package/cpp/llama.cpp/convert_hf_to_gguf.py +860 -23
  31. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +9 -0
  32. package/cpp/llama.cpp/ggml/CMakeLists.txt +8 -2
  33. package/cpp/llama.cpp/ggml/include/ggml-backend.h +1 -1
  34. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +2 -0
  35. package/cpp/llama.cpp/ggml/include/ggml.h +206 -10
  36. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +17 -1
  37. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -8
  38. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +36 -18
  39. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +68 -5
  40. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +1 -1
  41. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +16 -2
  42. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +37 -3
  43. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +10 -9
  44. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +109 -108
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +1027 -1038
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +53 -52
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +56 -55
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +42 -41
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +24 -23
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +29 -28
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +30 -29
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +83 -82
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +20 -19
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +3 -2
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +9 -3
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +111 -103
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +4 -0
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3 -2
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +1405 -240
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +8 -0
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +25 -24
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +56 -40
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +212 -34
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +35 -11
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +187 -54
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +71 -29
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cu +161 -0
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-dw.cuh +5 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cu +91 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv2d-transpose.cuh +4 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +22 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +5 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +2 -14
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +4 -1
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +8 -4
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +6 -4
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +14 -12
  79. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +5 -3
  80. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +15 -10
  81. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +12 -6
  82. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +8 -0
  83. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +269 -110
  84. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cu +19 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-cuda/mean.cuh +3 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +2 -8
  87. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +257 -87
  88. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +2 -3
  89. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +21 -27
  90. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +8 -6
  91. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +119 -58
  92. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +10 -2
  93. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +192 -52
  94. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +5 -18
  95. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +0 -1
  96. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +97 -0
  97. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +11 -0
  98. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +92 -6
  99. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +14 -5
  100. package/cpp/llama.cpp/ggml/src/ggml-impl.h +125 -183
  101. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +4 -2
  102. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +51 -9
  103. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +394 -80
  104. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +616 -239
  105. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +2 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +3 -0
  107. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +741 -571
  108. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +27 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/glu.cl +337 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl +130 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +3 -2
  112. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/set_rows.cl +95 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +24 -11
  114. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +24 -11
  115. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +24 -11
  116. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +24 -11
  117. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +2 -3
  118. package/cpp/llama.cpp/ggml/src/ggml-quants.c +6 -6
  119. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -6
  121. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -24
  122. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +28 -41
  123. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +4 -10
  124. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +99 -166
  125. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +94 -72
  126. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +49 -67
  127. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +31 -1
  128. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +697 -1098
  129. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +18 -9
  130. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +6 -9
  131. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +104 -62
  132. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +2 -2
  133. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +1 -1
  134. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +60 -80
  135. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +132 -201
  136. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +55 -74
  137. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +39 -38
  138. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.cpp +131 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-sycl/set_rows.hpp +8 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +3 -3
  141. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +3 -1
  142. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +3 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -8
  144. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +12 -16
  145. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +12 -1
  146. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +767 -292
  147. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +58 -7
  149. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +28 -23
  150. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +14 -9
  151. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +38 -32
  152. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +32 -27
  153. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +44 -12
  154. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp +13 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp +27 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp +11 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp +39 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp +15 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp +29 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +128 -72
  161. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +38 -9
  162. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp +9 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +12 -3
  164. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp +46 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +7 -9
  166. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +7 -9
  167. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +7 -9
  168. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +1 -1
  169. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +20 -4
  170. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp +9 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +69 -5
  172. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +23 -3
  173. package/cpp/llama.cpp/ggml/src/ggml.c +449 -72
  174. package/cpp/llama.cpp/ggml/src/gguf.cpp +13 -2
  175. package/cpp/llama.cpp/gguf-py/gguf/constants.py +285 -0
  176. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +27 -0
  177. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +137 -21
  178. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +109 -7
  179. package/cpp/llama.cpp/gguf-py/pyproject.toml +2 -2
  180. package/cpp/llama.cpp/include/llama.h +8 -43
  181. package/cpp/llama.cpp/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja +124 -0
  182. package/cpp/llama.cpp/src/llama-arch.cpp +265 -3
  183. package/cpp/llama.cpp/src/llama-arch.h +36 -1
  184. package/cpp/llama.cpp/src/llama-batch.cpp +596 -359
  185. package/cpp/llama.cpp/src/llama-batch.h +105 -70
  186. package/cpp/llama.cpp/src/llama-chat.cpp +26 -6
  187. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  188. package/cpp/llama.cpp/src/llama-context.cpp +101 -107
  189. package/cpp/llama.cpp/src/llama-context.h +13 -13
  190. package/cpp/llama.cpp/src/llama-graph.cpp +286 -404
  191. package/cpp/llama.cpp/src/llama-graph.h +78 -79
  192. package/cpp/llama.cpp/src/llama-hparams.cpp +11 -1
  193. package/cpp/llama.cpp/src/llama-hparams.h +11 -0
  194. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +74 -66
  195. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +23 -26
  196. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +312 -157
  197. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +79 -46
  198. package/cpp/llama.cpp/src/llama-kv-cells.h +97 -21
  199. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +73 -69
  200. package/cpp/llama.cpp/src/llama-memory-hybrid.h +19 -22
  201. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +88 -77
  202. package/cpp/llama.cpp/src/llama-memory-recurrent.h +15 -20
  203. package/cpp/llama.cpp/src/llama-memory.cpp +17 -0
  204. package/cpp/llama.cpp/src/llama-memory.h +21 -22
  205. package/cpp/llama.cpp/src/llama-model-saver.cpp +1 -0
  206. package/cpp/llama.cpp/src/llama-model.cpp +5301 -2922
  207. package/cpp/llama.cpp/src/llama-model.h +40 -0
  208. package/cpp/llama.cpp/src/llama-quant.cpp +88 -5
  209. package/cpp/llama.cpp/src/llama-vocab.cpp +37 -3
  210. package/cpp/llama.cpp/src/llama-vocab.h +42 -0
  211. package/cpp/rn-utils.h +3 -0
  212. package/ios/include/chat.h +2 -0
  213. package/ios/include/common.h +5 -0
  214. package/ios/include/llama.h +8 -43
  215. package/ios/libs/llama.xcframework/Info.plist +19 -19
  216. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  217. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  218. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  219. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  220. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +206 -10
  221. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +8 -43
  222. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  223. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  224. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  225. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  226. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  227. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  228. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  229. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  230. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  231. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  232. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  233. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3891 -3744
  234. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +1 -1
  235. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +2 -0
  236. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +206 -10
  237. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +8 -43
  238. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +1 -1
  239. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +2 -0
  240. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +206 -10
  241. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +8 -43
  242. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  243. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +1 -1
  244. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +2 -0
  245. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +206 -10
  246. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +8 -43
  247. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  248. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  249. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  250. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5059 -4863
  251. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  252. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  253. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +206 -10
  254. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +8 -43
  255. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  256. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  257. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5030 -4834
  258. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3889 -3742
  259. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  260. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  261. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  262. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  263. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  264. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  265. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5095 -4900
  266. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +1 -1
  267. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +2 -0
  268. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +206 -10
  269. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +8 -43
  270. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  271. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  272. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +5066 -4871
  273. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3919 -3773
  274. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +1 -1
  275. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +2 -0
  276. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +206 -10
  277. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +8 -43
  278. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  279. package/package.json +1 -1
  280. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +0 -50
  281. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  282. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  283. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +0 -112
  284. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +0 -58
  285. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +0 -25
  286. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +0 -52
  287. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +0 -52
  288. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +0 -52
  289. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +0 -52
  290. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +0 -30
  291. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +0 -22
  292. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +0 -17
  293. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +0 -31
  294. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +0 -31
  295. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +0 -38
  296. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +0 -39
  297. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +0 -44
  298. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +0 -52
  299. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +0 -69
  300. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +0 -51
  301. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +0 -33
  302. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +0 -35
  303. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +0 -140
  304. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +0 -106
  305. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +0 -73
  306. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +0 -52
  307. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +0 -28
  308. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +0 -84
  309. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +0 -21
  310. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +0 -53
  311. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +0 -52
  312. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +0 -52
  313. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +0 -52
  314. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +0 -52
  315. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +0 -19
  316. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +0 -23
  317. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +0 -22
  318. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +0 -72
  319. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +0 -71
@@ -339,7 +339,7 @@ extern "C" {
339
339
  typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
340
340
 
341
341
  // Compare the output of two backends
342
- GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
342
+ GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node);
343
343
 
344
344
  // Tensor initialization
345
345
  GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
@@ -101,6 +101,7 @@ extern "C" {
101
101
  GGML_BACKEND_API int ggml_cpu_has_riscv_v (void);
102
102
  GGML_BACKEND_API int ggml_cpu_has_vsx (void);
103
103
  GGML_BACKEND_API int ggml_cpu_has_vxe (void);
104
+ GGML_BACKEND_API int ggml_cpu_has_nnpa (void);
104
105
  GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void);
105
106
  GGML_BACKEND_API int ggml_cpu_has_llamafile (void);
106
107
 
@@ -133,6 +134,7 @@ extern "C" {
133
134
 
134
135
  GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
135
136
 
137
+ GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t);
136
138
  GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t);
137
139
  GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t);
138
140
  GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t);
@@ -314,6 +314,13 @@
314
314
  extern "C" {
315
315
  #endif
316
316
 
317
+ // Function type used in fatal error callbacks
318
+ typedef void (*ggml_abort_callback_t)(const char * error_message);
319
+
320
+ // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
321
+ // Returns the old callback for chaining
322
+ GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback);
323
+
317
324
  GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
318
325
  GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
319
326
 
@@ -470,6 +477,7 @@ extern "C" {
470
477
  GGML_OP_TRANSPOSE,
471
478
  GGML_OP_GET_ROWS,
472
479
  GGML_OP_GET_ROWS_BACK,
480
+ GGML_OP_SET_ROWS,
473
481
  GGML_OP_DIAG,
474
482
  GGML_OP_DIAG_MASK_INF,
475
483
  GGML_OP_DIAG_MASK_ZERO,
@@ -481,14 +489,16 @@ extern "C" {
481
489
  GGML_OP_CONV_TRANSPOSE_1D,
482
490
  GGML_OP_IM2COL,
483
491
  GGML_OP_IM2COL_BACK,
492
+ GGML_OP_CONV_2D,
484
493
  GGML_OP_CONV_2D_DW,
485
494
  GGML_OP_CONV_TRANSPOSE_2D,
486
495
  GGML_OP_POOL_1D,
487
496
  GGML_OP_POOL_2D,
488
497
  GGML_OP_POOL_2D_BACK,
489
- GGML_OP_UPSCALE, // nearest interpolate
498
+ GGML_OP_UPSCALE,
490
499
  GGML_OP_PAD,
491
500
  GGML_OP_PAD_REFLECT_1D,
501
+ GGML_OP_ROLL,
492
502
  GGML_OP_ARANGE,
493
503
  GGML_OP_TIMESTEP_EMBEDDING,
494
504
  GGML_OP_ARGSORT,
@@ -518,6 +528,8 @@ extern "C" {
518
528
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
519
529
  GGML_OP_OPT_STEP_ADAMW,
520
530
 
531
+ GGML_OP_GLU,
532
+
521
533
  GGML_OP_COUNT,
522
534
  };
523
535
 
@@ -541,6 +553,16 @@ extern "C" {
541
553
  GGML_UNARY_OP_COUNT,
542
554
  };
543
555
 
556
+ enum ggml_glu_op {
557
+ GGML_GLU_OP_REGLU,
558
+ GGML_GLU_OP_GEGLU,
559
+ GGML_GLU_OP_SWIGLU,
560
+ GGML_GLU_OP_GEGLU_ERF,
561
+ GGML_GLU_OP_GEGLU_QUICK,
562
+
563
+ GGML_GLU_OP_COUNT,
564
+ };
565
+
544
566
  enum ggml_object_type {
545
567
  GGML_OBJECT_TYPE_TENSOR,
546
568
  GGML_OBJECT_TYPE_GRAPH,
@@ -626,6 +648,9 @@ extern "C" {
626
648
 
627
649
  // misc
628
650
 
651
+ GGML_API const char * ggml_version(void);
652
+ GGML_API const char * ggml_commit(void);
653
+
629
654
  GGML_API void ggml_time_init(void); // call this once at the beginning of the program
630
655
  GGML_API int64_t ggml_time_ms(void);
631
656
  GGML_API int64_t ggml_time_us(void);
@@ -656,6 +681,7 @@ extern "C" {
656
681
  GGML_API const char * ggml_op_symbol(enum ggml_op op);
657
682
 
658
683
  GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
684
+ GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op);
659
685
  GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
660
686
 
661
687
  GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
@@ -686,6 +712,9 @@ extern "C" {
686
712
  // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN
687
713
  GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor);
688
714
 
715
+ // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements
716
+ GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor);
717
+
689
718
  GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
690
719
  GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
691
720
 
@@ -757,6 +786,7 @@ extern "C" {
757
786
  GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
758
787
 
759
788
  GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
789
+ GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor);
760
790
 
761
791
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
762
792
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
@@ -1085,6 +1115,89 @@ extern "C" {
1085
1115
  struct ggml_context * ctx,
1086
1116
  struct ggml_tensor * a);
1087
1117
 
1118
+ // gated linear unit ops
1119
+ // A: n columns, r rows,
1120
+ // result is n / 2 columns, r rows,
1121
+ // expects gate in second half of row, unless swapped is true
1122
+ GGML_API struct ggml_tensor * ggml_glu(
1123
+ struct ggml_context * ctx,
1124
+ struct ggml_tensor * a,
1125
+ enum ggml_glu_op op,
1126
+ bool swapped);
1127
+
1128
+ GGML_API struct ggml_tensor * ggml_reglu(
1129
+ struct ggml_context * ctx,
1130
+ struct ggml_tensor * a);
1131
+
1132
+ GGML_API struct ggml_tensor * ggml_reglu_swapped(
1133
+ struct ggml_context * ctx,
1134
+ struct ggml_tensor * a);
1135
+
1136
+ GGML_API struct ggml_tensor * ggml_geglu(
1137
+ struct ggml_context * ctx,
1138
+ struct ggml_tensor * a);
1139
+
1140
+ GGML_API struct ggml_tensor * ggml_geglu_swapped(
1141
+ struct ggml_context * ctx,
1142
+ struct ggml_tensor * a);
1143
+
1144
+ GGML_API struct ggml_tensor * ggml_swiglu(
1145
+ struct ggml_context * ctx,
1146
+ struct ggml_tensor * a);
1147
+
1148
+ GGML_API struct ggml_tensor * ggml_swiglu_swapped(
1149
+ struct ggml_context * ctx,
1150
+ struct ggml_tensor * a);
1151
+
1152
+ GGML_API struct ggml_tensor * ggml_geglu_erf(
1153
+ struct ggml_context * ctx,
1154
+ struct ggml_tensor * a);
1155
+
1156
+ GGML_API struct ggml_tensor * ggml_geglu_erf_swapped(
1157
+ struct ggml_context * ctx,
1158
+ struct ggml_tensor * a);
1159
+
1160
+ GGML_API struct ggml_tensor * ggml_geglu_quick(
1161
+ struct ggml_context * ctx,
1162
+ struct ggml_tensor * a);
1163
+
1164
+ GGML_API struct ggml_tensor * ggml_geglu_quick_swapped(
1165
+ struct ggml_context * ctx,
1166
+ struct ggml_tensor * a);
1167
+
1168
+ // A: n columns, r rows,
1169
+ // B: n columns, r rows,
1170
+ GGML_API struct ggml_tensor * ggml_glu_split(
1171
+ struct ggml_context * ctx,
1172
+ struct ggml_tensor * a,
1173
+ struct ggml_tensor * b,
1174
+ enum ggml_glu_op op);
1175
+
1176
+ GGML_API struct ggml_tensor * ggml_reglu_split(
1177
+ struct ggml_context * ctx,
1178
+ struct ggml_tensor * a,
1179
+ struct ggml_tensor * b);
1180
+
1181
+ GGML_API struct ggml_tensor * ggml_geglu_split(
1182
+ struct ggml_context * ctx,
1183
+ struct ggml_tensor * a,
1184
+ struct ggml_tensor * b);
1185
+
1186
+ GGML_API struct ggml_tensor * ggml_swiglu_split(
1187
+ struct ggml_context * ctx,
1188
+ struct ggml_tensor * a,
1189
+ struct ggml_tensor * b);
1190
+
1191
+ GGML_API struct ggml_tensor * ggml_geglu_erf_split(
1192
+ struct ggml_context * ctx,
1193
+ struct ggml_tensor * a,
1194
+ struct ggml_tensor * b);
1195
+
1196
+ GGML_API struct ggml_tensor * ggml_geglu_quick_split(
1197
+ struct ggml_context * ctx,
1198
+ struct ggml_tensor * a,
1199
+ struct ggml_tensor * b);
1200
+
1088
1201
  // normalize along rows
1089
1202
  GGML_API struct ggml_tensor * ggml_norm(
1090
1203
  struct ggml_context * ctx,
@@ -1184,6 +1297,19 @@ extern "C" {
1184
1297
  struct ggml_tensor * a,
1185
1298
  float s);
1186
1299
 
1300
+ // x = s * a + b
1301
+ GGML_API struct ggml_tensor * ggml_scale_bias(
1302
+ struct ggml_context * ctx,
1303
+ struct ggml_tensor * a,
1304
+ float s,
1305
+ float b);
1306
+
1307
+ GGML_API struct ggml_tensor * ggml_scale_bias_inplace(
1308
+ struct ggml_context * ctx,
1309
+ struct ggml_tensor * a,
1310
+ float s,
1311
+ float b);
1312
+
1187
1313
  // b -> view(a,offset,nb1,nb2,3), return modified a
1188
1314
  GGML_API struct ggml_tensor * ggml_set(
1189
1315
  struct ggml_context * ctx,
@@ -1374,6 +1500,23 @@ extern "C" {
1374
1500
  struct ggml_tensor * b, // row indices
1375
1501
  struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
1376
1502
 
1503
+ // a TD [n_embd, ne1, ne2, ne3]
1504
+ // b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3
1505
+ // c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1)
1506
+ //
1507
+ // undefined behavior if destination rows overlap
1508
+ //
1509
+ // broadcast:
1510
+ // ne2 % ne11 == 0
1511
+ // ne3 % ne12 == 0
1512
+ //
1513
+ // return view(a)
1514
+ GGML_API struct ggml_tensor * ggml_set_rows(
1515
+ struct ggml_context * ctx,
1516
+ struct ggml_tensor * a, // destination
1517
+ struct ggml_tensor * b, // source
1518
+ struct ggml_tensor * c); // row indices
1519
+
1377
1520
  GGML_API struct ggml_tensor * ggml_diag(
1378
1521
  struct ggml_context * ctx,
1379
1522
  struct ggml_tensor * a);
@@ -1411,8 +1554,14 @@ extern "C" {
1411
1554
  struct ggml_context * ctx,
1412
1555
  struct ggml_tensor * a);
1413
1556
 
1557
+ // a [ne0, ne01, ne02, ne03]
1558
+ // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional
1559
+ //
1560
+ // broadcast:
1561
+ // ne02 % ne12 == 0
1562
+ // ne03 % ne13 == 0
1563
+ //
1414
1564
  // fused soft_max(a*scale + mask*(ALiBi slope))
1415
- // mask is optional
1416
1565
  // max_bias = 0.0f for no ALiBi
1417
1566
  GGML_API struct ggml_tensor * ggml_soft_max_ext(
1418
1567
  struct ggml_context * ctx,
@@ -1722,6 +1871,17 @@ extern "C" {
1722
1871
  struct ggml_tensor * b,
1723
1872
  int stride);
1724
1873
 
1874
+ GGML_API struct ggml_tensor * ggml_conv_2d_direct(
1875
+ struct ggml_context * ctx,
1876
+ struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC]
1877
+ struct ggml_tensor * b, // input data [W, H, C, N]
1878
+ int s0, // stride dimension 0
1879
+ int s1, // stride dimension 1
1880
+ int p0, // padding dimension 0
1881
+ int p1, // padding dimension 1
1882
+ int d0, // dilation dimension 0
1883
+ int d1); // dilation dimension 1
1884
+
1725
1885
  enum ggml_op_pool {
1726
1886
  GGML_OP_POOL_MAX,
1727
1887
  GGML_OP_POOL_AVG,
@@ -1764,6 +1924,12 @@ extern "C" {
1764
1924
  enum ggml_scale_mode {
1765
1925
  GGML_SCALE_MODE_NEAREST = 0,
1766
1926
  GGML_SCALE_MODE_BILINEAR = 1,
1927
+
1928
+ GGML_SCALE_MODE_COUNT
1929
+ };
1930
+
1931
+ enum ggml_scale_flag {
1932
+ GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8)
1767
1933
  };
1768
1934
 
1769
1935
  // interpolate
@@ -1776,14 +1942,26 @@ extern "C" {
1776
1942
 
1777
1943
  // interpolate
1778
1944
  // interpolate scale to specified dimensions
1779
- GGML_API struct ggml_tensor * ggml_upscale_ext(
1945
+ GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext(
1780
1946
  struct ggml_context * ctx,
1781
1947
  struct ggml_tensor * a,
1782
1948
  int ne0,
1783
1949
  int ne1,
1784
1950
  int ne2,
1785
1951
  int ne3,
1786
- enum ggml_scale_mode mode);
1952
+ enum ggml_scale_mode mode),
1953
+ "use ggml_interpolate instead");
1954
+
1955
+ // Up- or downsamples the input to the specified size.
1956
+ // 2D scale modes (eg. bilinear) are applied to the first two dimensions.
1957
+ GGML_API struct ggml_tensor * ggml_interpolate(
1958
+ struct ggml_context * ctx,
1959
+ struct ggml_tensor * a,
1960
+ int64_t ne0,
1961
+ int64_t ne1,
1962
+ int64_t ne2,
1963
+ int64_t ne3,
1964
+ uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...]
1787
1965
 
1788
1966
  // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0]
1789
1967
  GGML_API struct ggml_tensor * ggml_pad(
@@ -1801,6 +1979,17 @@ extern "C" {
1801
1979
  int p0,
1802
1980
  int p1);
1803
1981
 
1982
+ // Move tensor elements by an offset given for each dimension. Elements that
1983
+ // are shifted beyond the last position are wrapped around to the beginning.
1984
+ GGML_API struct ggml_tensor * ggml_roll(
1985
+ struct ggml_context * ctx,
1986
+ struct ggml_tensor * a,
1987
+ int shift0,
1988
+ int shift1,
1989
+ int shift2,
1990
+ int shift3);
1991
+
1992
+
1804
1993
  // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151
1805
1994
  // timesteps: [N,]
1806
1995
  // return: [N, dim]
@@ -1835,11 +2024,17 @@ extern "C" {
1835
2024
 
1836
2025
  #define GGML_KQ_MASK_PAD 64
1837
2026
 
1838
- // q: [n_embd_k, n_batch, n_head, 1]
1839
- // k: [n_embd_k, n_kv, n_head_kv, 1]
1840
- // v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !!
1841
- // mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
1842
- // res: [n_embd_v, n_head, n_batch, 1] !! permuted !!
2027
+ // q: [n_embd_k, n_batch, n_head, ne3 ]
2028
+ // k: [n_embd_k, n_kv, n_head_kv, ne3 ]
2029
+ // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !!
2030
+ // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !!
2031
+ // res: [n_embd_v, n_head, n_batch, ne3 ] !! permuted !!
2032
+ //
2033
+ // broadcast:
2034
+ // n_head % n_head_kv == 0
2035
+ // n_head % ne32 == 0
2036
+ // ne3 % ne33 == 0
2037
+ //
1843
2038
  GGML_API struct ggml_tensor * ggml_flash_attn_ext(
1844
2039
  struct ggml_context * ctx,
1845
2040
  struct ggml_tensor * q,
@@ -1878,7 +2073,8 @@ extern "C" {
1878
2073
  struct ggml_tensor * dt,
1879
2074
  struct ggml_tensor * A,
1880
2075
  struct ggml_tensor * B,
1881
- struct ggml_tensor * C);
2076
+ struct ggml_tensor * C,
2077
+ struct ggml_tensor * ids);
1882
2078
 
1883
2079
  // partition into non-overlapping windows with padding if needed
1884
2080
  // example:
@@ -79,46 +79,6 @@ extern "C" {
79
79
  LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization
80
80
  };
81
81
 
82
- // pre-tokenization types
83
- enum llama_vocab_pre_type {
84
- LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
85
- LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
86
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
87
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
88
- LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
89
- LLAMA_VOCAB_PRE_TYPE_MPT = 5,
90
- LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
91
- LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
92
- LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
93
- LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
94
- LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10,
95
- LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11,
96
- LLAMA_VOCAB_PRE_TYPE_OLMO = 12,
97
- LLAMA_VOCAB_PRE_TYPE_DBRX = 13,
98
- LLAMA_VOCAB_PRE_TYPE_SMAUG = 14,
99
- LLAMA_VOCAB_PRE_TYPE_PORO = 15,
100
- LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16,
101
- LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17,
102
- LLAMA_VOCAB_PRE_TYPE_VIKING = 18,
103
- LLAMA_VOCAB_PRE_TYPE_JAIS = 19,
104
- LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
105
- LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
106
- LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
107
- LLAMA_VOCAB_PRE_TYPE_BLOOM = 23,
108
- LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
109
- LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
110
- LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
111
- LLAMA_VOCAB_PRE_TYPE_MINERVA = 27,
112
- LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28,
113
- LLAMA_VOCAB_PRE_TYPE_GPT4O = 29,
114
- LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30,
115
- LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
116
- LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
117
- LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
118
- LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
119
- LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
120
- };
121
-
122
82
  enum llama_rope_type {
123
83
  LLAMA_ROPE_TYPE_NONE = -1,
124
84
  LLAMA_ROPE_TYPE_NORM = 0,
@@ -390,6 +350,7 @@ extern "C" {
390
350
  void * imatrix; // pointer to importance matrix data
391
351
  void * kv_overrides; // pointer to vector containing overrides
392
352
  void * tensor_types; // pointer to vector containing tensor types
353
+ void * prune_layers; // pointer to vector containing layer indices to prune
393
354
  } llama_model_quantize_params;
394
355
 
395
356
  typedef struct llama_logit_bias {
@@ -943,12 +904,14 @@ extern "C" {
943
904
  // Requires the context to have a memory.
944
905
  // For encode-decoder contexts, processes the batch using the decoder.
945
906
  // Positive return values does not mean a fatal error, but rather a warning.
946
- // Upon non-zero return values, the memory state is restored to the state before this call
907
+ // Upon fatal-error or abort, the ubatches that managed to be been processed will remain in the memory state of the context
908
+ // To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max()
909
+ // Upon other return values, the memory state is restored to the state before this call
947
910
  // 0 - success
948
911
  // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
949
- // 2 - aborted
912
+ // 2 - aborted (processed ubatches will remain in the context's memory)
950
913
  // -1 - invalid input batch
951
- // < -1 - error
914
+ // < -1 - fatal error (processed ubatches will remain in the context's memory)
952
915
  LLAMA_API int32_t llama_decode(
953
916
  struct llama_context * ctx,
954
917
  struct llama_batch batch);
@@ -1044,6 +1007,7 @@ extern "C" {
1044
1007
 
1045
1008
  LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab);
1046
1009
  LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab);
1010
+ LLAMA_API bool llama_vocab_get_add_sep(const struct llama_vocab * vocab);
1047
1011
 
1048
1012
  LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab);
1049
1013
  LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab);
@@ -1087,6 +1051,7 @@ extern "C" {
1087
1051
  /// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
1088
1052
  /// @return Returns the number of tokens on success, no more than n_tokens_max
1089
1053
  /// @return Returns a negative number on failure - the number of tokens that would have been returned
1054
+ /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
1090
1055
  /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so.
1091
1056
  /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
1092
1057
  /// as plaintext. Does not insert a leading space.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@novastera-oss/llamarn",
3
- "version": "0.2.7",
3
+ "version": "0.3.0",
4
4
  "description": "An attempt at a pure cpp turbo module library",
5
5
  "source": "./src/index.tsx",
6
6
  "main": "./lib/module/index.js",
@@ -1,50 +0,0 @@
1
- #pragma once
2
-
3
- #include "ggml.h"
4
- #include "ggml-backend.h"
5
-
6
- #include <stdbool.h>
7
- #include <stddef.h>
8
- #include <stdint.h>
9
-
10
- #ifdef __cplusplus
11
- extern "C" {
12
- #endif
13
-
14
- #define GGML_KOMPUTE_MAX_DEVICES 16
15
-
16
- struct ggml_vk_device {
17
- int index;
18
- int type; // same as VkPhysicalDeviceType
19
- size_t heapSize;
20
- const char * name;
21
- const char * vendor;
22
- int subgroupSize;
23
- uint64_t bufferAlignment;
24
- uint64_t maxAlloc;
25
- };
26
-
27
- struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count);
28
- bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name);
29
- bool ggml_vk_has_vulkan(void);
30
- bool ggml_vk_has_device(void);
31
- struct ggml_vk_device ggml_vk_current_device(void);
32
-
33
- //
34
- // backend API
35
- //
36
-
37
- // forward declaration
38
- typedef struct ggml_backend * ggml_backend_t;
39
-
40
- GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device);
41
-
42
- GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend);
43
-
44
- GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device);
45
-
46
- GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void);
47
-
48
- #ifdef __cplusplus
49
- }
50
- #endif