@novastera-oss/llamarn 0.1.5-beta.3 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274) hide show
  1. package/README.md +80 -14
  2. package/RNLlamaCpp.podspec +10 -3
  3. package/android/CMakeLists.txt +8 -0
  4. package/android/src/main/cpp/include/llama.h +62 -125
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  13. package/cpp/LlamaCppModel.cpp +148 -5
  14. package/cpp/LlamaCppModel.h +11 -2
  15. package/cpp/PureCppImpl.cpp +3 -3
  16. package/cpp/PureCppImpl.h +3 -0
  17. package/cpp/build-info.cpp +2 -2
  18. package/cpp/llama.cpp/README.md +11 -3
  19. package/cpp/llama.cpp/build-xcframework.sh +1 -0
  20. package/cpp/llama.cpp/common/CMakeLists.txt +8 -2
  21. package/cpp/llama.cpp/common/arg.cpp +153 -113
  22. package/cpp/llama.cpp/common/chat-parser.cpp +379 -0
  23. package/cpp/llama.cpp/common/chat-parser.h +117 -0
  24. package/cpp/llama.cpp/common/chat.cpp +847 -699
  25. package/cpp/llama.cpp/common/chat.h +73 -6
  26. package/cpp/llama.cpp/common/common.cpp +50 -82
  27. package/cpp/llama.cpp/common/common.h +21 -17
  28. package/cpp/llama.cpp/common/json-partial.cpp +255 -0
  29. package/cpp/llama.cpp/common/json-partial.h +37 -0
  30. package/cpp/llama.cpp/common/minja/chat-template.hpp +9 -5
  31. package/cpp/llama.cpp/common/minja/minja.hpp +69 -36
  32. package/cpp/llama.cpp/common/regex-partial.cpp +204 -0
  33. package/cpp/llama.cpp/common/regex-partial.h +56 -0
  34. package/cpp/llama.cpp/common/sampling.cpp +7 -8
  35. package/cpp/llama.cpp/convert_hf_to_gguf.py +453 -118
  36. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +120 -68
  37. package/cpp/llama.cpp/ggml/CMakeLists.txt +2 -1
  38. package/cpp/llama.cpp/ggml/cmake/common.cmake +25 -0
  39. package/cpp/llama.cpp/ggml/include/ggml-opt.h +49 -28
  40. package/cpp/llama.cpp/ggml/include/ggml.h +26 -7
  41. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +16 -10
  42. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +4 -1
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +1 -0
  44. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +2 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +604 -0
  46. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +42 -0
  47. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +54 -2
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +50 -51
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +2 -2
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +5 -9
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +779 -19
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +22 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +322 -100
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +117 -1
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +220 -49
  60. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +40 -26
  61. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +1 -1
  62. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +11 -1
  63. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +15 -7
  64. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +266 -64
  65. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +49 -4
  66. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +48 -4
  67. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +2 -1
  68. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +5 -1
  69. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +2 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +7 -6
  71. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +1 -1
  72. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +10 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +2 -0
  74. package/cpp/llama.cpp/ggml/src/ggml-impl.h +1 -1
  75. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +4 -0
  76. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +99 -17
  77. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +200 -2
  78. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  79. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cu +112 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-musa/mudnn.cuh +12 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +6 -0
  82. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +972 -178
  83. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/argsort.cl +86 -0
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/div.cl +72 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/group_norm.cl +72 -0
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sigmoid.cl +29 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sub.cl +72 -0
  88. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/sum_rows.cl +39 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +373 -190
  90. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  91. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +5 -10
  92. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +101 -5
  93. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +31 -33
  94. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +1 -0
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +29 -2
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +4 -5
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +9 -1
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +84 -72
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +2 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +1 -3
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +324 -129
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +1 -0
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +31 -2
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +95 -68
  107. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +1 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +22 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -2
  110. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +1 -4
  111. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +2 -3
  112. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +69 -43
  113. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +2 -14
  114. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -91
  115. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -181
  116. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +17 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +1 -1
  118. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +6 -152
  119. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +162 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +360 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +2 -118
  122. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +1 -1
  123. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +12 -1
  124. package/cpp/llama.cpp/ggml/src/ggml.c +107 -36
  125. package/cpp/llama.cpp/ggml/src/gguf.cpp +33 -33
  126. package/cpp/llama.cpp/gguf-py/gguf/constants.py +100 -15
  127. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +1 -1
  128. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +44 -12
  129. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +21 -10
  130. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +5 -2
  131. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +128 -31
  132. package/cpp/llama.cpp/gguf-py/gguf/utility.py +1 -1
  133. package/cpp/llama.cpp/gguf-py/pyproject.toml +1 -1
  134. package/cpp/llama.cpp/include/llama.h +62 -125
  135. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +1 -1
  136. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +1 -1
  137. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +1 -1
  138. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +1 -1
  139. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +1 -1
  140. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +1 -1
  141. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +1 -1
  142. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +1 -1
  143. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +1 -1
  144. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +1 -1
  145. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +1 -1
  146. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +1 -1
  147. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +1 -1
  148. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +1 -1
  149. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +1 -1
  150. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +1 -1
  151. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +1 -1
  152. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +1 -1
  153. package/cpp/llama.cpp/models/ggml-vocab-nomic-bert-moe.gguf +0 -0
  154. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +1 -1
  155. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +1 -1
  156. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +1 -1
  157. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +1 -1
  158. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +1 -1
  159. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +1 -1
  160. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +1 -1
  161. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +1 -1
  162. package/cpp/llama.cpp/models/templates/Qwen-QwQ-32B.jinja +62 -0
  163. package/cpp/llama.cpp/models/templates/Qwen-Qwen3-0.6B.jinja +85 -0
  164. package/cpp/llama.cpp/models/templates/README.md +2 -0
  165. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  166. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  167. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  168. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +1 -1
  169. package/cpp/llama.cpp/src/CMakeLists.txt +2 -0
  170. package/cpp/llama.cpp/src/llama-arch.cpp +6 -0
  171. package/cpp/llama.cpp/src/llama-arch.h +2 -0
  172. package/cpp/llama.cpp/src/llama-batch.cpp +3 -1
  173. package/cpp/llama.cpp/src/llama-context.cpp +340 -123
  174. package/cpp/llama.cpp/src/llama-context.h +30 -0
  175. package/cpp/llama.cpp/src/llama-cparams.cpp +4 -0
  176. package/cpp/llama.cpp/src/llama-cparams.h +2 -0
  177. package/cpp/llama.cpp/src/llama-grammar.cpp +12 -2
  178. package/cpp/llama.cpp/src/llama-graph.cpp +157 -247
  179. package/cpp/llama.cpp/src/llama-graph.h +52 -7
  180. package/cpp/llama.cpp/src/llama-hparams.cpp +17 -1
  181. package/cpp/llama.cpp/src/llama-hparams.h +37 -5
  182. package/cpp/llama.cpp/src/llama-kv-cache.cpp +742 -481
  183. package/cpp/llama.cpp/src/llama-kv-cache.h +196 -99
  184. package/cpp/llama.cpp/src/llama-kv-cells.h +379 -0
  185. package/cpp/llama.cpp/src/llama-memory.h +4 -3
  186. package/cpp/llama.cpp/src/llama-model-loader.cpp +22 -17
  187. package/cpp/llama.cpp/src/llama-model-saver.cpp +281 -0
  188. package/cpp/llama.cpp/src/llama-model-saver.h +37 -0
  189. package/cpp/llama.cpp/src/llama-model.cpp +529 -172
  190. package/cpp/llama.cpp/src/llama-model.h +6 -1
  191. package/cpp/llama.cpp/src/llama-quant.cpp +15 -13
  192. package/cpp/llama.cpp/src/llama-sampling.cpp +2 -2
  193. package/cpp/llama.cpp/src/llama-vocab.cpp +35 -8
  194. package/cpp/llama.cpp/src/llama-vocab.h +6 -0
  195. package/cpp/llama.cpp/src/llama.cpp +14 -0
  196. package/cpp/rn-completion.cpp +4 -2
  197. package/ios/include/chat.h +73 -6
  198. package/ios/include/common/minja/chat-template.hpp +9 -5
  199. package/ios/include/common/minja/minja.hpp +69 -36
  200. package/ios/include/common.h +21 -17
  201. package/ios/include/llama.h +62 -125
  202. package/ios/libs/llama.xcframework/Info.plist +19 -19
  203. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  204. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4617 -4487
  205. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  206. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +26 -7
  207. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +62 -125
  208. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  209. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  210. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  211. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3557 -3435
  212. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  213. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  214. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  215. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  216. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  217. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4638 -4508
  218. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3559 -3437
  219. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-opt.h +237 -0
  220. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +26 -7
  221. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +62 -125
  222. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-opt.h +237 -0
  223. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +26 -7
  224. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +62 -125
  225. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  226. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-opt.h +237 -0
  227. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +26 -7
  228. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +62 -125
  229. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  230. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  231. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  232. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4616 -4487
  233. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  234. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +26 -7
  235. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +62 -125
  236. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  237. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  238. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4637 -4508
  239. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3556 -3435
  240. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  241. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  242. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  243. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  244. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  245. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4653 -4523
  246. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-opt.h +237 -0
  247. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +26 -7
  248. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +62 -125
  249. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  250. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  251. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4674 -4544
  252. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3587 -3465
  253. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-opt.h +237 -0
  254. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +26 -7
  255. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +62 -125
  256. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  257. package/lib/module/NativeRNLlamaCpp.js.map +1 -1
  258. package/lib/typescript/src/NativeRNLlamaCpp.d.ts +3 -0
  259. package/lib/typescript/src/NativeRNLlamaCpp.d.ts.map +1 -1
  260. package/package.json +2 -1
  261. package/src/NativeRNLlamaCpp.ts +1 -0
  262. package/cpp/llama.cpp/common/stb_image.h +0 -7988
  263. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  264. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  265. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  266. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  267. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  268. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  269. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  270. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  271. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  272. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  273. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  274. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
@@ -0,0 +1,86 @@
1
+ #pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
+
3
+ #ifdef cl_intel_subgroups
4
+ #pragma OPENCL EXTENSION cl_intel_subgroups : enable
5
+ #else
6
+ #pragma OPENCL EXTENSION cl_khr_subgroups : enable
7
+ #endif
8
+
9
+ #ifdef cl_intel_required_subgroup_size
10
+ #pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
11
+ #define INTEL_GPU 1
12
+ #define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
13
+ #define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
14
+ #elif defined(cl_qcom_reqd_sub_group_size)
15
+ #pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
16
+ #define ADRENO_GPU 1
17
+ #define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half")))
18
+ #define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
19
+ #endif
20
+
21
+ #define SWAP(x, y, T) { T tmp = (x); (x) = (y); (y) = tmp; }
22
+
23
// Sort direction selector. The argsort kernel compares its `order` argument
// against GGML_SORT_ORDER_ASC; any other value takes the descending branch.
enum ggml_sort_order {
    GGML_SORT_ORDER_ASC  = 0,
    GGML_SORT_ORDER_DESC = 1,
};
27
+
28
// Per-row argsort of f32 values using a bitonic sorting network.
//
// Each work-group handles one row (row index = get_group_id(1)); each
// work-item along dimension 0 owns one slot of the padded index array.
//
//   src0     : f32 input, rows of ne00 contiguous values
//   offset0  : byte offset applied to src0
//   dst      : i32 output, rows of ne00 indices
//   offsetd  : byte offset applied to dst
//   ne00     : number of real elements per row
//   ne00_pad : padded row length used by the sorting network
//              (presumably a power of two >= ne00 -- confirm against the host code)
//   order    : GGML_SORT_ORDER_ASC for ascending, anything else sorts descending
//   dst_row  : local scratch indexed by col, i.e. at least ne00_pad ints
//
// NOTE(review): work-items with col >= ne00_pad return before the barriers
// below. That is only safe if no such work-item exists in the work-group
// (local size along dim 0 == ne00_pad) -- confirm against the host launch code.
kernel void kernel_argsort_f32_i32(
    global float * src0,
    ulong          offset0,
    global int   * dst,
    ulong          offsetd,
    const int      ne00,
    const int      ne00_pad,
    const int      order,
    local int    * dst_row
) {
    // bitonic sort
    int col = get_local_id(0);
    int row = get_group_id(1);

    if (col >= ne00_pad) {
        return;
    }

    // Apply the byte offsets, casting back to the declared element types.
    src0 = (global float *)((global char *)src0 + offset0);
    dst  = (global int   *)((global char *)dst  + offsetd);

    global float * x_row = src0 + row * ne00;

    // initialize indices
    dst_row[col] = col;

    barrier(CLK_LOCAL_MEM_FENCE);

    for (int k = 2; k <= ne00_pad; k *= 2) {
        for (int j = k / 2; j > 0; j /= 2) {
            int ixj = col ^ j;
            // Only the lower work-item of each (col, ixj) pair does the compare/swap.
            if (ixj > col) {
                if ((col & k) == 0) {
                    // Indices >= ne00 are padding: they always lose the comparison,
                    // so they end up past the real elements.
                    if (dst_row[col] >= ne00 ||
                        (dst_row[ixj] < ne00 && (order == GGML_SORT_ORDER_ASC ?
                            x_row[dst_row[col]] > x_row[dst_row[ixj]] :
                            x_row[dst_row[col]] < x_row[dst_row[ixj]]))
                    ) {
                        SWAP(dst_row[col], dst_row[ixj], int);
                    }
                } else {
                    // Opposite direction for the other half of the bitonic sequence.
                    if (dst_row[ixj] >= ne00 ||
                        (dst_row[col] < ne00 && (order == GGML_SORT_ORDER_ASC ?
                            x_row[dst_row[col]] < x_row[dst_row[ixj]] :
                            x_row[dst_row[col]] > x_row[dst_row[ixj]]))
                    ) {
                        SWAP(dst_row[col], dst_row[ixj], int);
                    }
                }
            }
            // All swaps of this stage must be visible before the next stage reads them.
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }

    // copy the result to dst without the padding
    if (col < ne00) {
        dst[row * ne00 + col] = dst_row[col];
    }
}
@@ -0,0 +1,72 @@
1
+ #pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
+
3
+ //------------------------------------------------------------------------------
4
+ // div
5
+ //------------------------------------------------------------------------------
6
// Element-wise f32 division: dst = src0 / src1, with src1 broadcast.
//
// All tensors are addressed as raw bytes; nb* are byte strides per dimension
// and offset* are byte offsets applied to the base pointers. One work-group
// handles one row (group ids 0/1/2 select dims 1/2/3); the work-items of the
// group stride over the ne0 elements of that row. src1 indices wrap modulo
// its extents ne10..ne13.
kernel void kernel_div(
    global char * src0,
    ulong         offset0,
    global char * src1,
    ulong         offset1,
    global char * dst,
    ulong         offsetd,
    ulong         nb00,
    ulong         nb01,
    ulong         nb02,
    ulong         nb03,
    int           ne10,
    int           ne11,
    int           ne12,
    int           ne13,
    ulong         nb10,
    ulong         nb11,
    ulong         nb12,
    ulong         nb13,
    int           ne0,
    ulong         nb0,
    ulong         nb1,
    ulong         nb2,
    ulong         nb3
) {
    src0 += offset0;
    src1 += offset1;
    dst  += offsetd;

    // Row coordinates of this work-group in src0/dst.
    const int i01 = get_group_id(0);
    const int i02 = get_group_id(1);
    const int i03 = get_group_id(2);

    // Matching src1 coordinates, wrapped for broadcasting.
    const int i11 = i01 % ne11;
    const int i12 = i02 % ne12;
    const int i13 = i03 % ne13;

    global char * a_row   = src0 + i03*nb03 + i02*nb02 + i01*nb01;
    global char * b_row   = src1 + i13*nb13 + i12*nb12 + i11*nb11;
    global char * out_row = dst  + i03*nb3  + i02*nb2  + i01*nb1;

    const int lane   = get_local_id(0);
    const int stride = get_local_size(0);

    for (int i0 = lane; i0 < ne0; i0 += stride) {
        const int i10 = i0 % ne10;

        const float a = *((global float *)(a_row + i0*nb00));
        const float b = *((global float *)(b_row + i10*nb10));

        *((global float *)(out_row + i0*nb0)) = a / b;
    }
}
52
+
53
+ // assumption: src1 is a row
54
+ // broadcast src1 into src0
55
// Vectorised (float4) division where src1 is a single row repeated over src0:
// dst[gid] = src0[gid] / src1[gid mod ne].
// offset* are byte offsets applied to the base pointers.
// NOTE(review): ne is presumably the src1 row length in float4 units -- confirm
// against the host code.
kernel void kernel_div_row(
    global float4 * src0,
    ulong           offset0,
    global float4 * src1,
    ulong           offset1,
    global float4 * dst,
    ulong           offsetd,
    int             ne
) {
    src0 = (global float4 *)((global char *)src0 + offset0);
    src1 = (global float4 *)((global char *)src1 + offset1);
    dst  = (global float4 *)((global char *)dst  + offsetd);

    const uint gid = get_global_id(0);

    // gid % ne, written as divide/multiply/subtract; the original author
    // notes this form performs better than using %.
    const uint wraps = gid / ne;
    const uint idx1  = gid - wraps*ne;

    dst[gid] = src0[gid] / src1[idx1];
}
@@ -0,0 +1,72 @@
1
+ #pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
+
3
+ #ifdef cl_intel_subgroups
4
+ #pragma OPENCL EXTENSION cl_intel_subgroups : enable
5
+ #else
6
+ #pragma OPENCL EXTENSION cl_khr_subgroups : enable
7
+ #endif
8
+
9
+ #ifdef cl_intel_required_subgroup_size
10
+ #pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable
11
+ #define INTEL_GPU 1
12
+ #define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16)))
13
+ #define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32)))
14
+ #elif defined(cl_qcom_reqd_sub_group_size)
15
+ #pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable
16
+ #define ADRENO_GPU 1
17
+ #define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half")))
18
+ #define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full")))
19
+ #endif
20
+
21
// Group normalisation over contiguous f32 data.
//
// Work-group g normalises elements [g*group_size, min((g+1)*group_size, ne)):
// it subtracts the mean and multiplies by 1/sqrt(variance + eps). Both the
// mean and the variance are divided by group_size, including when the range
// was clipped to ne.
//
// The reductions use sub_group_reduce_add, so the work-group must consist of
// exactly one sub-group; the attribute below requests the sub-group size on
// Intel and Adreno GPUs.
#ifdef INTEL_GPU
REQD_SUBGROUP_SIZE_32
#elif defined (ADRENO_GPU)
REQD_SUBGROUP_SIZE_64
#endif
kernel void kernel_group_norm(
    global float * src0,
    ulong          offset0,
    global float * dst,
    ulong          offsetd,
    int            ne,
    int            group_size,
    float          eps
) {
    src0 = (global float *)((global char *)src0 + offset0);
    dst  = (global float *)((global char *)dst  + offsetd);

    // Element range owned by this work-group, clipped to the tensor size.
    int first = get_group_id(0) * group_size;
    int last  = first + group_size;
    last = (last >= ne) ? ne : last;

    // Each work-item starts at its own lane and strides by the group width.
    first += get_local_id(0);

    // Pass 1: mean.
    float acc = 0.0f;
    for (int i = first; i < last; i += get_local_size(0)) {
        acc += src0[i];
    }
    acc = sub_group_reduce_add(acc);
    const float mean = acc / group_size;

    // Pass 2: write centred values and accumulate the squared deviations.
    acc = 0.0f;
    for (int i = first; i < last; i += get_local_size(0)) {
        const float centred = src0[i] - mean;
        dst[i] = centred;
        acc += centred * centred;
    }
    acc = sub_group_reduce_add(acc);
    const float variance = acc / group_size;

    // Pass 3: scale the centred values in place.
    const float scale = 1.0f/sqrt(variance + eps);
    for (int i = first; i < last; i += get_local_size(0)) {
        dst[i] *= scale;
    }
}
@@ -0,0 +1,29 @@
1
+ #pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
+
3
+ //------------------------------------------------------------------------------
4
+ // sigmoid
5
+ //------------------------------------------------------------------------------
6
+
7
// Element-wise logistic sigmoid on f32 data: dst[i] = 1 / (1 + exp(-src0[i])).
// One work-item per element, indexed by get_global_id(0); offset* are byte
// offsets applied to the base pointers. There is no bounds check, so the
// launch size must not exceed the number of elements.
kernel void kernel_sigmoid_f32(
    global float * src0,
    ulong          offset0,
    global float * dst,
    ulong          offsetd
) {
    src0 = (global float *)((global char *)src0 + offset0);
    dst  = (global float *)((global char *)dst  + offsetd);

    const size_t i = get_global_id(0);
    const float  x = src0[i];

    dst[i] = 1.0f / (1.0f + exp(-x));
}
18
+
19
// Element-wise logistic sigmoid on f16 data: dst[i] = 1 / (1 + exp(-src0[i])).
// exp() is evaluated on the half value; the surrounding arithmetic uses float
// literals and the result is converted back to half on store.
// One work-item per element, indexed by get_global_id(0); offset* are byte
// offsets applied to the base pointers. There is no bounds check.
kernel void kernel_sigmoid_f16(
    global half * src0,
    ulong         offset0,
    global half * dst,
    ulong         offsetd
) {
    src0 = (global half *)((global char *)src0 + offset0);
    dst  = (global half *)((global char *)dst  + offsetd);

    const size_t i = get_global_id(0);
    const half   x = src0[i];

    dst[i] = 1.0f / (1.0f + exp(-x));
}
@@ -0,0 +1,72 @@
1
+ #pragma OPENCL EXTENSION cl_khr_fp16 : enable
2
+
3
+ //------------------------------------------------------------------------------
4
+ // sub
5
+ //------------------------------------------------------------------------------
6
// Element-wise f32 subtraction: dst = src0 - src1, with src1 broadcast.
//
// All tensors are addressed as raw bytes; nb* are byte strides per dimension
// and offset* are byte offsets applied to the base pointers. One work-group
// handles one row (group ids 0/1/2 select dims 1/2/3); the work-items of the
// group stride over the ne0 elements of that row. src1 indices wrap modulo
// its extents ne10..ne13.
kernel void kernel_sub(
    global char * src0,
    ulong         offset0,
    global char * src1,
    ulong         offset1,
    global char * dst,
    ulong         offsetd,
    ulong         nb00,
    ulong         nb01,
    ulong         nb02,
    ulong         nb03,
    int           ne10,
    int           ne11,
    int           ne12,
    int           ne13,
    ulong         nb10,
    ulong         nb11,
    ulong         nb12,
    ulong         nb13,
    int           ne0,
    ulong         nb0,
    ulong         nb1,
    ulong         nb2,
    ulong         nb3
) {
    src0 += offset0;
    src1 += offset1;
    dst  += offsetd;

    // Row coordinates of this work-group in src0/dst.
    const int i01 = get_group_id(0);
    const int i02 = get_group_id(1);
    const int i03 = get_group_id(2);

    // Matching src1 coordinates, wrapped for broadcasting.
    const int i11 = i01 % ne11;
    const int i12 = i02 % ne12;
    const int i13 = i03 % ne13;

    global char * a_row   = src0 + i03*nb03 + i02*nb02 + i01*nb01;
    global char * b_row   = src1 + i13*nb13 + i12*nb12 + i11*nb11;
    global char * out_row = dst  + i03*nb3  + i02*nb2  + i01*nb1;

    const int lane   = get_local_id(0);
    const int stride = get_local_size(0);

    for (int i0 = lane; i0 < ne0; i0 += stride) {
        const int i10 = i0 % ne10;

        const float a = *((global float *)(a_row + i0*nb00));
        const float b = *((global float *)(b_row + i10*nb10));

        *((global float *)(out_row + i0*nb0)) = a - b;
    }
}
52
+
53
+ // assumption: src1 is a row
54
+ // broadcast src1 into src0
55
// Vectorised (float4) subtraction where src1 is a single row repeated over src0:
// dst[gid] = src0[gid] - src1[gid mod ne].
// offset* are byte offsets applied to the base pointers.
// NOTE(review): ne is presumably the src1 row length in float4 units -- confirm
// against the host code.
kernel void kernel_sub_row(
    global float4 * src0,
    ulong           offset0,
    global float4 * src1,
    ulong           offset1,
    global float4 * dst,
    ulong           offsetd,
    int             ne
) {
    src0 = (global float4 *)((global char *)src0 + offset0);
    src1 = (global float4 *)((global char *)src1 + offset1);
    dst  = (global float4 *)((global char *)dst  + offsetd);

    const uint gid = get_global_id(0);

    // gid % ne, written as divide/multiply/subtract; the original author
    // notes this form performs better than using %.
    const uint wraps = gid / ne;
    const uint idx1  = gid - wraps*ne;

    dst[gid] = src0[gid] - src1[idx1];
}
@@ -0,0 +1,39 @@
1
+
2
// Sums each f32 row of src0 along dimension 0 and stores the scalar result
// in the first element of the corresponding dst row.
//
// One work-item per row: global ids 0/1/2 select dims 1/2/3, and ids outside
// ne01/ne02/ne03 exit early. nb0x / nbx are byte strides of src0 / dst;
// offset* are byte offsets applied to the base pointers. The row itself is
// read as ne00 contiguous floats.
kernel void kernel_sum_rows_f32(
    global float * src0,
    ulong          offset0,
    global float * dst,
    ulong          offsetd,
    int            ne00,
    int            ne01,
    int            ne02,
    int            ne03,
    ulong          nb01,
    ulong          nb02,
    ulong          nb03,
    ulong          nb1,
    ulong          nb2,
    ulong          nb3
) {
    src0 = (global float *)((global char *)src0 + offset0);
    dst  = (global float *)((global char *)dst  + offsetd);

    const int i1 = get_global_id(0);
    const int i2 = get_global_id(1);
    const int i3 = get_global_id(2);

    // Guard against work-items launched past the tensor extents.
    if (i1 >= ne01 || i2 >= ne02 || i3 >= ne03) {
        return;
    }

    global float * in_row  = (global float *) ((global char *) src0 + i1*nb01 + i2*nb02 + i3*nb03);
    global float * out_row = (global float *) ((global char *) dst  + i1*nb1  + i2*nb2  + i3*nb3);

    // Sequential accumulation in ascending index order.
    float acc = 0;
    for (int i0 = 0; i0 < ne00; ++i0) {
        acc += in_row[i0];
    }

    *out_row = acc;
}