@novastera-oss/llamarn 0.0.1-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (989)
  1. package/INTERFACE.md +389 -0
  2. package/LICENSE +201 -0
  3. package/README.md +235 -0
  4. package/RNLlamaCpp.podspec +69 -0
  5. package/android/CMakeLists.txt +107 -0
  6. package/android/build.gradle +111 -0
  7. package/android/generated/java/com/novastera/llamarn/NativeRNLlamaCppSpec.java +47 -0
  8. package/android/generated/jni/CMakeLists.txt +36 -0
  9. package/android/generated/jni/RNLlamaCppSpec-generated.cpp +44 -0
  10. package/android/generated/jni/RNLlamaCppSpec.h +31 -0
  11. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI-generated.cpp +42 -0
  12. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI.h +336 -0
  13. package/android/gradle.properties +5 -0
  14. package/android/src/main/AndroidManifest.xml +3 -0
  15. package/android/src/main/AndroidManifestNew.xml +2 -0
  16. package/android/src/main/cpp/include/llama-cpp.h +30 -0
  17. package/android/src/main/cpp/include/llama.h +1440 -0
  18. package/android/src/main/java/com/novastera/llamarn/RNLlamaCppPackage.kt +21 -0
  19. package/android/src/main/jniLibs/arm64-v8a/libOpenCL.so +0 -0
  20. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  21. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  22. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  23. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  24. package/android/src/main/jniLibs/x86_64/libOpenCL.so +0 -0
  25. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  26. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  27. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  28. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  29. package/cpp/LlamaCppModel.cpp +984 -0
  30. package/cpp/LlamaCppModel.h +162 -0
  31. package/cpp/PureCppImpl.cpp +308 -0
  32. package/cpp/PureCppImpl.h +59 -0
  33. package/cpp/SystemUtils.cpp +180 -0
  34. package/cpp/SystemUtils.h +74 -0
  35. package/cpp/build-info.cpp +4 -0
  36. package/cpp/llama.cpp/AUTHORS +1106 -0
  37. package/cpp/llama.cpp/CMakeLists.txt +254 -0
  38. package/cpp/llama.cpp/CMakePresets.json +84 -0
  39. package/cpp/llama.cpp/CODEOWNERS +11 -0
  40. package/cpp/llama.cpp/CONTRIBUTING.md +127 -0
  41. package/cpp/llama.cpp/LICENSE +21 -0
  42. package/cpp/llama.cpp/Makefile +1608 -0
  43. package/cpp/llama.cpp/README.md +575 -0
  44. package/cpp/llama.cpp/SECURITY.md +68 -0
  45. package/cpp/llama.cpp/build-xcframework.sh +540 -0
  46. package/cpp/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  47. package/cpp/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
  48. package/cpp/llama.cpp/cmake/build-info.cmake +64 -0
  49. package/cpp/llama.cpp/cmake/common.cmake +35 -0
  50. package/cpp/llama.cpp/cmake/git-vars.cmake +22 -0
  51. package/cpp/llama.cpp/cmake/llama-config.cmake.in +30 -0
  52. package/cpp/llama.cpp/cmake/llama.pc.in +10 -0
  53. package/cpp/llama.cpp/cmake/x64-windows-llvm.cmake +5 -0
  54. package/cpp/llama.cpp/common/CMakeLists.txt +170 -0
  55. package/cpp/llama.cpp/common/arg.cpp +3337 -0
  56. package/cpp/llama.cpp/common/arg.h +89 -0
  57. package/cpp/llama.cpp/common/base64.hpp +392 -0
  58. package/cpp/llama.cpp/common/build-info.cpp.in +4 -0
  59. package/cpp/llama.cpp/common/chat.cpp +1781 -0
  60. package/cpp/llama.cpp/common/chat.h +135 -0
  61. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +24 -0
  62. package/cpp/llama.cpp/common/common.cpp +1567 -0
  63. package/cpp/llama.cpp/common/common.h +668 -0
  64. package/cpp/llama.cpp/common/console.cpp +504 -0
  65. package/cpp/llama.cpp/common/console.h +19 -0
  66. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +1027 -0
  67. package/cpp/llama.cpp/common/json-schema-to-grammar.h +21 -0
  68. package/cpp/llama.cpp/common/json.hpp +24766 -0
  69. package/cpp/llama.cpp/common/llguidance.cpp +254 -0
  70. package/cpp/llama.cpp/common/log.cpp +393 -0
  71. package/cpp/llama.cpp/common/log.h +103 -0
  72. package/cpp/llama.cpp/common/minja/chat-template.hpp +537 -0
  73. package/cpp/llama.cpp/common/minja/minja.hpp +2941 -0
  74. package/cpp/llama.cpp/common/ngram-cache.cpp +286 -0
  75. package/cpp/llama.cpp/common/ngram-cache.h +101 -0
  76. package/cpp/llama.cpp/common/sampling.cpp +580 -0
  77. package/cpp/llama.cpp/common/sampling.h +107 -0
  78. package/cpp/llama.cpp/common/speculative.cpp +278 -0
  79. package/cpp/llama.cpp/common/speculative.h +28 -0
  80. package/cpp/llama.cpp/common/stb_image.h +7988 -0
  81. package/cpp/llama.cpp/convert_hf_to_gguf.py +6195 -0
  82. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +393 -0
  83. package/cpp/llama.cpp/convert_llama_ggml_to_gguf.py +450 -0
  84. package/cpp/llama.cpp/convert_lora_to_gguf.py +461 -0
  85. package/cpp/llama.cpp/flake.lock +58 -0
  86. package/cpp/llama.cpp/flake.nix +185 -0
  87. package/cpp/llama.cpp/ggml/CMakeLists.txt +388 -0
  88. package/cpp/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  89. package/cpp/llama.cpp/ggml/cmake/common.cmake +26 -0
  90. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  91. package/cpp/llama.cpp/ggml/include/ggml-alloc.h +76 -0
  92. package/cpp/llama.cpp/ggml/include/ggml-backend.h +354 -0
  93. package/cpp/llama.cpp/ggml/include/ggml-blas.h +25 -0
  94. package/cpp/llama.cpp/ggml/include/ggml-cann.h +123 -0
  95. package/cpp/llama.cpp/ggml/include/ggml-cpp.h +39 -0
  96. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +143 -0
  97. package/cpp/llama.cpp/ggml/include/ggml-cuda.h +47 -0
  98. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +50 -0
  99. package/cpp/llama.cpp/ggml/include/ggml-metal.h +66 -0
  100. package/cpp/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  101. package/cpp/llama.cpp/ggml/include/ggml-opt.h +216 -0
  102. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +33 -0
  103. package/cpp/llama.cpp/ggml/include/ggml-sycl.h +49 -0
  104. package/cpp/llama.cpp/ggml/include/ggml-vulkan.h +29 -0
  105. package/cpp/llama.cpp/ggml/include/ggml.h +2192 -0
  106. package/cpp/llama.cpp/ggml/include/gguf.h +202 -0
  107. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +345 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +1042 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +255 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +586 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +2008 -0
  112. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +74 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +179 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2589 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +1083 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +420 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2554 -0
  122. package/cpp/llama.cpp/ggml/src/ggml-common.h +1857 -0
  123. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +495 -0
  124. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  125. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  126. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  127. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  128. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  131. package/cpp/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  132. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +327 -0
  134. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
  135. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  136. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  138. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +512 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +13131 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  141. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  142. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3492 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +254 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +60 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +287 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +47 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +828 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +695 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +873 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1269 -0
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +437 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +428 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +345 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3501 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +322 -0
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  211. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  212. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  213. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +189 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  239. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  240. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  241. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  242. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  243. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  249. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  251. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  253. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  254. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  255. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  256. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  258. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  259. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  260. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  261. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  262. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  263. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  265. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  266. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  267. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  268. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  274. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  275. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  276. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  277. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  278. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  279. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  280. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  282. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  283. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  284. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  285. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  286. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  287. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  288. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  289. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  290. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  291. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  292. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  293. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  294. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  295. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  296. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  297. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  298. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  299. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  300. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  301. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  302. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  311. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  313. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  314. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  315. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  316. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  317. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  319. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  320. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  321. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  322. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  323. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  324. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  325. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  326. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  327. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  329. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  330. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  331. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  332. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  333. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  334. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  335. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  336. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  337. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  338. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  339. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  340. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  341. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  342. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  343. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  344. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  345. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  346. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  347. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  348. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  349. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  350. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  351. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  352. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  353. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  354. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  355. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  356. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  357. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  358. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  359. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  360. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  361. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  362. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  363. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  364. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  365. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  366. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +279 -0
  367. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +57 -0
  368. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  369. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  370. package/cpp/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  371. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  372. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  373. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  374. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  375. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  376. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +131 -0
  377. package/cpp/llama.cpp/ggml/src/ggml-impl.h +601 -0
  378. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  379. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  380. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  381. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  382. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  383. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  384. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  385. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  386. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  387. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  388. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  389. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  390. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  391. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  392. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  394. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  395. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  396. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  397. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  398. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  399. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  400. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  401. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  402. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  403. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  404. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  405. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  406. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  407. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  408. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  409. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  410. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  411. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  412. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  413. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  414. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  415. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  416. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  417. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +120 -0
  418. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +618 -0
  419. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +5916 -0
  420. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +6891 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  422. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
  423. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4966 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  425. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  426. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  427. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  428. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  429. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  430. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  431. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  432. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  433. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  434. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  435. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  436. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  437. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  438. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  439. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  440. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  441. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  442. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  443. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  444. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  445. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  447. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  448. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  449. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  450. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  451. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  452. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  453. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  454. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  455. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  456. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  457. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  458. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  460. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  461. package/cpp/llama.cpp/ggml/src/ggml-quants.c +5232 -0
  462. package/cpp/llama.cpp/ggml/src/ggml-quants.h +100 -0
  463. package/cpp/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  464. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1813 -0
  465. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +183 -0
  466. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  467. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
  468. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  469. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  470. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +493 -0
  471. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +197 -0
  472. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +100 -0
  474. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +596 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  477. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  479. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +753 -0
  480. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1154 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  482. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1559 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +75 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +70 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +311 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  488. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4302 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  490. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  491. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  493. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  495. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1081 -0
  496. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  497. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +474 -0
  498. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  499. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +46 -0
  500. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  501. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  502. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +61 -0
  503. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +362 -0
  504. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  505. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +264 -0
  506. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  507. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  508. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  509. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  510. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  511. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1189 -0
  512. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  513. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  514. package/cpp/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  515. package/cpp/llama.cpp/ggml/src/ggml-threading.h +14 -0
  516. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +202 -0
  517. package/cpp/llama.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  518. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10502 -0
  519. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +22 -0
  520. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  521. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  522. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  523. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  524. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  525. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  526. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  527. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  528. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  529. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  530. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  531. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  532. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  533. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  534. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  535. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  536. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  537. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  538. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  539. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  540. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  541. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  542. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  543. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  544. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  545. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  546. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  547. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  548. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  549. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  550. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  551. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  552. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  553. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  554. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  555. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  556. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  557. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  558. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +483 -0
  559. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +383 -0
  560. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  561. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  562. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  563. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  564. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  565. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  566. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  567. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  568. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  569. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  570. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  571. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  572. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  573. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  574. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  575. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  576. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  577. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  578. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  579. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  580. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  581. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  582. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  583. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  584. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  585. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  586. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  587. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  588. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  589. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  590. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  591. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  592. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  593. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  594. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  595. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  596. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  597. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  598. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  599. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  600. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  601. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  602. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  603. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  604. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  605. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  606. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  607. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  608. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  609. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  610. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  611. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  612. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  613. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  614. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  615. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  616. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  617. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  618. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  619. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  620. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  621. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  622. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  623. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  624. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  625. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  626. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  627. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +740 -0
  628. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  629. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  630. package/cpp/llama.cpp/ggml/src/ggml.c +6499 -0
  631. package/cpp/llama.cpp/ggml/src/gguf.cpp +1330 -0
  632. package/cpp/llama.cpp/gguf-py/LICENSE +21 -0
  633. package/cpp/llama.cpp/gguf-py/README.md +99 -0
  634. package/cpp/llama.cpp/gguf-py/examples/reader.py +49 -0
  635. package/cpp/llama.cpp/gguf-py/examples/writer.py +39 -0
  636. package/cpp/llama.cpp/gguf-py/gguf/__init__.py +9 -0
  637. package/cpp/llama.cpp/gguf-py/gguf/constants.py +2296 -0
  638. package/cpp/llama.cpp/gguf-py/gguf/gguf.py +15 -0
  639. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +367 -0
  640. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +1041 -0
  641. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +223 -0
  642. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +642 -0
  643. package/cpp/llama.cpp/gguf-py/gguf/py.typed +0 -0
  644. package/cpp/llama.cpp/gguf-py/gguf/quants.py +1269 -0
  645. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +182 -0
  646. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +454 -0
  647. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +1610 -0
  648. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_hash.py +102 -0
  649. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +207 -0
  650. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_set_metadata.py +95 -0
  651. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +1172 -0
  652. package/cpp/llama.cpp/gguf-py/gguf/utility.py +264 -0
  653. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +492 -0
  654. package/cpp/llama.cpp/gguf-py/pyproject.toml +43 -0
  655. package/cpp/llama.cpp/gguf-py/tests/__init__.py +1 -0
  656. package/cpp/llama.cpp/gguf-py/tests/test_metadata.py +238 -0
  657. package/cpp/llama.cpp/gguf-py/tests/test_quants.py +238 -0
  658. package/cpp/llama.cpp/grammars/README.md +382 -0
  659. package/cpp/llama.cpp/grammars/arithmetic.gbnf +6 -0
  660. package/cpp/llama.cpp/grammars/c.gbnf +42 -0
  661. package/cpp/llama.cpp/grammars/chess.gbnf +13 -0
  662. package/cpp/llama.cpp/grammars/english.gbnf +6 -0
  663. package/cpp/llama.cpp/grammars/japanese.gbnf +7 -0
  664. package/cpp/llama.cpp/grammars/json.gbnf +25 -0
  665. package/cpp/llama.cpp/grammars/json_arr.gbnf +34 -0
  666. package/cpp/llama.cpp/grammars/list.gbnf +4 -0
  667. package/cpp/llama.cpp/include/llama-cpp.h +30 -0
  668. package/cpp/llama.cpp/include/llama.h +1440 -0
  669. package/cpp/llama.cpp/licenses/LICENSE-curl +9 -0
  670. package/cpp/llama.cpp/licenses/LICENSE-httplib +21 -0
  671. package/cpp/llama.cpp/licenses/LICENSE-jsonhpp +21 -0
  672. package/cpp/llama.cpp/licenses/LICENSE-linenoise +26 -0
  673. package/cpp/llama.cpp/media/llama0-banner.png +0 -0
  674. package/cpp/llama.cpp/media/llama0-logo.png +0 -0
  675. package/cpp/llama.cpp/media/llama1-banner.png +0 -0
  676. package/cpp/llama.cpp/media/llama1-logo.png +0 -0
  677. package/cpp/llama.cpp/media/llama1-logo.svg +34 -0
  678. package/cpp/llama.cpp/media/matmul.png +0 -0
  679. package/cpp/llama.cpp/media/matmul.svg +1238 -0
  680. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  681. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  682. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  683. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  684. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  685. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  686. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  687. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  688. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  689. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  690. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  691. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  692. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  693. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  694. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  695. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  696. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  697. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  698. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  699. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  700. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  701. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  702. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  703. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  704. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  705. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  706. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  707. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  708. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  709. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  710. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  711. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  712. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  713. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  714. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  715. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  716. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  717. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  718. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  719. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  720. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  721. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  722. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  723. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  724. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  725. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  726. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  727. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  728. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  729. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  730. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  731. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  732. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  733. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  734. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +202 -0
  735. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +156 -0
  736. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +152 -0
  737. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +152 -0
  738. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +54 -0
  739. package/cpp/llama.cpp/models/templates/README.md +22 -0
  740. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +1 -0
  741. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +1 -0
  742. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +57 -0
  743. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +4 -0
  744. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +76 -0
  745. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +58 -0
  746. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +287 -0
  747. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +109 -0
  748. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +93 -0
  749. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +109 -0
  750. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +8 -0
  751. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +87 -0
  752. package/cpp/llama.cpp/mypy.ini +7 -0
  753. package/cpp/llama.cpp/pocs/CMakeLists.txt +14 -0
  754. package/cpp/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  755. package/cpp/llama.cpp/pocs/vdot/q8dot.cpp +173 -0
  756. package/cpp/llama.cpp/pocs/vdot/vdot.cpp +311 -0
  757. package/cpp/llama.cpp/poetry.lock +1197 -0
  758. package/cpp/llama.cpp/prompts/LLM-questions.txt +49 -0
  759. package/cpp/llama.cpp/prompts/alpaca.txt +1 -0
  760. package/cpp/llama.cpp/prompts/assistant.txt +31 -0
  761. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  762. package/cpp/llama.cpp/prompts/chat-with-bob.txt +7 -0
  763. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  764. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  765. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  766. package/cpp/llama.cpp/prompts/chat.txt +28 -0
  767. package/cpp/llama.cpp/prompts/dan-modified.txt +1 -0
  768. package/cpp/llama.cpp/prompts/dan.txt +1 -0
  769. package/cpp/llama.cpp/prompts/mnemonics.txt +93 -0
  770. package/cpp/llama.cpp/prompts/parallel-questions.txt +43 -0
  771. package/cpp/llama.cpp/prompts/reason-act.txt +18 -0
  772. package/cpp/llama.cpp/pyproject.toml +45 -0
  773. package/cpp/llama.cpp/pyrightconfig.json +22 -0
  774. package/cpp/llama.cpp/requirements/requirements-all.txt +15 -0
  775. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  776. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  777. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  778. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +5 -0
  779. package/cpp/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  780. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  781. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  782. package/cpp/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  783. package/cpp/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  784. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  785. package/cpp/llama.cpp/requirements.txt +13 -0
  786. package/cpp/llama.cpp/src/CMakeLists.txt +45 -0
  787. package/cpp/llama.cpp/src/llama-adapter.cpp +388 -0
  788. package/cpp/llama.cpp/src/llama-adapter.h +76 -0
  789. package/cpp/llama.cpp/src/llama-arch.cpp +1743 -0
  790. package/cpp/llama.cpp/src/llama-arch.h +437 -0
  791. package/cpp/llama.cpp/src/llama-batch.cpp +372 -0
  792. package/cpp/llama.cpp/src/llama-batch.h +89 -0
  793. package/cpp/llama.cpp/src/llama-chat.cpp +663 -0
  794. package/cpp/llama.cpp/src/llama-chat.h +58 -0
  795. package/cpp/llama.cpp/src/llama-context.cpp +2459 -0
  796. package/cpp/llama.cpp/src/llama-context.h +246 -0
  797. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -0
  798. package/cpp/llama.cpp/src/llama-cparams.h +39 -0
  799. package/cpp/llama.cpp/src/llama-grammar.cpp +1219 -0
  800. package/cpp/llama.cpp/src/llama-grammar.h +173 -0
  801. package/cpp/llama.cpp/src/llama-graph.cpp +1713 -0
  802. package/cpp/llama.cpp/src/llama-graph.h +595 -0
  803. package/cpp/llama.cpp/src/llama-hparams.cpp +79 -0
  804. package/cpp/llama.cpp/src/llama-hparams.h +161 -0
  805. package/cpp/llama.cpp/src/llama-impl.cpp +167 -0
  806. package/cpp/llama.cpp/src/llama-impl.h +61 -0
  807. package/cpp/llama.cpp/src/llama-io.cpp +15 -0
  808. package/cpp/llama.cpp/src/llama-io.h +35 -0
  809. package/cpp/llama.cpp/src/llama-kv-cache.cpp +2486 -0
  810. package/cpp/llama.cpp/src/llama-kv-cache.h +405 -0
  811. package/cpp/llama.cpp/src/llama-memory.cpp +1 -0
  812. package/cpp/llama.cpp/src/llama-memory.h +31 -0
  813. package/cpp/llama.cpp/src/llama-mmap.cpp +600 -0
  814. package/cpp/llama.cpp/src/llama-mmap.h +68 -0
  815. package/cpp/llama.cpp/src/llama-model-loader.cpp +1133 -0
  816. package/cpp/llama.cpp/src/llama-model-loader.h +169 -0
  817. package/cpp/llama.cpp/src/llama-model.cpp +13453 -0
  818. package/cpp/llama.cpp/src/llama-model.h +420 -0
  819. package/cpp/llama.cpp/src/llama-quant.cpp +964 -0
  820. package/cpp/llama.cpp/src/llama-quant.h +1 -0
  821. package/cpp/llama.cpp/src/llama-sampling.cpp +2575 -0
  822. package/cpp/llama.cpp/src/llama-sampling.h +32 -0
  823. package/cpp/llama.cpp/src/llama-vocab.cpp +3313 -0
  824. package/cpp/llama.cpp/src/llama-vocab.h +125 -0
  825. package/cpp/llama.cpp/src/llama.cpp +340 -0
  826. package/cpp/llama.cpp/src/unicode-data.cpp +7034 -0
  827. package/cpp/llama.cpp/src/unicode-data.h +20 -0
  828. package/cpp/llama.cpp/src/unicode.cpp +849 -0
  829. package/cpp/llama.cpp/src/unicode.h +66 -0
  830. package/cpp/rn-completion.cpp +431 -0
  831. package/cpp/rn-llama.hpp +60 -0
  832. package/cpp/rn-utils.hpp +331 -0
  833. package/ios/OnLoad.mm +22 -0
  834. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec-generated.mm +64 -0
  835. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec.h +251 -0
  836. package/ios/generated/RNLlamaCppSpecJSI-generated.cpp +42 -0
  837. package/ios/generated/RNLlamaCppSpecJSI.h +336 -0
  838. package/ios/include/chat.h +135 -0
  839. package/ios/include/common/base64.hpp +392 -0
  840. package/ios/include/common/json.hpp +24766 -0
  841. package/ios/include/common/minja/chat-template.hpp +537 -0
  842. package/ios/include/common/minja/minja.hpp +2941 -0
  843. package/ios/include/common.h +668 -0
  844. package/ios/include/json-schema-to-grammar.h +21 -0
  845. package/ios/include/llama-cpp.h +30 -0
  846. package/ios/include/llama.h +1440 -0
  847. package/ios/include/log.h +103 -0
  848. package/ios/include/ngram-cache.h +101 -0
  849. package/ios/include/sampling.h +107 -0
  850. package/ios/include/speculative.h +28 -0
  851. package/ios/libs/llama.xcframework/Info.plist +135 -0
  852. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  853. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  854. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  855. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  856. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  857. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  858. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  859. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  860. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +2192 -0
  861. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/gguf.h +202 -0
  862. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +1440 -0
  863. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Info.plist +36 -0
  864. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Modules/module.modulemap +17 -0
  865. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  866. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  867. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  868. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  869. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  870. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  871. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  872. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  873. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  874. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  875. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  876. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  877. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  878. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Info.plist +36 -0
  879. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  880. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3442 -0
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +76 -0
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +354 -0
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +25 -0
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +143 -0
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +66 -0
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +2192 -0
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +202 -0
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +1440 -0
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +17 -0
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +32 -0
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +76 -0
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +354 -0
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +25 -0
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +143 -0
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +66 -0
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +2192 -0
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +202 -0
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +1440 -0
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +17 -0
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +32 -0
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  906. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +76 -0
  907. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +354 -0
  908. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +25 -0
  909. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +143 -0
  910. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +66 -0
  911. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +2192 -0
  912. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +202 -0
  913. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +1440 -0
  914. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +17 -0
  915. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +32 -0
  916. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  917. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  918. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  919. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  920. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  921. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  922. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  923. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  924. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  925. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  926. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +2192 -0
  927. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +202 -0
  928. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +1440 -0
  929. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +35 -0
  930. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +17 -0
  931. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  937. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  938. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  939. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  940. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  941. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  942. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  943. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  944. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +35 -0
  945. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  946. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  947. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  948. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  949. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4528 -0
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  952. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  953. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  954. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  955. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +2192 -0
  956. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +202 -0
  957. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +1440 -0
  958. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +32 -0
  959. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +17 -0
  960. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4549 -0
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3470 -0
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  968. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  969. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  970. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  971. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  972. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  973. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +32 -0
  974. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  975. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  976. package/lib/module/NativeRNLlamaCpp.js +35 -0
  977. package/lib/module/NativeRNLlamaCpp.js.map +1 -0
  978. package/lib/module/index.js +20 -0
  979. package/lib/module/index.js.map +1 -0
  980. package/lib/module/package.json +1 -0
  981. package/lib/typescript/package.json +1 -0
  982. package/lib/typescript/src/NativeRNLlamaCpp.d.ts +222 -0
  983. package/lib/typescript/src/NativeRNLlamaCpp.d.ts.map +1 -0
  984. package/lib/typescript/src/index.d.ts +5 -0
  985. package/lib/typescript/src/index.d.ts.map +1 -0
  986. package/package.json +161 -0
  987. package/react-native.config.js +15 -0
  988. package/src/NativeRNLlamaCpp.ts +282 -0
  989. package/src/index.tsx +54 -0
@@ -0,0 +1,1567 @@
1
+ #if defined(_MSC_VER)
2
+ #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
3
+ #endif
4
+
5
#include "ggml.h"
#include "gguf.h"

#include "common.h"
#include "log.h"
#include "llama.h"

#include <algorithm>
#include <cinttypes>
#include <climits>
#include <cmath>
#include <codecvt>
#include <cstdarg>
#include <cstring>
#include <ctime>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_map>
#include <unordered_set>
#include <vector>
31
+
32
+ #if defined(__APPLE__) && defined(__MACH__)
33
+ #include <sys/types.h>
34
+ #include <sys/sysctl.h>
35
+ #endif
36
+
37
+ #if defined(_WIN32)
38
+ #define WIN32_LEAN_AND_MEAN
39
+ #ifndef NOMINMAX
40
+ # define NOMINMAX
41
+ #endif
42
+ #include <locale>
43
+ #include <windows.h>
44
+ #include <fcntl.h>
45
+ #include <io.h>
46
+ #else
47
+ #include <sys/ioctl.h>
48
+ #include <sys/stat.h>
49
+ #include <unistd.h>
50
+ #endif
51
+
52
+ #if defined(_MSC_VER)
53
+ #pragma warning(disable: 4244 4267) // possible loss of data
54
+ #endif
55
+
56
+ //
57
+ // CPU utils
58
+ //
59
+
60
// Best-effort count of *physical* (non-SMT) CPU cores.
// Per-platform probes, in order of preference; every path falls back to a
// heuristic based on std::thread::hardware_concurrency() when the probe
// fails or the platform is not covered.
int32_t cpu_get_num_physical_cores() {
#ifdef __linux__
    // enumerate the set of thread siblings, num entries is num cores:
    // each physical core has one unique thread_siblings string shared by
    // all of its hyperthreads, so the set size equals the core count
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) {
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu"
            + std::to_string(cpu) + "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break; // no more cpus
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            siblings.insert(line);
        }
    }
    if (!siblings.empty()) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores;
    size_t len = sizeof(num_physical_cores);
    // prefer the performance-core count (Apple Silicon); fall back to the
    // total physical core count on older/Intel Macs
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
    // TODO: windows + arm64 + mingw64
    unsigned int n_threads_win = std::thread::hardware_concurrency();
    unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;

    // standard two-call pattern: first call sizes the buffer
    // (ERROR_INSUFFICIENT_BUFFER expected), second call fills it
    DWORD buffer_size = 0;
    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
            return default_threads;
        }
    }

    std::vector<char> buffer(buffer_size);
    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
        return default_threads;
    }

    // walk the variable-size records, counting processor-core entries
    int32_t num_physical_cores = 0;
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
    while (buffer_size > 0) {
        if (info->Relationship == RelationProcessorCore) {
            num_physical_cores += info->Processor.GroupCount;
        }
        buffer_size -= info->Size;
        info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
    }

    return num_physical_cores > 0 ? num_physical_cores : default_threads;
#endif
    // generic fallback: use half the logical CPUs when there are more than 4
    // (a rough guess at the physical count on SMT systems), else all of them
    unsigned int n_threads = std::thread::hardware_concurrency();
    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
121
+
122
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
#include <pthread.h>

// Execute CPUID while preserving rbx (rbx can be reserved by the compiler
// for PIC, hence the save/restore through rsi).
static void cpuid(unsigned leaf, unsigned subleaf,
                  unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
    __asm__("movq\t%%rbx,%%rsi\n\t"
            "cpuid\n\t"
            "xchgq\t%%rbx,%%rsi"
            : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
            : "0"(leaf), "2"(subleaf));
}

// Pin the calling thread to a single CPU; returns 0 on success
// (pthread_setaffinity_np convention).
static int pin_cpu(int cpu) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
}

// CPUID.07H:EDX bit 15 — hybrid (P-core/E-core) CPU flag per the Intel SDM.
static bool is_hybrid_cpu(void) {
    unsigned eax, ebx, ecx, edx;
    cpuid(7, 0, &eax, &ebx, &ecx, &edx);
    return !!(edx & (1u << 15));
}

// CPUID.1AH:EAX[31:24] reports the core type of the core this thread is
// *currently running on*; 0x20 is "Intel Atom" (efficiency core), so the
// caller must pin the thread to the CPU of interest before calling.
static bool is_running_on_efficiency_core(void) {
    unsigned eax, ebx, ecx, edx;
    cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx);
    int intel_atom = 0x20;
    int core_type = (eax & 0xff000000u) >> 24;
    return core_type == intel_atom;
}

// Count CPUs worth using for math by pinning to each CPU in turn and
// skipping efficiency cores. Returns -1 if pinning fails.
// NOTE: the extra ++cpu below is intentional — it skips every other logical
// CPU so hyperthread siblings are not double-counted. The thread is left
// pinned to the last probed CPU; the caller (cpu_get_num_math) restores the
// original affinity mask afterwards.
static int cpu_count_math_cpus(int n_cpu) {
    int result = 0;
    for (int cpu = 0; cpu < n_cpu; ++cpu) {
        if (pin_cpu(cpu)) {
            return -1;
        }
        if (is_running_on_efficiency_core()) {
            continue; // efficiency cores harm lockstep threading
        }
        ++cpu; // hyperthreading isn't useful for linear algebra
        ++result;
    }
    return result;
}

#endif // __x86_64__ && __linux__
171
+
172
/**
 * Returns number of CPUs on system that are useful for math.
 *
 * On x86-64 Linux with a hybrid (P/E-core) CPU this probes each CPU
 * individually (see cpu_count_math_cpus) and excludes efficiency cores;
 * everywhere else it falls back to the physical core count.
 */
int32_t cpu_get_num_math() {
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
    int n_cpu = sysconf(_SC_NPROCESSORS_ONLN);
    if (n_cpu < 1) {
        return cpu_get_num_physical_cores();
    }
    if (is_hybrid_cpu()) {
        cpu_set_t affinity;
        // save the current affinity mask, probe (cpu_count_math_cpus pins
        // the thread to each CPU in turn), then restore the saved mask —
        // the ordering here is what keeps the probe side-effect free
        if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) {
            int result = cpu_count_math_cpus(n_cpu);
            pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity);
            if (result > 0) {
                return result;
            }
        }
    }
#endif
    return cpu_get_num_physical_cores();
}
194
+
195
+ // Helper for setting process priority
196
+
197
+ #if defined(_WIN32)
198
+
199
// Raise (or keep) the scheduling priority class of the current process.
// Returns true on success; NORMAL is treated as a no-op.
// NOTE(review): REALTIME_PRIORITY_CLASS generally requires elevated
// privileges — on failure SetPriorityClass reports via GetLastError.
bool set_process_priority(enum ggml_sched_priority prio) {
    if (prio == GGML_SCHED_PRIO_NORMAL) {
        return true;
    }

    // all four known priorities are mapped; p stays NORMAL for anything else
    DWORD p = NORMAL_PRIORITY_CLASS;
    switch (prio) {
        case GGML_SCHED_PRIO_NORMAL:   p = NORMAL_PRIORITY_CLASS;       break;
        case GGML_SCHED_PRIO_MEDIUM:   p = ABOVE_NORMAL_PRIORITY_CLASS; break;
        case GGML_SCHED_PRIO_HIGH:     p = HIGH_PRIORITY_CLASS;         break;
        case GGML_SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS;     break;
    }

    if (!SetPriorityClass(GetCurrentProcess(), p)) {
        LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
        return false;
    }

    return true;
}
219
+
220
+ #else // MacOS and POSIX
221
+ #include <sys/types.h>
222
+ #include <sys/resource.h>
223
+
224
+ bool set_process_priority(enum ggml_sched_priority prio) {
225
+ if (prio == GGML_SCHED_PRIO_NORMAL) {
226
+ return true;
227
+ }
228
+
229
+ int p = 0;
230
+ switch (prio) {
231
+ case GGML_SCHED_PRIO_NORMAL: p = 0; break;
232
+ case GGML_SCHED_PRIO_MEDIUM: p = -5; break;
233
+ case GGML_SCHED_PRIO_HIGH: p = -10; break;
234
+ case GGML_SCHED_PRIO_REALTIME: p = -20; break;
235
+ }
236
+
237
+ if (!setpriority(PRIO_PROCESS, 0, p)) {
238
+ LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
239
+ return false;
240
+ }
241
+ return true;
242
+ }
243
+
244
+ #endif
245
+
246
+ //
247
+ // CLI argument parsing
248
+ //
249
+
250
+
251
+ void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) {
252
+ int32_t n_set = 0;
253
+
254
+ if (cpuparams.n_threads < 0) {
255
+ // Assuming everything about cpuparams is invalid
256
+ if (role_model != nullptr) {
257
+ cpuparams = *role_model;
258
+ } else {
259
+ cpuparams.n_threads = cpu_get_num_math();
260
+ }
261
+ }
262
+
263
+ for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
264
+ if (cpuparams.cpumask[i]) {
265
+ n_set++;
266
+ }
267
+ }
268
+
269
+ if (n_set && n_set < cpuparams.n_threads) {
270
+ // Not enough set bits, may experience performance issues.
271
+ LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
272
+ }
273
+ }
274
+
275
+ bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) {
276
+ size_t dash_loc = range.find('-');
277
+ if (dash_loc == std::string::npos) {
278
+ LOG_ERR("Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
279
+ return false;
280
+ }
281
+
282
+ size_t start_i;
283
+ size_t end_i;
284
+
285
+ if (dash_loc == 0) {
286
+ start_i = 0;
287
+ } else {
288
+ start_i = std::stoull(range.substr(0, dash_loc));
289
+ if (start_i >= GGML_MAX_N_THREADS) {
290
+ LOG_ERR("Start index out of bounds!\n");
291
+ return false;
292
+ }
293
+ }
294
+
295
+ if (dash_loc == range.length() - 1) {
296
+ end_i = GGML_MAX_N_THREADS - 1;
297
+ } else {
298
+ end_i = std::stoull(range.substr(dash_loc + 1));
299
+ if (end_i >= GGML_MAX_N_THREADS) {
300
+ LOG_ERR("End index out of bounds!\n");
301
+ return false;
302
+ }
303
+ }
304
+
305
+ for (size_t i = start_i; i <= end_i; i++) {
306
+ boolmask[i] = true;
307
+ }
308
+
309
+ return true;
310
+ }
311
+
312
+ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREADS]) {
313
+ // Discard potential 0x prefix
314
+ size_t start_i = 0;
315
+ if (mask.length() >= 2 && mask.substr(0, 2) == "0x") {
316
+ start_i = 2;
317
+ }
318
+
319
+ size_t num_digits = mask.length() - start_i;
320
+ if (num_digits > 128) num_digits = 128;
321
+
322
+ size_t end_i = num_digits + start_i;
323
+
324
+ for (size_t i = start_i, n = (num_digits*4 - 1); i < end_i; i++, n-=4) {
325
+ char c = mask.at(i);
326
+ int8_t id = c;
327
+
328
+ if ((c >= '0' && c <= '9')) {
329
+ id -= '0';
330
+ } else if (c >= 'a' && c <= 'f') {
331
+ id -= 'a' - 10;
332
+ } else if (c >= 'A' && c <= 'F') {
333
+ id -= 'A' - 10;
334
+ } else {
335
+ LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i));
336
+ return false;
337
+ }
338
+
339
+ boolmask[ n ] = boolmask[ n ] || ((id & 8) != 0);
340
+ boolmask[n - 1] = boolmask[n - 1] || ((id & 4) != 0);
341
+ boolmask[n - 2] = boolmask[n - 2] || ((id & 2) != 0);
342
+ boolmask[n - 3] = boolmask[n - 3] || ((id & 1) != 0);
343
+ }
344
+
345
+ return true;
346
+ }
347
+
348
// One-time process-wide initialization: routes llama.cpp's log output
// through the common logger and prints the build banner.
void common_init() {
    llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
        // forward llama.cpp log lines to the common log sink, gated by the
        // global verbosity threshold
        if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
            common_log_add(common_log_main(), level, "%s", text);
        }
    }, NULL);

#ifdef NDEBUG
    const char * build_type = "";
#else
    const char * build_type = " (debug)";
#endif

    // NOTE(review): LLAMA_BUILD_NUMBER / LLAMA_COMMIT / LLAMA_COMPILER /
    // LLAMA_BUILD_TARGET are presumably injected by the build system —
    // they are not defined in this file.
    LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
}
363
+
364
// Builds a one-line, human-readable summary of the thread configuration and
// backend capabilities (the latter via llama_print_system_info()).
std::string common_params_get_system_info(const common_params & params) {
    std::ostringstream os;

    os << "system_info: n_threads = " << params.cpuparams.n_threads;
    if (params.cpuparams_batch.n_threads != -1) {
        // -1 appears to mean "unset" — the batch thread count is only
        // reported when it was explicitly configured
        os << " (n_threads_batch = " << params.cpuparams_batch.n_threads << ")";
    }
#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
    // TODO: windows + arm64 + mingw64
    // GetActiveProcessorCount(ALL_PROCESSOR_GROUPS) counts CPUs across all
    // processor groups, unlike std::thread::hardware_concurrency()
    DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
    os << " / " << logicalProcessorCount << " | " << llama_print_system_info();
#else
    os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info();
#endif

    return os.str();
}
381
+
382
+ //
383
+ // String utils
384
+ //
385
+
386
+ std::string string_format(const char * fmt, ...) {
387
+ va_list ap;
388
+ va_list ap2;
389
+ va_start(ap, fmt);
390
+ va_copy(ap2, ap);
391
+ int size = vsnprintf(NULL, 0, fmt, ap);
392
+ GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
393
+ std::vector<char> buf(size + 1);
394
+ int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
395
+ GGML_ASSERT(size2 == size);
396
+ va_end(ap2);
397
+ va_end(ap);
398
+ return std::string(buf.data(), size);
399
+ }
400
+
401
// Returns `str` with leading and trailing whitespace removed.
std::string string_strip(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    // FIX: cast to unsigned char before calling std::isspace — passing a
    // plain char that is negative (e.g. a UTF-8 continuation byte on
    // platforms where char is signed) is undefined behavior.
    while (start < end && std::isspace(static_cast<unsigned char>(str[start]))) {
        start++;
    }
    while (end > start && std::isspace(static_cast<unsigned char>(str[end - 1]))) {
        end--;
    }
    return str.substr(start, end - start);
}
412
+
413
// Returns the current local time as a lexically sortable timestamp:
// "YYYY_MM_DD-HH_MM_SS.nnnnnnnnn" (nanosecond fraction, zero-padded).
// NOTE: uses std::localtime, which is not thread-safe.
std::string string_get_sortable_timestamp() {
    using clock = std::chrono::system_clock;

    const auto now = clock::now();
    const std::time_t now_t = clock::to_time_t(now);
    char date_part[100];
    std::strftime(date_part, sizeof(date_part), "%Y_%m_%d-%H_%M_%S", std::localtime(&now_t));

    // sub-second part: nanoseconds within the current second
    const int64_t frac_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
        now.time_since_epoch() % 1000000000).count();
    char ns_part[11];
    snprintf(ns_part, sizeof(ns_part), "%09" PRId64, frac_ns);

    return std::string(date_part) + "." + ns_part;
}
428
+
429
// In-place replacement of every occurrence of `search` in `s` with
// `replace`. An empty `search` is a no-op (it would match everywhere).
void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    std::string rebuilt;
    rebuilt.reserve(s.length());
    size_t copied_upto = 0;
    for (size_t hit = s.find(search); hit != std::string::npos; hit = s.find(search, copied_upto)) {
        rebuilt.append(s, copied_upto, hit - copied_upto);
        rebuilt += replace;
        copied_upto = hit + search.length();
    }
    rebuilt.append(s, copied_upto, std::string::npos);
    s = std::move(rebuilt);
}
445
+
446
// Returns `s` with every regex metacharacter backslash-escaped, so the
// result matches `s` literally when used inside a std::regex pattern.
std::string regex_escape(const std::string & s) {
    static const std::string metachars = ".^$|()*+?[]{}\\";
    std::string escaped;
    escaped.reserve(s.size());
    for (const char ch : s) {
        if (metachars.find(ch) != std::string::npos) {
            escaped += '\\';
        }
        escaped += ch;
    }
    return escaped;
}
450
+
451
// Concatenates `values` with `separator` between consecutive elements.
// Returns "" for an empty vector.
std::string string_join(const std::vector<std::string> & values, const std::string & separator) {
    std::string joined;
    bool first = true;
    for (const auto & value : values) {
        if (!first) {
            joined += separator;
        }
        joined += value;
        first = false;
    }
    return joined;
}
461
+
462
// Splits `str` on every occurrence of `delimiter`.
// Always returns at least one element (the whole string when the delimiter
// does not occur); consecutive delimiters yield empty elements.
std::vector<std::string> string_split(const std::string & str, const std::string & delimiter) {
    std::vector<std::string> parts;

    // FIX: an empty delimiter made the original loop spin forever, because
    // std::string::find("") matches at position 0 and `start` never
    // advanced. Treat it as "no split" and return the whole string.
    if (delimiter.empty()) {
        parts.push_back(str);
        return parts;
    }

    size_t start = 0;
    size_t end = str.find(delimiter);

    while (end != std::string::npos) {
        parts.push_back(str.substr(start, end - start));
        start = end + delimiter.length();
        end = str.find(delimiter, start);
    }

    parts.push_back(str.substr(start));

    return parts;
}
477
+
478
// Returns `str` concatenated with itself `n` times ("" when n == 0).
std::string string_repeat(const std::string & str, size_t n) {
    std::string out;
    out.reserve(str.length() * n);
    for (size_t rep = 0; rep < n; ++rep) {
        out.append(str);
    }
    return out;
}
492
+
493
// Renders a bool as "true" / "false".
std::string string_from(bool value) {
    if (value) {
        return "true";
    }
    return "false";
}
496
+
497
// Renders a vector of ints as "[ 1, 2, 3 ]" ("[  ]" when empty).
std::string string_from(const std::vector<int> & values) {
    std::stringstream rendered;

    rendered << "[ ";
    for (size_t i = 0; i < values.size(); ++i) {
        if (i > 0) {
            rendered << ", ";
        }
        rendered << std::to_string(values[i]);
    }
    rendered << " ]";

    return rendered.str();
}
514
+
515
// Render a token list as "[ 'piece':id, 'piece':id, ... ]" for logging.
// Each token is detokenized via common_token_to_piece and stripped of
// non-printable bytes so the output stays single-line and terminal-safe.
std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
    std::stringstream buf;

    buf << "[ ";

    bool first = true;
    for (const auto & token : tokens) {
        if (!first) {
            buf << ", ";
        } else {
            first = false;
        }

        auto detokenized = common_token_to_piece(ctx, token);

        // drop control/non-printable bytes from the piece before printing
        detokenized.erase(
            std::remove_if(
                detokenized.begin(),
                detokenized.end(),
                [](const unsigned char c) { return !std::isprint(c); }),
            detokenized.end());

        buf << "'" << detokenized << "'"
            << ":" << std::to_string(token);
    }

    buf << " ]";

    return buf.str();
}
545
+
546
// Render a llama_batch as a multi-line log string: one line per token with
// its piece, position, number of sequence ids, first sequence id, and the
// logits flag. Only seq_id[i][0] is shown even when n_seq_id[i] > 1.
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) {
    std::stringstream buf;

    buf << "[ ";

    bool first = true;
    for (int i = 0; i < batch.n_tokens; ++i) {
        if (!first) {
            buf << ", ";
        } else {
            first = false;
        }

        auto detokenized = common_token_to_piece(ctx, batch.token[i]);

        // drop control/non-printable bytes from the piece before printing
        detokenized.erase(
            std::remove_if(
                detokenized.begin(),
                detokenized.end(),
                [](const unsigned char c) { return !std::isprint(c); }),
            detokenized.end());

        buf << "\n" << std::to_string(i)
            << ", token '" << detokenized << "'"
            << ", pos " << std::to_string(batch.pos[i])
            << ", n_seq_id " << std::to_string(batch.n_seq_id[i])
            << ", seq_id " << std::to_string(batch.seq_id[i][0])
            << ", logits " << std::to_string(batch.logits[i]);
    }

    buf << " ]";

    return buf.str();
}
580
+
581
// Decode C-style escape sequences (\n, \r, \t, \', \", \\, \xHH) in `input`,
// in place. Unrecognized or incomplete escapes are kept verbatim. The string
// is shrunk to the decoded length; the decoded text is never longer than the
// input, so writing through `dst` is safe.
void string_process_escapes(std::string & input) {
    const std::size_t len = input.length();
    std::size_t dst = 0;

    for (std::size_t src = 0; src < len; ++src) {
        // plain character, or a lone trailing backslash: copy through
        if (input[src] != '\\' || src + 1 >= len) {
            input[dst++] = input[src];
            continue;
        }

        ++src; // consume the backslash; src now indexes the escape character
        switch (input[src]) {
            case 'n':  input[dst++] = '\n'; break;
            case 'r':  input[dst++] = '\r'; break;
            case 't':  input[dst++] = '\t'; break;
            case '\'': input[dst++] = '\''; break;
            case '\"': input[dst++] = '\"'; break;
            case '\\': input[dst++] = '\\'; break;
            case 'x':
                // \xHH requires exactly two hex digits after the 'x'
                if (src + 2 < len) {
                    const char hex[3] = { input[src + 1], input[src + 2], 0 };
                    char * err_p = nullptr;
                    const long val = std::strtol(hex, &err_p, 16);
                    if (err_p == hex + 2) { // both digits parsed
                        src += 2;
                        input[dst++] = char(val);
                        break;
                    }
                }
                // invalid hex escape: keep it verbatim
                // fall through
            default:
                input[dst++] = '\\';
                input[dst++] = input[src];
                break;
        }
    }

    input.resize(dst);
}
617
+
618
// Parse a model KV override of the form "<key>=<type>:<value>", where <type>
// is one of int/float/bool/str, and append it to `overrides`.
// The key and a str value are each limited to 127 characters (the fixed-size
// buffers in llama_model_kv_override). Returns false and logs on malformed
// input without touching `overrides`.
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
    const char * sep = strchr(data, '=');
    // no '=' found, or the key would overflow kvo.key (128 bytes incl. NUL)
    if (sep == nullptr || sep - data >= 128) {
        LOG_ERR("%s: malformed KV override '%s'\n", __func__, data);
        return false;
    }
    llama_model_kv_override kvo;
    std::strncpy(kvo.key, data, sep - data);
    kvo.key[sep - data] = 0;
    sep++; // skip '='; sep now points at "<type>:<value>"
    if (strncmp(sep, "int:", 4) == 0) {
        sep += 4;
        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
        kvo.val_i64 = std::atol(sep);
    } else if (strncmp(sep, "float:", 6) == 0) {
        sep += 6;
        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
        kvo.val_f64 = std::atof(sep);
    } else if (strncmp(sep, "bool:", 5) == 0) {
        sep += 5;
        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
        // only the exact strings "true"/"false" are accepted
        if (std::strcmp(sep, "true") == 0) {
            kvo.val_bool = true;
        } else if (std::strcmp(sep, "false") == 0) {
            kvo.val_bool = false;
        } else {
            LOG_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data);
            return false;
        }
    } else if (strncmp(sep, "str:", 4) == 0) {
        sep += 4;
        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
        if (strlen(sep) > 127) {
            LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
            return false;
        }
        strncpy(kvo.val_str, sep, 127);
        kvo.val_str[127] = '\0';
    } else {
        LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data);
        return false;
    }
    overrides.emplace_back(std::move(kvo));
    return true;
}
663
+
664
+ //
665
+ // Filesystem utils
666
+ //
667
+
668
+ // Validate if a filename is safe to use
669
+ // To validate a full path, split the path by the OS-specific path separator, and validate each part with this function
670
// Validate that `filename` is a single safe path component: non-empty,
// at most 255 bytes, strictly valid UTF-8, free of control characters,
// path separators (and their Unicode lookalikes), surrogates/BOM/replacement
// characters, and Windows-hostile patterns (leading/trailing space,
// trailing '.', "..", "."). Returns true when the name is safe to use as-is.
bool fs_validate_filename(const std::string & filename) {
    if (!filename.length()) {
        // Empty filename invalid
        return false;
    }
    if (filename.length() > 255) {
        // Limit at common largest possible filename on Linux filesystems
        // to avoid unnecessary further validation
        // (On systems with smaller limits it will be caught by the OS)
        return false;
    }

    std::u32string filename_utf32;
    try {
#if defined(__clang__)
        // disable C++17 deprecation warning for std::codecvt_utf8
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
        std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;

#if defined(__clang__)
#    pragma clang diagnostic pop
#endif

        filename_utf32 = converter.from_bytes(filename);

        // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used,
        // or invalid encodings were encountered. Reject such attempts
        std::string filename_reencoded = converter.to_bytes(filename_utf32);
        if (filename_reencoded != filename) {
            return false;
        }
    } catch (const std::exception &) {
        // from_bytes/to_bytes throw std::range_error on invalid input
        return false;
    }

    // Check for forbidden codepoints:
    // - Control characters
    // - Unicode equivalents of illegal characters
    // - UTF-16 surrogate pairs
    // - UTF-8 replacement character
    // - Byte order mark (BOM)
    // - Illegal characters: / \ : * ? " < > |
    for (char32_t c : filename_utf32) {
        if (c <= 0x1F // Control characters (C0)
            || c == 0x7F // Control characters (DEL)
            || (c >= 0x80 && c <= 0x9F) // Control characters (C1)
            || c == 0xFF0E // Fullwidth Full Stop (period equivalent)
            || c == 0x2215 // Division Slash (forward slash equivalent)
            || c == 0x2216 // Set Minus (backslash equivalent)
            || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
            || c == 0xFFFD // Replacement Character (UTF-8)
            || c == 0xFEFF // Byte Order Mark (BOM)
            || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
            || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
            return false;
        }
    }

    // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
    // Unicode and other whitespace is not affected, only 0x20 space
    if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') {
        return false;
    }

    // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead)
    if (filename.find("..") != std::string::npos) {
        return false;
    }

    // Reject "."
    if (filename == ".") {
        return false;
    }

    return true;
}
748
+
749
+ // returns true if successful, false otherwise
750
// Create `path` and any missing parent directories (like `mkdir -p`).
// Returns true if every component up to the last separator exists as a
// directory afterwards, false on any failure or if an existing component
// is not a directory.
// NOTE: the loops below stop at the last separator, so the final component
// is only created when `path` ends with a separator.
// returns true if successful, false otherwise
bool fs_create_directory_with_parents(const std::string & path) {
#ifdef _WIN32
    std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
    std::wstring wpath = converter.from_bytes(path);

    // if the path already exists, check whether it's a directory
    const DWORD attributes = GetFileAttributesW(wpath.c_str());
    if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
        return true;
    }

    size_t pos_slash = 0;

    // process path from front to back, procedurally creating directories
    // NOTE(review): pos_slash is a byte offset into the UTF-8 `path` but is
    // used to slice the UTF-16 `wpath`; these indices diverge for non-ASCII
    // paths - confirm whether non-ASCII cache paths are expected here.
    while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) {
        const std::wstring subpath = wpath.substr(0, pos_slash);
        const wchar_t * test = subpath.c_str();

        const bool success = CreateDirectoryW(test, NULL);
        if (!success) {
            const DWORD error = GetLastError();

            // if the path already exists, ensure that it's a directory
            if (error == ERROR_ALREADY_EXISTS) {
                const DWORD attributes = GetFileAttributesW(subpath.c_str());
                if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
                    return false;
                }
            } else {
                return false;
            }
        }

        pos_slash += 1;
    }

    return true;
#else
    // if the path already exists, check whether it's a directory
    struct stat info;
    if (stat(path.c_str(), &info) == 0) {
        return S_ISDIR(info.st_mode);
    }

    size_t pos_slash = 1; // skip leading slashes for directory creation

    // process path from front to back, procedurally creating directories
    while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
        const std::string subpath = path.substr(0, pos_slash);
        struct stat info;

        // if the path already exists, ensure that it's a directory
        if (stat(subpath.c_str(), &info) == 0) {
            if (!S_ISDIR(info.st_mode)) {
                return false;
            }
        } else {
            // create parent directories
            const int ret = mkdir(subpath.c_str(), 0755);
            if (ret != 0) {
                return false;
            }
        }

        pos_slash += 1;
    }

    return true;
#endif // _WIN32
}
820
+
821
+ std::string fs_get_cache_directory() {
822
+ std::string cache_directory = "";
823
+ auto ensure_trailing_slash = [](std::string p) {
824
+ // Make sure to add trailing slash
825
+ if (p.back() != DIRECTORY_SEPARATOR) {
826
+ p += DIRECTORY_SEPARATOR;
827
+ }
828
+ return p;
829
+ };
830
+ if (getenv("LLAMA_CACHE")) {
831
+ cache_directory = std::getenv("LLAMA_CACHE");
832
+ } else {
833
+ #if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)
834
+ if (std::getenv("XDG_CACHE_HOME")) {
835
+ cache_directory = std::getenv("XDG_CACHE_HOME");
836
+ } else {
837
+ cache_directory = std::getenv("HOME") + std::string("/.cache/");
838
+ }
839
+ #elif defined(__APPLE__)
840
+ cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
841
+ #elif defined(_WIN32)
842
+ cache_directory = std::getenv("LOCALAPPDATA");
843
+ #else
844
+ # error Unknown architecture
845
+ #endif
846
+ cache_directory = ensure_trailing_slash(cache_directory);
847
+ cache_directory += "llama.cpp";
848
+ }
849
+ return ensure_trailing_slash(cache_directory);
850
+ }
851
+
852
+ std::string fs_get_cache_file(const std::string & filename) {
853
+ GGML_ASSERT(filename.find(DIRECTORY_SEPARATOR) == std::string::npos);
854
+ std::string cache_directory = fs_get_cache_directory();
855
+ const bool success = fs_create_directory_with_parents(cache_directory);
856
+ if (!success) {
857
+ throw std::runtime_error("failed to create cache directory: " + cache_directory);
858
+ }
859
+ return cache_directory + filename;
860
+ }
861
+
862
+
863
+ //
864
+ // Model utils
865
+ //
866
+
867
// Load a model and create an inference context from `params`, returning both
// (plus any loaded LoRA adapters) in a common_init_result. On any failure the
// already-acquired resources are freed and a default (empty) result is
// returned - callers must check the model/context for null.
// NOTE: mutates `params` to reflect what the loaded model actually supports
// (ctx_shift, control-vector layer range, sampling fields).
struct common_init_result common_init_from_params(common_params & params) {
    common_init_result iparams;
    auto mparams = common_model_params_to_llama(params);

    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
    if (model == NULL) {
        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
        return iparams;
    }

    const llama_vocab * vocab = llama_model_get_vocab(model);

    // reranking requires BOS/EOS/SEP tokens to build the query/doc pair;
    // bail out early (before creating a context) if any is missing
    if (params.reranking) {
        bool ok = true;

        if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
            LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
            ok = false;
        }

        if (llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
            LOG_WRN("%s: warning: vocab does not have an EOS token, reranking will not work\n", __func__);
            ok = false;
        }

        if (llama_vocab_sep(vocab) == LLAMA_TOKEN_NULL) {
            LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
            ok = false;
        }

        if (!ok) {
            llama_model_free(model);

            return iparams;
        }
    }

    auto cparams = common_context_params_to_llama(params);

    llama_context * lctx = llama_init_from_model(model, cparams);
    if (lctx == NULL) {
        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
        llama_model_free(model);
        return iparams;
    }

    if (params.ctx_shift && !llama_kv_self_can_shift(lctx)) {
        LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__);
        params.ctx_shift = false;
    }

    // optionally apply a control vector over a layer range
    if (!params.control_vectors.empty()) {
        if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
        if (params.control_vector_layer_end <= 0) params.control_vector_layer_end = llama_model_n_layer(model);

        const auto cvec = common_control_vector_load(params.control_vectors);
        if (cvec.n_embd == -1) { // -1 signals a load failure
            llama_free(lctx);
            llama_model_free(model);

            return iparams;
        }

        int err = llama_apply_adapter_cvec(
            lctx,
            cvec.data.data(),
            cvec.data.size(),
            cvec.n_embd,
            params.control_vector_layer_start,
            params.control_vector_layer_end);
        if (err) {
            llama_free(lctx);
            llama_model_free(model);

            return iparams;
        }
    }

    // load and optionally apply lora adapters
    for (auto & la : params.lora_adapters) {
        llama_adapter_lora_ptr lora;
        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
        if (lora == nullptr) {
            LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
            llama_free(lctx);
            llama_model_free(model);
            return iparams;
        }

        la.ptr = lora.get();
        iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
    }

    if (!params.lora_init_without_apply) {
        common_set_adapter_lora(lctx, params.lora_adapters);
    }

    if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
        LOG_WRN("%s: warning: vocab does not have an EOS token, ignoring --ignore-eos\n", __func__);
        params.sampling.ignore_eos = false;
    }

    // implement ignore_eos by biasing every end-of-generation token to -inf
    if (params.sampling.ignore_eos) {
        for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) {
            if (llama_vocab_is_eog(vocab, i)) {
                LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY);
                params.sampling.logit_bias.push_back({i, -INFINITY});
            }
        }
    }

    // -1 means "whole context" for the repetition penalty windows
    if (params.sampling.penalty_last_n == -1) {
        LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
        params.sampling.penalty_last_n = llama_n_ctx(lctx);
    }

    if (params.sampling.dry_penalty_last_n == -1) {
        LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
        params.sampling.dry_penalty_last_n = llama_n_ctx(lctx);
    }

    // optional warmup pass: run a tiny encode/decode to page in weights,
    // then reset the KV cache and perf counters so the run starts clean
    if (params.warmup) {
        LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);

        llama_set_warmup(lctx, true);

        std::vector<llama_token> tmp;
        llama_token bos = llama_vocab_bos(vocab);
        llama_token eos = llama_vocab_eos(vocab);

        // some models (e.g. T5) don't have a BOS token
        if (bos != LLAMA_TOKEN_NULL) {
            tmp.push_back(bos);
        }
        if (eos != LLAMA_TOKEN_NULL) {
            tmp.push_back(eos);
        }
        if (tmp.empty()) {
            tmp.push_back(0); // no BOS/EOS at all: warm up with token 0
        }

        if (llama_model_has_encoder(model)) {
            llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size()));
            llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
            if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
                decoder_start_token_id = bos;
            }
            tmp.clear();
            tmp.push_back(decoder_start_token_id);
        }
        if (llama_model_has_decoder(model)) {
            llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch)));
        }
        llama_kv_self_clear(lctx);
        llama_synchronize(lctx);
        llama_perf_context_reset(lctx);
        llama_set_warmup(lctx, false);
    }

    // transfer ownership to the result only once everything has succeeded
    iparams.model.reset(model);
    iparams.context.reset(lctx);

    return iparams;
}
1031
+
1032
// Base URL used for model downloads. MODEL_ENDPOINT takes precedence, then
// HF_ENDPOINT (kept for backward compatibility), otherwise the Hugging Face
// default. The returned URL always ends with '/'.
std::string get_model_endpoint() {
    std::string model_endpoint = "https://huggingface.co/";

    const char * endpoint_env = getenv("MODEL_ENDPOINT");
    if (endpoint_env == nullptr) {
        // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
        endpoint_env = getenv("HF_ENDPOINT");
    }
    if (endpoint_env != nullptr) {
        model_endpoint = endpoint_env;
        if (model_endpoint.back() != '/') {
            model_endpoint += '/';
        }
    }
    return model_endpoint;
}
1044
+
1045
+ void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
1046
+ llama_clear_adapter_lora(ctx);
1047
+ for (auto & la : lora) {
1048
+ if (la.scale != 0.0f) {
1049
+ llama_set_adapter_lora(ctx, la.ptr, la.scale);
1050
+ }
1051
+ }
1052
+ }
1053
+
1054
// Translate high-level common_params into llama_model_params, starting from
// the llama.cpp defaults. n_gpu_layers == -1 means "keep the default" and is
// deliberately not copied.
// NOTE: the returned struct borrows pointers into `params` (devices,
// kv_overrides, tensor_buft_overrides) - `params` must outlive it.
struct llama_model_params common_model_params_to_llama(common_params & params) {
    auto mparams = llama_model_default_params();

    if (!params.devices.empty()) {
        mparams.devices = params.devices.data();
    }

    if (params.n_gpu_layers != -1) {
        mparams.n_gpu_layers = params.n_gpu_layers;
    }

    mparams.main_gpu = params.main_gpu;
    mparams.split_mode = params.split_mode;
    mparams.tensor_split = params.tensor_split;
    mparams.use_mmap = params.use_mmap;
    mparams.use_mlock = params.use_mlock;
    mparams.check_tensors = params.check_tensors;

    // the llama API expects the override arrays to be sentinel-terminated
    if (params.kv_overrides.empty()) {
        mparams.kv_overrides = NULL;
    } else {
        GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key");
        mparams.kv_overrides = params.kv_overrides.data();
    }

    if (params.tensor_buft_overrides.empty()) {
        mparams.tensor_buft_overrides = NULL;
    } else {
        GGML_ASSERT(params.tensor_buft_overrides.back().pattern == nullptr && "Tensor buffer overrides not terminated with empty pattern");
        mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
    }

    return mparams;
}
1088
+
1089
// Translate high-level common_params into llama_context_params, starting
// from the llama.cpp defaults. Reranking mode forces embeddings on and
// selects RANK pooling at the end, overriding the user's settings.
struct llama_context_params common_context_params_to_llama(const common_params & params) {
    auto cparams = llama_context_default_params();

    cparams.n_ctx = params.n_ctx;
    cparams.n_seq_max = params.n_parallel;
    cparams.n_batch = params.n_batch;
    cparams.n_ubatch = params.n_ubatch;
    cparams.n_threads = params.cpuparams.n_threads;
    // -1 batch threads means "same as generation threads"
    cparams.n_threads_batch = params.cpuparams_batch.n_threads == -1 ?
        params.cpuparams.n_threads : params.cpuparams_batch.n_threads;
    cparams.embeddings = params.embedding;
    cparams.rope_scaling_type = params.rope_scaling_type;
    cparams.rope_freq_base = params.rope_freq_base;
    cparams.rope_freq_scale = params.rope_freq_scale;
    cparams.yarn_ext_factor = params.yarn_ext_factor;
    cparams.yarn_attn_factor = params.yarn_attn_factor;
    cparams.yarn_beta_fast = params.yarn_beta_fast;
    cparams.yarn_beta_slow = params.yarn_beta_slow;
    cparams.yarn_orig_ctx = params.yarn_orig_ctx;
    cparams.pooling_type = params.pooling_type;
    cparams.attention_type = params.attention_type;
    cparams.defrag_thold = params.defrag_thold;
    cparams.cb_eval = params.cb_eval;
    cparams.cb_eval_user_data = params.cb_eval_user_data;
    // note the inverted flags: the common_params options are "no_*"
    cparams.offload_kqv = !params.no_kv_offload;
    cparams.flash_attn = params.flash_attn;
    cparams.no_perf = params.no_perf;
    cparams.op_offload = !params.no_op_offload;

    if (params.reranking) {
        cparams.embeddings = true;
        cparams.pooling_type = LLAMA_POOLING_TYPE_RANK;
    }

    cparams.type_k = params.cache_type_k;
    cparams.type_v = params.cache_type_v;

    return cparams;
}
1128
+
1129
// Build ggml threadpool parameters from the CPU settings in `params`:
// initialize defaults for the requested thread count, then overlay the CPU
// affinity mask (only if one was explicitly set) and scheduling options.
struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params) {
    struct ggml_threadpool_params tpp;

    ggml_threadpool_params_init(&tpp, params.n_threads); // setup the defaults

    if (params.mask_valid) {
        // copy the fixed-size affinity bitmask wholesale
        std::memcpy(&tpp.cpumask, &params.cpumask, GGML_MAX_N_THREADS);
    }

    tpp.prio = params.priority;
    tpp.poll = params.poll;
    tpp.strict_cpu = params.strict_cpu;

    return tpp;
}
1144
+
1145
+ //
1146
+ // Batch utils
1147
+ //
1148
+
1149
// Reset `batch` so it can be refilled; the underlying buffers are reused,
// only the token count is cleared.
void common_batch_clear(struct llama_batch & batch) {
    batch.n_tokens = 0;
}
1152
+
1153
// Append one token to `batch` at position `pos`, tagged with `seq_ids`, and
// optionally request logits for it. The caller must have allocated the batch
// with enough capacity (and per-token seq_id room for seq_ids.size()).
void common_batch_add(
    struct llama_batch & batch,
    llama_token id,
    llama_pos pos,
    const std::vector<llama_seq_id> & seq_ids,
    bool logits) {
    // NOTE(review): this assumes batch.seq_id[] has a null entry one past the
    // allocated capacity (llama_batch_init appears to provide this) so that
    // overfilling trips the assert - confirm against llama.h.
    GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");

    batch.token [batch.n_tokens] = id;
    batch.pos [batch.n_tokens] = pos;
    batch.n_seq_id[batch.n_tokens] = seq_ids.size();
    for (size_t i = 0; i < seq_ids.size(); ++i) {
        batch.seq_id[batch.n_tokens][i] = seq_ids[i];
    }
    batch.logits [batch.n_tokens] = logits;

    batch.n_tokens++;
}
1171
+
1172
+ //
1173
+ // Token utils
1174
+ //
1175
+
1176
+ size_t common_lcp(const llama_tokens & a, const llama_tokens & b) {
1177
+ size_t i;
1178
+ for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++) {}
1179
+
1180
+ return i;
1181
+ }
1182
+
1183
// Length of the longest common *contiguous* run of tokens shared by `a` and
// `b`. NOTE: despite the name, this is the longest common substring, not
// subsequence - on a mismatch the run length is reset to 0 below.
// Uses the classic O(a*b) DP with two rows for O(b) space.
size_t common_lcs(const llama_tokens & a, const llama_tokens & b) {
    // check for empty sequences
    if (a.empty() || b.empty()) {
        return 0;
    }

    // get the lengths of the input sequences
    size_t a_len = a.size();
    size_t b_len = b.size();

    // initialize the maximum length of the longest common run found so far
    size_t max_length = 0;

    // use two rows instead of a 2D matrix to optimize space
    std::vector<size_t> prev_row(b_len + 1, 0);
    std::vector<size_t> curr_row(b_len + 1, 0);

    // iterate through the elements of a
    for (size_t i = 1; i <= a_len; i++) {
        // iterate through the elements of b
        for (size_t j = 1; j <= b_len; j++) {
            // if elements at the current positions match
            if (a[i - 1] == b[j - 1]) {
                // if it's the first element of either sequences, set run length to 1
                if (i == 1 || j == 1) {
                    curr_row[j] = 1;
                } else {
                    // extend the run ending at the previous pair by 1
                    curr_row[j] = prev_row[j - 1] + 1;
                }

                // update max_length if necessary
                if (curr_row[j] > max_length) {
                    max_length = curr_row[j];
                }
            } else {
                // reset the run length if elements don't match
                // (this reset is what makes it substring, not subsequence)
                curr_row[j] = 0;
            }
        }

        // update the previous row for the next iteration
        prev_row = curr_row;
    }

    // return the maximum length of the common run
    return max_length;
}
1231
+
1232
+ //
1233
+ // Vocab utils
1234
+ //
1235
+
1236
+ std::vector<llama_token> common_tokenize(
1237
+ const struct llama_context * ctx,
1238
+ const std::string & text,
1239
+ bool add_special,
1240
+ bool parse_special) {
1241
+ const llama_model * model = llama_get_model(ctx);
1242
+ const llama_vocab * vocab = llama_model_get_vocab(model);
1243
+ return common_tokenize(vocab, text, add_special, parse_special);
1244
+ }
1245
+
1246
// Tokenize `text` with the given vocab.
// `add_special`: let the model add its configured BOS/EOS tokens.
// `parse_special`: interpret special-token text inside `text` as tokens.
std::vector<llama_token> common_tokenize(
    const struct llama_vocab * vocab,
    const std::string & text,
    bool add_special,
    bool parse_special) {
    // upper limit for the number of tokens
    int n_tokens = text.length() + 2 * add_special;
    std::vector<llama_token> result(n_tokens);
    n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
    if (n_tokens < 0) {
        // buffer was too small: llama_tokenize returns the required size
        // negated - resize and tokenize again
        result.resize(-n_tokens);
        int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
        GGML_ASSERT(check == -n_tokens);
    } else {
        result.resize(n_tokens);
    }
    return result;
}
1264
+
1265
+ std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
1266
+ const llama_model * model = llama_get_model(ctx);
1267
+ const llama_vocab * vocab = llama_model_get_vocab(model);
1268
+ return common_token_to_piece(vocab, token, special);
1269
+ }
1270
+
1271
// Convert a single token to its text piece.
// `special`: render special tokens (e.g. BOS) as their text instead of "".
std::string common_token_to_piece(const struct llama_vocab * vocab, llama_token token, bool special) {
    std::string piece;
    piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n'
    const int n_chars = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
    if (n_chars < 0) {
        // buffer too small: llama_token_to_piece returns the required size
        // negated - resize and convert again
        piece.resize(-n_chars);
        int check = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
        GGML_ASSERT(check == -n_chars);
    }
    else {
        piece.resize(n_chars);
    }

    return piece;
}
1286
+
1287
+ std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
1288
+ const llama_model * model = llama_get_model(ctx);
1289
+ const llama_vocab * vocab = llama_model_get_vocab(model);
1290
+ return common_detokenize(vocab, tokens, special);
1291
+ }
1292
+
1293
// Convert a token sequence back to text.
// `special`: render special tokens as their text instead of dropping them.
std::string common_detokenize(const struct llama_vocab * vocab, const std::vector<llama_token> & tokens, bool special) {
    std::string text;
    // start with a buffer of at least one byte per token
    text.resize(std::max(text.capacity(), tokens.size()));
    int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
    if (n_chars < 0) {
        // buffer too small: required size is returned negated - retry once
        text.resize(-n_chars);
        n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
        GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
    }

    text.resize(n_chars);

    // NOTE: the original tokenizer decodes bytes after collecting the pieces.
    return text;
}
1308
+
1309
+ //
1310
+ // KV cache utils
1311
+ //
1312
+
1313
// Print a compact dump of the KV cache view to stdout, one character per
// cell, `row_size` cells per line: '.' = empty cell, 1-9/A-Z/a-z = number of
// sequences occupying the cell, '+' = more than the chart can show.
void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) {
    // index = sequence count; last char '+' is the saturation marker
    static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";

    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);

    llama_kv_cache_view_cell * c_curr = view.cells;
    llama_seq_id * cs_curr = view.cells_sequences;

    // cells_sequences is a flat array with n_seq_max entries per cell
    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
        if (i % row_size == 0) {
            printf("\n%5d: ", i);
        }
        int seq_count = 0;
        for (int j = 0; j < view.n_seq_max; j++) {
            if (cs_curr[j] >= 0) { seq_count++; } // negative = unused slot
        }
        // clamp to the last (saturation) character of the chart
        putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
    }

    printf("\n=== Done dumping\n");
}
1335
+
1336
// Print a per-sequence dump of the KV cache view to stdout: first a legend
// mapping one character to each distinct sequence id (up to the chart size),
// then one character per (cell, seq-slot): the legend char when occupied,
// '.' for an unused slot, '+' for sequence ids beyond the legend capacity.
void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) {
    static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
        view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);

    // first pass: assign a legend index to each distinct sequence id seen
    std::unordered_map<llama_seq_id, size_t> seqs;
    llama_kv_cache_view_cell * c_curr = view.cells;
    llama_seq_id * cs_curr = view.cells_sequences;

    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
        for (int j = 0; j < view.n_seq_max; j++) {
            if (cs_curr[j] < 0) { continue; } // negative = unused slot
            if (seqs.find(cs_curr[j]) == seqs.end()) {
                // stop assigning once the legend chart is exhausted
                if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
                const size_t sz = seqs.size();
                seqs[cs_curr[j]] = sz;
            }
        }
        if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
    }

    printf("=== Sequence legend: ");
    for (const auto & it : seqs) {
        printf("%zu=%d, ", it.second, it.first);
    }
    printf("'+'=other sequence ids");

    // second pass: print the grid
    c_curr = view.cells;
    cs_curr = view.cells_sequences;
    for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
        if (i % row_size == 0) {
            printf("\n%5d: ", i);
        }
        for (int j = 0; j < view.n_seq_max; j++) {
            if (cs_curr[j] >= 0) {
                const auto & it = seqs.find(cs_curr[j]);
                putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
            } else {
                putchar('.');
            }
        }
        putchar(' ');
    }

    printf("\n=== Done dumping\n");
}
1383
+
1384
+ //
1385
+ // Embedding utils
1386
+ //
1387
+
1388
// Normalize the embedding `inp` (length `n`) into `out` according to
// `embd_norm`:
//   -1 : no normalization (plain copy)
//    0 : divide by max absolute value, scaled to the int16 range
//    2 : euclidean (L2) norm
//  other: p-norm with p = embd_norm
// A zero norm produces an all-zero output instead of dividing by zero.
void common_embd_normalize(const float * inp, float * out, int n, int embd_norm) {
    double sum = 0.0;

    switch (embd_norm) {
        case -1: // no normalisation
            sum = 1.0;
            break;
        case 0: // max absolute
            for (int i = 0; i < n; i++) {
                const double a = std::abs(inp[i]);
                if (a > sum) {
                    sum = a;
                }
            }
            sum /= 32760.0; // make an int16 range
            break;
        case 2: // euclidean
            for (int i = 0; i < n; i++) {
                sum += inp[i] * inp[i];
            }
            sum = std::sqrt(sum);
            break;
        default: // p-norm (euclidean is p-norm p=2)
            for (int i = 0; i < n; i++) {
                sum += std::pow(std::abs(inp[i]), embd_norm);
            }
            sum = std::pow(sum, 1.0 / embd_norm);
            break;
    }

    // guard against division by zero for all-zero inputs
    const float norm = sum > 0.0 ? 1.0 / sum : 0.0f;

    for (int i = 0; i < n; i++) {
        out[i] = inp[i] * norm;
    }
}
1423
+
1424
// Cosine similarity of two length-`n` embeddings.
// Two zero vectors compare as 1.0; a single zero vector gives 0.0.
float common_embd_similarity_cos(const float * embd1, const float * embd2, int n){
    double dot   = 0.0;
    double norm1 = 0.0;
    double norm2 = 0.0;

    for (int i = 0; i < n; i++) {
        dot   += embd1[i] * embd2[i];
        norm1 += embd1[i] * embd1[i];
        norm2 += embd2[i] * embd2[i];
    }

    // Handle the case where one or both vectors are zero vectors
    if (norm1 == 0.0 || norm2 == 0.0) {
        return (norm1 == 0.0 && norm2 == 0.0) ? 1.0f : 0.0f;
    }

    return dot / (sqrt(norm1) * sqrt(norm2));
}
1445
+
1446
+ //
1447
+ // Control vector utils
1448
+ //
1449
+
1450
+ static common_control_vector_data common_control_vector_load_one(const common_control_vector_load_info & load_info) {
1451
+ common_control_vector_data result = { -1, {} };
1452
+
1453
+ ggml_context * ctx = nullptr;
1454
+ struct gguf_init_params meta_gguf_params = {
1455
+ /* .no_alloc = */ false,
1456
+ /* .ctx = */ &ctx,
1457
+ };
1458
+ struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
1459
+ if (!ctx_gguf) {
1460
+ LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
1461
+ return result;
1462
+ }
1463
+
1464
+ int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
1465
+ if (n_tensors == 0) {
1466
+ LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
1467
+ }
1468
+
1469
+ for (int i = 0; i < n_tensors; i++) {
1470
+ std::string name = gguf_get_tensor_name(ctx_gguf, i);
1471
+
1472
+ int layer_idx = -1;
1473
+
1474
+ // split on '.'
1475
+ size_t dotpos = name.find('.');
1476
+ if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
1477
+ try {
1478
+ layer_idx = std::stoi(name.substr(dotpos + 1));
1479
+ } catch (...) {
1480
+ layer_idx = -1;
1481
+ }
1482
+ }
1483
+ if (layer_idx < 0) {
1484
+ LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
1485
+ result.n_embd = -1;
1486
+ break;
1487
+ } else if (layer_idx == 0) {
1488
+ LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
1489
+ result.n_embd = -1;
1490
+ break;
1491
+ }
1492
+
1493
+ struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
1494
+ if (tensor->type != GGML_TYPE_F32) {
1495
+ LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
1496
+ result.n_embd = -1;
1497
+ break;
1498
+ }
1499
+ if (ggml_n_dims(tensor) != 1) {
1500
+ LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
1501
+ result.n_embd = -1;
1502
+ break;
1503
+ }
1504
+
1505
+ if (result.n_embd == -1) {
1506
+ result.n_embd = ggml_nelements(tensor);
1507
+ } else if (ggml_nelements(tensor) != result.n_embd) {
1508
+ LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
1509
+ result.n_embd = -1;
1510
+ break;
1511
+ }
1512
+
1513
+ // extend if necessary - do not store data for layer 0 (it's not used)
1514
+ result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);
1515
+
1516
+ const float * src = (const float *) tensor->data;
1517
+ float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
1518
+ for (int j = 0; j < result.n_embd; j++) {
1519
+ dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
1520
+ }
1521
+
1522
+ }
1523
+
1524
+ if (result.n_embd == -1) {
1525
+ LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
1526
+ result.data.clear();
1527
+ }
1528
+
1529
+ gguf_free(ctx_gguf);
1530
+ ggml_free(ctx);
1531
+
1532
+ return result;
1533
+ }
1534
+
1535
+ common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos) {
1536
+ common_control_vector_data result = { -1, {} };
1537
+
1538
+ for (const auto & info : load_infos) {
1539
+ auto cur = common_control_vector_load_one(info);
1540
+
1541
+ if (cur.n_embd == -1) {
1542
+ result.n_embd = -1;
1543
+ break;
1544
+ }
1545
+ if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
1546
+ LOG_ERR("%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
1547
+ result.n_embd = -1;
1548
+ break;
1549
+ }
1550
+
1551
+ if (result.n_embd == -1) {
1552
+ result = std::move(cur);
1553
+ } else {
1554
+ result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary
1555
+ for (size_t i = 0; i < cur.data.size(); i++) {
1556
+ result.data[i] += cur.data[i];
1557
+ }
1558
+ }
1559
+ }
1560
+
1561
+ if (result.n_embd == -1) {
1562
+ LOG_ERR("%s: no valid control vector files passed\n", __func__);
1563
+ result.data.clear();
1564
+ }
1565
+
1566
+ return result;
1567
+ }