@novastera-oss/llamarn 0.0.1-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (989) hide show
  1. package/INTERFACE.md +389 -0
  2. package/LICENSE +201 -0
  3. package/README.md +235 -0
  4. package/RNLlamaCpp.podspec +69 -0
  5. package/android/CMakeLists.txt +107 -0
  6. package/android/build.gradle +111 -0
  7. package/android/generated/java/com/novastera/llamarn/NativeRNLlamaCppSpec.java +47 -0
  8. package/android/generated/jni/CMakeLists.txt +36 -0
  9. package/android/generated/jni/RNLlamaCppSpec-generated.cpp +44 -0
  10. package/android/generated/jni/RNLlamaCppSpec.h +31 -0
  11. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI-generated.cpp +42 -0
  12. package/android/generated/jni/react/renderer/components/RNLlamaCppSpec/RNLlamaCppSpecJSI.h +336 -0
  13. package/android/gradle.properties +5 -0
  14. package/android/src/main/AndroidManifest.xml +3 -0
  15. package/android/src/main/AndroidManifestNew.xml +2 -0
  16. package/android/src/main/cpp/include/llama-cpp.h +30 -0
  17. package/android/src/main/cpp/include/llama.h +1440 -0
  18. package/android/src/main/java/com/novastera/llamarn/RNLlamaCppPackage.kt +21 -0
  19. package/android/src/main/jniLibs/arm64-v8a/libOpenCL.so +0 -0
  20. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  21. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  22. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  23. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  24. package/android/src/main/jniLibs/x86_64/libOpenCL.so +0 -0
  25. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  26. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  27. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  28. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  29. package/cpp/LlamaCppModel.cpp +984 -0
  30. package/cpp/LlamaCppModel.h +162 -0
  31. package/cpp/PureCppImpl.cpp +308 -0
  32. package/cpp/PureCppImpl.h +59 -0
  33. package/cpp/SystemUtils.cpp +180 -0
  34. package/cpp/SystemUtils.h +74 -0
  35. package/cpp/build-info.cpp +4 -0
  36. package/cpp/llama.cpp/AUTHORS +1106 -0
  37. package/cpp/llama.cpp/CMakeLists.txt +254 -0
  38. package/cpp/llama.cpp/CMakePresets.json +84 -0
  39. package/cpp/llama.cpp/CODEOWNERS +11 -0
  40. package/cpp/llama.cpp/CONTRIBUTING.md +127 -0
  41. package/cpp/llama.cpp/LICENSE +21 -0
  42. package/cpp/llama.cpp/Makefile +1608 -0
  43. package/cpp/llama.cpp/README.md +575 -0
  44. package/cpp/llama.cpp/SECURITY.md +68 -0
  45. package/cpp/llama.cpp/build-xcframework.sh +540 -0
  46. package/cpp/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  47. package/cpp/llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
  48. package/cpp/llama.cpp/cmake/build-info.cmake +64 -0
  49. package/cpp/llama.cpp/cmake/common.cmake +35 -0
  50. package/cpp/llama.cpp/cmake/git-vars.cmake +22 -0
  51. package/cpp/llama.cpp/cmake/llama-config.cmake.in +30 -0
  52. package/cpp/llama.cpp/cmake/llama.pc.in +10 -0
  53. package/cpp/llama.cpp/cmake/x64-windows-llvm.cmake +5 -0
  54. package/cpp/llama.cpp/common/CMakeLists.txt +170 -0
  55. package/cpp/llama.cpp/common/arg.cpp +3337 -0
  56. package/cpp/llama.cpp/common/arg.h +89 -0
  57. package/cpp/llama.cpp/common/base64.hpp +392 -0
  58. package/cpp/llama.cpp/common/build-info.cpp.in +4 -0
  59. package/cpp/llama.cpp/common/chat.cpp +1781 -0
  60. package/cpp/llama.cpp/common/chat.h +135 -0
  61. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +24 -0
  62. package/cpp/llama.cpp/common/common.cpp +1567 -0
  63. package/cpp/llama.cpp/common/common.h +668 -0
  64. package/cpp/llama.cpp/common/console.cpp +504 -0
  65. package/cpp/llama.cpp/common/console.h +19 -0
  66. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +1027 -0
  67. package/cpp/llama.cpp/common/json-schema-to-grammar.h +21 -0
  68. package/cpp/llama.cpp/common/json.hpp +24766 -0
  69. package/cpp/llama.cpp/common/llguidance.cpp +254 -0
  70. package/cpp/llama.cpp/common/log.cpp +393 -0
  71. package/cpp/llama.cpp/common/log.h +103 -0
  72. package/cpp/llama.cpp/common/minja/chat-template.hpp +537 -0
  73. package/cpp/llama.cpp/common/minja/minja.hpp +2941 -0
  74. package/cpp/llama.cpp/common/ngram-cache.cpp +286 -0
  75. package/cpp/llama.cpp/common/ngram-cache.h +101 -0
  76. package/cpp/llama.cpp/common/sampling.cpp +580 -0
  77. package/cpp/llama.cpp/common/sampling.h +107 -0
  78. package/cpp/llama.cpp/common/speculative.cpp +278 -0
  79. package/cpp/llama.cpp/common/speculative.h +28 -0
  80. package/cpp/llama.cpp/common/stb_image.h +7988 -0
  81. package/cpp/llama.cpp/convert_hf_to_gguf.py +6195 -0
  82. package/cpp/llama.cpp/convert_hf_to_gguf_update.py +393 -0
  83. package/cpp/llama.cpp/convert_llama_ggml_to_gguf.py +450 -0
  84. package/cpp/llama.cpp/convert_lora_to_gguf.py +461 -0
  85. package/cpp/llama.cpp/flake.lock +58 -0
  86. package/cpp/llama.cpp/flake.nix +185 -0
  87. package/cpp/llama.cpp/ggml/CMakeLists.txt +388 -0
  88. package/cpp/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  89. package/cpp/llama.cpp/ggml/cmake/common.cmake +26 -0
  90. package/cpp/llama.cpp/ggml/cmake/ggml-config.cmake.in +152 -0
  91. package/cpp/llama.cpp/ggml/include/ggml-alloc.h +76 -0
  92. package/cpp/llama.cpp/ggml/include/ggml-backend.h +354 -0
  93. package/cpp/llama.cpp/ggml/include/ggml-blas.h +25 -0
  94. package/cpp/llama.cpp/ggml/include/ggml-cann.h +123 -0
  95. package/cpp/llama.cpp/ggml/include/ggml-cpp.h +39 -0
  96. package/cpp/llama.cpp/ggml/include/ggml-cpu.h +143 -0
  97. package/cpp/llama.cpp/ggml/include/ggml-cuda.h +47 -0
  98. package/cpp/llama.cpp/ggml/include/ggml-kompute.h +50 -0
  99. package/cpp/llama.cpp/ggml/include/ggml-metal.h +66 -0
  100. package/cpp/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  101. package/cpp/llama.cpp/ggml/include/ggml-opt.h +216 -0
  102. package/cpp/llama.cpp/ggml/include/ggml-rpc.h +33 -0
  103. package/cpp/llama.cpp/ggml/include/ggml-sycl.h +49 -0
  104. package/cpp/llama.cpp/ggml/include/ggml-vulkan.h +29 -0
  105. package/cpp/llama.cpp/ggml/include/ggml.h +2192 -0
  106. package/cpp/llama.cpp/ggml/include/gguf.h +202 -0
  107. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +345 -0
  108. package/cpp/llama.cpp/ggml/src/ggml-alloc.c +1042 -0
  109. package/cpp/llama.cpp/ggml/src/ggml-backend-impl.h +255 -0
  110. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +586 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +2008 -0
  112. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  113. package/cpp/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +517 -0
  114. package/cpp/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +74 -0
  115. package/cpp/llama.cpp/ggml/src/ggml-cann/Doxyfile +2579 -0
  116. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +179 -0
  117. package/cpp/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +258 -0
  118. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2589 -0
  119. package/cpp/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +1083 -0
  120. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +420 -0
  121. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +2554 -0
  122. package/cpp/llama.cpp/ggml/src/ggml-common.h +1857 -0
  123. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +495 -0
  124. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +221 -0
  125. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  126. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  127. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  128. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  129. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  130. package/cpp/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  131. package/cpp/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +100 -0
  132. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  133. package/cpp/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +327 -0
  134. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +6431 -0
  135. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  136. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  137. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  138. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +512 -0
  139. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +13131 -0
  140. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  141. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  142. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  143. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +3492 -0
  144. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +671 -0
  145. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +254 -0
  146. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +60 -0
  147. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +287 -0
  148. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.h +17 -0
  149. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +3544 -0
  150. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +14 -0
  151. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  152. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  153. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  154. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  155. package/cpp/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  156. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  157. package/cpp/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  158. package/cpp/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +184 -0
  159. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cu +47 -0
  160. package/cpp/llama.cpp/ggml/src/ggml-cuda/acc.cuh +5 -0
  161. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cu +34 -0
  162. package/cpp/llama.cpp/ggml/src/ggml-cuda/arange.cuh +5 -0
  163. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cu +91 -0
  164. package/cpp/llama.cpp/ggml/src/ggml-cuda/argmax.cuh +3 -0
  165. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cu +104 -0
  166. package/cpp/llama.cpp/ggml/src/ggml-cuda/argsort.cuh +3 -0
  167. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cu +363 -0
  168. package/cpp/llama.cpp/ggml/src/ggml-cuda/binbcast.cuh +9 -0
  169. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cu +45 -0
  170. package/cpp/llama.cpp/ggml/src/ggml-cuda/clamp.cuh +5 -0
  171. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +828 -0
  172. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cu +221 -0
  173. package/cpp/llama.cpp/ggml/src/ggml-cuda/concat.cuh +5 -0
  174. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cu +89 -0
  175. package/cpp/llama.cpp/ggml/src/ggml-cuda/conv-transpose-1d.cuh +5 -0
  176. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cu +730 -0
  177. package/cpp/llama.cpp/ggml/src/ggml-cuda/convert.cuh +26 -0
  178. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cu +64 -0
  179. package/cpp/llama.cpp/ggml/src/ggml-cuda/count-equal.cuh +5 -0
  180. package/cpp/llama.cpp/ggml/src/ggml-cuda/cp-async.cuh +57 -0
  181. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cu +695 -0
  182. package/cpp/llama.cpp/ggml/src/ggml-cuda/cpy.cuh +11 -0
  183. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cu +189 -0
  184. package/cpp/llama.cpp/ggml/src/ggml-cuda/cross-entropy-loss.cuh +7 -0
  185. package/cpp/llama.cpp/ggml/src/ggml-cuda/dequantize.cuh +103 -0
  186. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cu +40 -0
  187. package/cpp/llama.cpp/ggml/src/ggml-cuda/diagmask.cuh +5 -0
  188. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-common.cuh +873 -0
  189. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +1269 -0
  190. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cu +357 -0
  191. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f16.cuh +3 -0
  192. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cu +365 -0
  193. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-tile-f32.cuh +3 -0
  194. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f16.cuh +437 -0
  195. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-vec-f32.cuh +428 -0
  196. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cu +634 -0
  197. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-wmma-f16.cuh +3 -0
  198. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cu +345 -0
  199. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn.cuh +3 -0
  200. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cu +275 -0
  201. package/cpp/llama.cpp/ggml/src/ggml-cuda/getrows.cuh +15 -0
  202. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +3501 -0
  203. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cu +93 -0
  204. package/cpp/llama.cpp/ggml/src/ggml-cuda/gla.cuh +3 -0
  205. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cu +103 -0
  206. package/cpp/llama.cpp/ggml/src/ggml-cuda/im2col.cuh +5 -0
  207. package/cpp/llama.cpp/ggml/src/ggml-cuda/mma.cuh +396 -0
  208. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cu +322 -0
  209. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmq.cuh +3217 -0
  210. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cu +336 -0
  211. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmv.cuh +12 -0
  212. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cu +595 -0
  213. package/cpp/llama.cpp/ggml/src/ggml-cuda/mmvq.cuh +12 -0
  214. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cu +458 -0
  215. package/cpp/llama.cpp/ggml/src/ggml-cuda/norm.cuh +11 -0
  216. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cu +78 -0
  217. package/cpp/llama.cpp/ggml/src/ggml-cuda/opt-step-adamw.cuh +5 -0
  218. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cu +68 -0
  219. package/cpp/llama.cpp/ggml/src/ggml-cuda/out-prod.cuh +3 -0
  220. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cu +49 -0
  221. package/cpp/llama.cpp/ggml/src/ggml-cuda/pad.cuh +5 -0
  222. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cu +94 -0
  223. package/cpp/llama.cpp/ggml/src/ggml-cuda/pool2d.cuh +5 -0
  224. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cu +189 -0
  225. package/cpp/llama.cpp/ggml/src/ggml-cuda/quantize.cuh +27 -0
  226. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cu +456 -0
  227. package/cpp/llama.cpp/ggml/src/ggml-cuda/rope.cuh +7 -0
  228. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cu +31 -0
  229. package/cpp/llama.cpp/ggml/src/ggml-cuda/scale.cuh +5 -0
  230. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cu +283 -0
  231. package/cpp/llama.cpp/ggml/src/ggml-cuda/softmax.cuh +7 -0
  232. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cu +148 -0
  233. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-conv.cuh +3 -0
  234. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +153 -0
  235. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cuh +3 -0
  236. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cu +45 -0
  237. package/cpp/llama.cpp/ggml/src/ggml-cuda/sum.cuh +5 -0
  238. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cu +39 -0
  239. package/cpp/llama.cpp/ggml/src/ggml-cuda/sumrows.cuh +5 -0
  240. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_16.cu +5 -0
  241. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_1-ncols2_8.cu +10 -0
  242. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_1.cu +10 -0
  243. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_2.cu +10 -0
  244. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_16-ncols2_4.cu +10 -0
  245. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_16.cu +5 -0
  246. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_4.cu +10 -0
  247. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_2-ncols2_8.cu +10 -0
  248. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_1.cu +10 -0
  249. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_32-ncols2_2.cu +10 -0
  250. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_16.cu +5 -0
  251. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_2.cu +10 -0
  252. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_4.cu +10 -0
  253. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_4-ncols2_8.cu +10 -0
  254. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_64-ncols2_1.cu +10 -0
  255. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_1.cu +10 -0
  256. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_2.cu +10 -0
  257. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_4.cu +10 -0
  258. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-mma-f16-instance-ncols1_8-ncols2_8.cu +10 -0
  259. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +5 -0
  260. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +5 -0
  261. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +5 -0
  262. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +5 -0
  263. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +5 -0
  264. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +5 -0
  265. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +5 -0
  266. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +5 -0
  267. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +5 -0
  268. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +5 -0
  269. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +5 -0
  270. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +5 -0
  271. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +5 -0
  272. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +5 -0
  273. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +5 -0
  274. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +5 -0
  275. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +5 -0
  276. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +5 -0
  277. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +5 -0
  278. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +5 -0
  279. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +5 -0
  280. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +5 -0
  281. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +5 -0
  282. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +5 -0
  283. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +5 -0
  284. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +5 -0
  285. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +5 -0
  286. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +5 -0
  287. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +5 -0
  288. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +5 -0
  289. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +5 -0
  290. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +5 -0
  291. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +5 -0
  292. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +5 -0
  293. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +5 -0
  294. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +5 -0
  295. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +5 -0
  296. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +5 -0
  297. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +5 -0
  298. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +5 -0
  299. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +5 -0
  300. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +5 -0
  301. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +5 -0
  302. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +5 -0
  303. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +5 -0
  304. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +5 -0
  305. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +5 -0
  306. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +5 -0
  307. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +5 -0
  308. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +5 -0
  309. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +5 -0
  310. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +5 -0
  311. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +5 -0
  312. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +5 -0
  313. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +5 -0
  314. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +5 -0
  315. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +5 -0
  316. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +5 -0
  317. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +5 -0
  318. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +5 -0
  319. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +5 -0
  320. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +5 -0
  321. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +5 -0
  322. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +5 -0
  323. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +5 -0
  324. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +5 -0
  325. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +5 -0
  326. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +5 -0
  327. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +5 -0
  328. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +5 -0
  329. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +5 -0
  330. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +5 -0
  331. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +5 -0
  332. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +5 -0
  333. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +5 -0
  334. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +5 -0
  335. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +5 -0
  336. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +5 -0
  337. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +5 -0
  338. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +5 -0
  339. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +5 -0
  340. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +5 -0
  341. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +5 -0
  342. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +5 -0
  343. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +5 -0
  344. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +5 -0
  345. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +78 -0
  346. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu +5 -0
  347. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu +5 -0
  348. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu +5 -0
  349. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu +5 -0
  350. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu +5 -0
  351. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu +5 -0
  352. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu +5 -0
  353. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu +5 -0
  354. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  355. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  356. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  357. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  358. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  359. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  360. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  361. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  362. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  363. package/cpp/llama.cpp/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  364. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cu +47 -0
  365. package/cpp/llama.cpp/ggml/src/ggml-cuda/tsembd.cuh +5 -0
  366. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cu +279 -0
  367. package/cpp/llama.cpp/ggml/src/ggml-cuda/unary.cuh +57 -0
  368. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cu +51 -0
  369. package/cpp/llama.cpp/ggml/src/ggml-cuda/upscale.cuh +5 -0
  370. package/cpp/llama.cpp/ggml/src/ggml-cuda/vecdotq.cuh +1135 -0
  371. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +15 -0
  372. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +243 -0
  373. package/cpp/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +140 -0
  374. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cu +199 -0
  375. package/cpp/llama.cpp/ggml/src/ggml-cuda/wkv.cuh +7 -0
  376. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +131 -0
  377. package/cpp/llama.cpp/ggml/src/ggml-impl.h +601 -0
  378. package/cpp/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  379. package/cpp/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +2251 -0
  380. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/common.comp +112 -0
  381. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +58 -0
  382. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +25 -0
  383. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +52 -0
  384. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +52 -0
  385. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +52 -0
  386. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +52 -0
  387. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +30 -0
  388. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +22 -0
  389. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +17 -0
  390. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +31 -0
  391. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +31 -0
  392. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +38 -0
  393. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +39 -0
  394. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +44 -0
  395. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +52 -0
  396. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +69 -0
  397. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +51 -0
  398. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +33 -0
  399. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +35 -0
  400. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +140 -0
  401. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +106 -0
  402. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +73 -0
  403. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +52 -0
  404. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +28 -0
  405. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +84 -0
  406. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +21 -0
  407. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +53 -0
  408. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +52 -0
  409. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +52 -0
  410. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +52 -0
  411. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +52 -0
  412. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +19 -0
  413. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +23 -0
  414. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +22 -0
  415. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +72 -0
  416. package/cpp/llama.cpp/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +71 -0
  417. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +120 -0
  418. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +618 -0
  419. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +5916 -0
  420. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +6891 -0
  421. package/cpp/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  422. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +96 -0
  423. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4966 -0
  424. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/add.cl +83 -0
  425. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/clamp.cl +20 -0
  426. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cpy.cl +184 -0
  427. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/cvt.cl +118 -0
  428. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/diag_mask_inf.cl +58 -0
  429. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/embed_kernel.py +26 -0
  430. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gelu.cl +62 -0
  431. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle.cl +268 -0
  432. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/gemv_noshuffle_general.cl +274 -0
  433. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/get_rows.cl +163 -0
  434. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f16.cl +57 -0
  435. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/im2col_f32.cl +57 -0
  436. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul.cl +79 -0
  437. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mat_Ab_Bi_8x4.cl +139 -0
  438. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f16.cl +118 -0
  439. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32.cl +118 -0
  440. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_1row.cl +94 -0
  441. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f16_f32_l4.cl +84 -0
  442. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_f32_f32.cl +118 -0
  443. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32.cl +192 -0
  444. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_16x_flat.cl +307 -0
  445. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_1d_8x_flat.cl +265 -0
  446. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_8x_flat.cl +272 -0
  447. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q4_0_f32_v.cl +254 -0
  448. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_q6_k.cl +190 -0
  449. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/norm.cl +81 -0
  450. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/relu.cl +16 -0
  451. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rms_norm.cl +96 -0
  452. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/rope.cl +721 -0
  453. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/scale.cl +16 -0
  454. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/silu.cl +30 -0
  455. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +87 -0
  456. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +87 -0
  457. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f16.cl +86 -0
  458. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/softmax_f32.cl +86 -0
  459. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/transpose.cl +84 -0
  460. package/cpp/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  461. package/cpp/llama.cpp/ggml/src/ggml-quants.c +5232 -0
  462. package/cpp/llama.cpp/ggml/src/ggml-quants.h +100 -0
  463. package/cpp/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  464. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +1813 -0
  465. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +183 -0
  466. package/cpp/llama.cpp/ggml/src/ggml-sycl/backend.hpp +37 -0
  467. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +350 -0
  468. package/cpp/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  469. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.cpp +83 -0
  470. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +493 -0
  471. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.cpp +197 -0
  472. package/cpp/llama.cpp/ggml/src/ggml-sycl/concat.hpp +20 -0
  473. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.cpp +100 -0
  474. package/cpp/llama.cpp/ggml/src/ggml-sycl/conv.hpp +20 -0
  475. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +596 -0
  476. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.hpp +34 -0
  477. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +701 -0
  478. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +11 -0
  479. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +753 -0
  480. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1154 -0
  481. package/cpp/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  482. package/cpp/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +2957 -0
  483. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1559 -0
  484. package/cpp/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +75 -0
  485. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +70 -0
  486. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +311 -0
  487. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +20 -0
  488. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +4302 -0
  489. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  490. package/cpp/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  491. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +136 -0
  492. package/cpp/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +21 -0
  493. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3030 -0
  494. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  495. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1081 -0
  496. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  497. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.cpp +474 -0
  498. package/cpp/llama.cpp/ggml/src/ggml-sycl/norm.hpp +26 -0
  499. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +46 -0
  500. package/cpp/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +10 -0
  501. package/cpp/llama.cpp/ggml/src/ggml-sycl/presets.hpp +74 -0
  502. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +61 -0
  503. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +362 -0
  504. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.hpp +20 -0
  505. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +264 -0
  506. package/cpp/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +20 -0
  507. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +13 -0
  508. package/cpp/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +23 -0
  509. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +73 -0
  510. package/cpp/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +20 -0
  511. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1189 -0
  512. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +305 -0
  513. package/cpp/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +10 -0
  514. package/cpp/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  515. package/cpp/llama.cpp/ggml/src/ggml-threading.h +14 -0
  516. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +202 -0
  517. package/cpp/llama.cpp/ggml/src/ggml-vulkan/cmake/host-toolchain.cmake.in +15 -0
  518. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +10502 -0
  519. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +22 -0
  520. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/acc.comp +29 -0
  521. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/add.comp +29 -0
  522. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argmax.comp +51 -0
  523. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/argsort.comp +69 -0
  524. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/clamp.comp +17 -0
  525. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/concat.comp +41 -0
  526. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/contig_copy.comp +49 -0
  527. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_dw.comp +105 -0
  528. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy.comp +23 -0
  529. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_from_quant.comp +51 -0
  530. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +242 -0
  531. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/cos.comp +17 -0
  532. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/count_equal.comp +31 -0
  533. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_f32.comp +20 -0
  534. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs.comp +462 -0
  535. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp +699 -0
  536. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_head.comp +13 -0
  537. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_m.comp +42 -0
  538. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq1_s.comp +35 -0
  539. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_s.comp +44 -0
  540. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xs.comp +43 -0
  541. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq2_xxs.comp +48 -0
  542. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_s.comp +39 -0
  543. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq3_xxs.comp +49 -0
  544. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_nl.comp +32 -0
  545. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_iq4_xs.comp +34 -0
  546. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +34 -0
  547. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +42 -0
  548. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_0.comp +30 -0
  549. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_1.comp +32 -0
  550. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +68 -0
  551. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_0.comp +34 -0
  552. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_1.comp +35 -0
  553. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +70 -0
  554. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +33 -0
  555. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q8_0.comp +31 -0
  556. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/diag_mask_inf.comp +34 -0
  557. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/div.comp +27 -0
  558. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +483 -0
  559. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +383 -0
  560. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +59 -0
  561. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu.comp +25 -0
  562. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/gelu_quick.comp +23 -0
  563. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +64 -0
  564. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_head.comp +9 -0
  565. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/generic_unary_head.comp +76 -0
  566. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows.comp +33 -0
  567. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/get_rows_quant.comp +41 -0
  568. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/group_norm.comp +66 -0
  569. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +100 -0
  570. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/l2_norm.comp +41 -0
  571. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/leaky_relu.comp +22 -0
  572. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul.comp +27 -0
  573. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_split_k_reduce.comp +48 -0
  574. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec.comp +169 -0
  575. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_base.comp +118 -0
  576. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_m.comp +82 -0
  577. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq1_s.comp +79 -0
  578. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_s.comp +90 -0
  579. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xs.comp +87 -0
  580. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq2_xxs.comp +87 -0
  581. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_s.comp +90 -0
  582. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_iq3_xxs.comp +88 -0
  583. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_nc.comp +118 -0
  584. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_p021.comp +154 -0
  585. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q2_k.comp +130 -0
  586. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q3_k.comp +132 -0
  587. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q4_k.comp +136 -0
  588. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q5_k.comp +167 -0
  589. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mat_vec_q6_k.comp +130 -0
  590. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +868 -0
  591. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +441 -0
  592. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq.comp +442 -0
  593. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/mul_mmq_funcs.comp +99 -0
  594. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/norm.comp +44 -0
  595. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/opt_step_adamw.comp +42 -0
  596. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pad.comp +28 -0
  597. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/pool2d.comp +74 -0
  598. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/quantize_q8_1.comp +77 -0
  599. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/relu.comp +21 -0
  600. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat.comp +26 -0
  601. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/repeat_back.comp +37 -0
  602. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +52 -0
  603. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm_back.comp +55 -0
  604. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +58 -0
  605. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +60 -0
  606. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +43 -0
  607. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +43 -0
  608. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/rope_vision.comp +47 -0
  609. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +24 -0
  610. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sigmoid.comp +20 -0
  611. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu.comp +22 -0
  612. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/silu_back.comp +26 -0
  613. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sin.comp +17 -0
  614. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +173 -0
  615. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/soft_max_back.comp +50 -0
  616. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/square.comp +17 -0
  617. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sub.comp +29 -0
  618. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/sum_rows.comp +37 -0
  619. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/tanh.comp +20 -0
  620. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_bfloat16_support.comp +7 -0
  621. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat2_support.comp +7 -0
  622. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_coopmat_support.comp +7 -0
  623. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/test_integer_dot_support.comp +7 -0
  624. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/timestep_embedding.comp +41 -0
  625. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/types.comp +1373 -0
  626. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +36 -0
  627. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +740 -0
  628. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv6.comp +87 -0
  629. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/wkv7.comp +91 -0
  630. package/cpp/llama.cpp/ggml/src/ggml.c +6499 -0
  631. package/cpp/llama.cpp/ggml/src/gguf.cpp +1330 -0
  632. package/cpp/llama.cpp/gguf-py/LICENSE +21 -0
  633. package/cpp/llama.cpp/gguf-py/README.md +99 -0
  634. package/cpp/llama.cpp/gguf-py/examples/reader.py +49 -0
  635. package/cpp/llama.cpp/gguf-py/examples/writer.py +39 -0
  636. package/cpp/llama.cpp/gguf-py/gguf/__init__.py +9 -0
  637. package/cpp/llama.cpp/gguf-py/gguf/constants.py +2296 -0
  638. package/cpp/llama.cpp/gguf-py/gguf/gguf.py +15 -0
  639. package/cpp/llama.cpp/gguf-py/gguf/gguf_reader.py +367 -0
  640. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +1041 -0
  641. package/cpp/llama.cpp/gguf-py/gguf/lazy.py +223 -0
  642. package/cpp/llama.cpp/gguf-py/gguf/metadata.py +642 -0
  643. package/cpp/llama.cpp/gguf-py/gguf/py.typed +0 -0
  644. package/cpp/llama.cpp/gguf-py/gguf/quants.py +1269 -0
  645. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_convert_endian.py +182 -0
  646. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_dump.py +454 -0
  647. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_editor_gui.py +1610 -0
  648. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_hash.py +102 -0
  649. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_new_metadata.py +207 -0
  650. package/cpp/llama.cpp/gguf-py/gguf/scripts/gguf_set_metadata.py +95 -0
  651. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +1172 -0
  652. package/cpp/llama.cpp/gguf-py/gguf/utility.py +264 -0
  653. package/cpp/llama.cpp/gguf-py/gguf/vocab.py +492 -0
  654. package/cpp/llama.cpp/gguf-py/pyproject.toml +43 -0
  655. package/cpp/llama.cpp/gguf-py/tests/__init__.py +1 -0
  656. package/cpp/llama.cpp/gguf-py/tests/test_metadata.py +238 -0
  657. package/cpp/llama.cpp/gguf-py/tests/test_quants.py +238 -0
  658. package/cpp/llama.cpp/grammars/README.md +382 -0
  659. package/cpp/llama.cpp/grammars/arithmetic.gbnf +6 -0
  660. package/cpp/llama.cpp/grammars/c.gbnf +42 -0
  661. package/cpp/llama.cpp/grammars/chess.gbnf +13 -0
  662. package/cpp/llama.cpp/grammars/english.gbnf +6 -0
  663. package/cpp/llama.cpp/grammars/japanese.gbnf +7 -0
  664. package/cpp/llama.cpp/grammars/json.gbnf +25 -0
  665. package/cpp/llama.cpp/grammars/json_arr.gbnf +34 -0
  666. package/cpp/llama.cpp/grammars/list.gbnf +4 -0
  667. package/cpp/llama.cpp/include/llama-cpp.h +30 -0
  668. package/cpp/llama.cpp/include/llama.h +1440 -0
  669. package/cpp/llama.cpp/licenses/LICENSE-curl +9 -0
  670. package/cpp/llama.cpp/licenses/LICENSE-httplib +21 -0
  671. package/cpp/llama.cpp/licenses/LICENSE-jsonhpp +21 -0
  672. package/cpp/llama.cpp/licenses/LICENSE-linenoise +26 -0
  673. package/cpp/llama.cpp/media/llama0-banner.png +0 -0
  674. package/cpp/llama.cpp/media/llama0-logo.png +0 -0
  675. package/cpp/llama.cpp/media/llama1-banner.png +0 -0
  676. package/cpp/llama.cpp/media/llama1-logo.png +0 -0
  677. package/cpp/llama.cpp/media/llama1-logo.svg +34 -0
  678. package/cpp/llama.cpp/media/matmul.png +0 -0
  679. package/cpp/llama.cpp/media/matmul.svg +1238 -0
  680. package/cpp/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  681. package/cpp/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  682. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  683. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  684. package/cpp/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  685. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  686. package/cpp/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  687. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  688. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  689. package/cpp/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  690. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  691. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  692. package/cpp/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  693. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  694. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  695. package/cpp/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  696. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
  697. package/cpp/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
  698. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  699. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  700. package/cpp/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  701. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  702. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  703. package/cpp/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  704. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +112 -0
  705. package/cpp/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +46 -0
  706. package/cpp/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  707. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  708. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  709. package/cpp/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  710. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  711. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  712. package/cpp/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  713. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  714. package/cpp/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  715. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  716. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  717. package/cpp/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  718. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  719. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  720. package/cpp/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  721. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  722. package/cpp/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  723. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  724. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  725. package/cpp/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  726. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  727. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  728. package/cpp/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  729. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  730. package/cpp/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  731. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  732. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  733. package/cpp/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  734. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r-plus-tool_use.jinja +202 -0
  735. package/cpp/llama.cpp/models/templates/CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja +156 -0
  736. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja +152 -0
  737. package/cpp/llama.cpp/models/templates/NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja +152 -0
  738. package/cpp/llama.cpp/models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +54 -0
  739. package/cpp/llama.cpp/models/templates/README.md +22 -0
  740. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja +1 -0
  741. package/cpp/llama.cpp/models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja +1 -0
  742. package/cpp/llama.cpp/models/templates/fireworks-ai-llama-3-firefunction-v2.jinja +57 -0
  743. package/cpp/llama.cpp/models/templates/google-gemma-2-2b-it.jinja +4 -0
  744. package/cpp/llama.cpp/models/templates/llama-cpp-deepseek-r1.jinja +76 -0
  745. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.1.jinja +58 -0
  746. package/cpp/llama.cpp/models/templates/meetkai-functionary-medium-v3.2.jinja +287 -0
  747. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +109 -0
  748. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.2-3B-Instruct.jinja +93 -0
  749. package/cpp/llama.cpp/models/templates/meta-llama-Llama-3.3-70B-Instruct.jinja +109 -0
  750. package/cpp/llama.cpp/models/templates/microsoft-Phi-3.5-mini-instruct.jinja +8 -0
  751. package/cpp/llama.cpp/models/templates/mistralai-Mistral-Nemo-Instruct-2407.jinja +87 -0
  752. package/cpp/llama.cpp/mypy.ini +7 -0
  753. package/cpp/llama.cpp/pocs/CMakeLists.txt +14 -0
  754. package/cpp/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  755. package/cpp/llama.cpp/pocs/vdot/q8dot.cpp +173 -0
  756. package/cpp/llama.cpp/pocs/vdot/vdot.cpp +311 -0
  757. package/cpp/llama.cpp/poetry.lock +1197 -0
  758. package/cpp/llama.cpp/prompts/LLM-questions.txt +49 -0
  759. package/cpp/llama.cpp/prompts/alpaca.txt +1 -0
  760. package/cpp/llama.cpp/prompts/assistant.txt +31 -0
  761. package/cpp/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  762. package/cpp/llama.cpp/prompts/chat-with-bob.txt +7 -0
  763. package/cpp/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  764. package/cpp/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  765. package/cpp/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  766. package/cpp/llama.cpp/prompts/chat.txt +28 -0
  767. package/cpp/llama.cpp/prompts/dan-modified.txt +1 -0
  768. package/cpp/llama.cpp/prompts/dan.txt +1 -0
  769. package/cpp/llama.cpp/prompts/mnemonics.txt +93 -0
  770. package/cpp/llama.cpp/prompts/parallel-questions.txt +43 -0
  771. package/cpp/llama.cpp/prompts/reason-act.txt +18 -0
  772. package/cpp/llama.cpp/pyproject.toml +45 -0
  773. package/cpp/llama.cpp/pyrightconfig.json +22 -0
  774. package/cpp/llama.cpp/requirements/requirements-all.txt +15 -0
  775. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  776. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  777. package/cpp/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  778. package/cpp/llama.cpp/requirements/requirements-convert_legacy_llama.txt +5 -0
  779. package/cpp/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  780. package/cpp/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  781. package/cpp/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  782. package/cpp/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  783. package/cpp/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  784. package/cpp/llama.cpp/requirements/requirements-tool_bench.txt +12 -0
  785. package/cpp/llama.cpp/requirements.txt +13 -0
  786. package/cpp/llama.cpp/src/CMakeLists.txt +45 -0
  787. package/cpp/llama.cpp/src/llama-adapter.cpp +388 -0
  788. package/cpp/llama.cpp/src/llama-adapter.h +76 -0
  789. package/cpp/llama.cpp/src/llama-arch.cpp +1743 -0
  790. package/cpp/llama.cpp/src/llama-arch.h +437 -0
  791. package/cpp/llama.cpp/src/llama-batch.cpp +372 -0
  792. package/cpp/llama.cpp/src/llama-batch.h +89 -0
  793. package/cpp/llama.cpp/src/llama-chat.cpp +663 -0
  794. package/cpp/llama.cpp/src/llama-chat.h +58 -0
  795. package/cpp/llama.cpp/src/llama-context.cpp +2459 -0
  796. package/cpp/llama.cpp/src/llama-context.h +246 -0
  797. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -0
  798. package/cpp/llama.cpp/src/llama-cparams.h +39 -0
  799. package/cpp/llama.cpp/src/llama-grammar.cpp +1219 -0
  800. package/cpp/llama.cpp/src/llama-grammar.h +173 -0
  801. package/cpp/llama.cpp/src/llama-graph.cpp +1713 -0
  802. package/cpp/llama.cpp/src/llama-graph.h +595 -0
  803. package/cpp/llama.cpp/src/llama-hparams.cpp +79 -0
  804. package/cpp/llama.cpp/src/llama-hparams.h +161 -0
  805. package/cpp/llama.cpp/src/llama-impl.cpp +167 -0
  806. package/cpp/llama.cpp/src/llama-impl.h +61 -0
  807. package/cpp/llama.cpp/src/llama-io.cpp +15 -0
  808. package/cpp/llama.cpp/src/llama-io.h +35 -0
  809. package/cpp/llama.cpp/src/llama-kv-cache.cpp +2486 -0
  810. package/cpp/llama.cpp/src/llama-kv-cache.h +405 -0
  811. package/cpp/llama.cpp/src/llama-memory.cpp +1 -0
  812. package/cpp/llama.cpp/src/llama-memory.h +31 -0
  813. package/cpp/llama.cpp/src/llama-mmap.cpp +600 -0
  814. package/cpp/llama.cpp/src/llama-mmap.h +68 -0
  815. package/cpp/llama.cpp/src/llama-model-loader.cpp +1133 -0
  816. package/cpp/llama.cpp/src/llama-model-loader.h +169 -0
  817. package/cpp/llama.cpp/src/llama-model.cpp +13453 -0
  818. package/cpp/llama.cpp/src/llama-model.h +420 -0
  819. package/cpp/llama.cpp/src/llama-quant.cpp +964 -0
  820. package/cpp/llama.cpp/src/llama-quant.h +1 -0
  821. package/cpp/llama.cpp/src/llama-sampling.cpp +2575 -0
  822. package/cpp/llama.cpp/src/llama-sampling.h +32 -0
  823. package/cpp/llama.cpp/src/llama-vocab.cpp +3313 -0
  824. package/cpp/llama.cpp/src/llama-vocab.h +125 -0
  825. package/cpp/llama.cpp/src/llama.cpp +340 -0
  826. package/cpp/llama.cpp/src/unicode-data.cpp +7034 -0
  827. package/cpp/llama.cpp/src/unicode-data.h +20 -0
  828. package/cpp/llama.cpp/src/unicode.cpp +849 -0
  829. package/cpp/llama.cpp/src/unicode.h +66 -0
  830. package/cpp/rn-completion.cpp +431 -0
  831. package/cpp/rn-llama.hpp +60 -0
  832. package/cpp/rn-utils.hpp +331 -0
  833. package/ios/OnLoad.mm +22 -0
  834. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec-generated.mm +64 -0
  835. package/ios/generated/RNLlamaCppSpec/RNLlamaCppSpec.h +251 -0
  836. package/ios/generated/RNLlamaCppSpecJSI-generated.cpp +42 -0
  837. package/ios/generated/RNLlamaCppSpecJSI.h +336 -0
  838. package/ios/include/chat.h +135 -0
  839. package/ios/include/common/base64.hpp +392 -0
  840. package/ios/include/common/json.hpp +24766 -0
  841. package/ios/include/common/minja/chat-template.hpp +537 -0
  842. package/ios/include/common/minja/minja.hpp +2941 -0
  843. package/ios/include/common.h +668 -0
  844. package/ios/include/json-schema-to-grammar.h +21 -0
  845. package/ios/include/llama-cpp.h +30 -0
  846. package/ios/include/llama.h +1440 -0
  847. package/ios/include/log.h +103 -0
  848. package/ios/include/ngram-cache.h +101 -0
  849. package/ios/include/sampling.h +107 -0
  850. package/ios/include/speculative.h +28 -0
  851. package/ios/libs/llama.xcframework/Info.plist +135 -0
  852. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  853. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  854. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  855. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  856. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  857. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  858. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  859. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  860. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +2192 -0
  861. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/gguf.h +202 -0
  862. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +1440 -0
  863. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Info.plist +36 -0
  864. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Modules/module.modulemap +17 -0
  865. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  866. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  867. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  868. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  869. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  870. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  871. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  872. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  873. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  874. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  875. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  876. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  877. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  878. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Info.plist +36 -0
  879. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  880. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  881. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  882. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  883. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  884. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3442 -0
  885. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-alloc.h +76 -0
  886. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-backend.h +354 -0
  887. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-blas.h +25 -0
  888. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-cpu.h +143 -0
  889. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml-metal.h +66 -0
  890. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +2192 -0
  891. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/gguf.h +202 -0
  892. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +1440 -0
  893. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Modules/module.modulemap +17 -0
  894. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Resources/Info.plist +32 -0
  895. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-alloc.h +76 -0
  896. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-backend.h +354 -0
  897. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-blas.h +25 -0
  898. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-cpu.h +143 -0
  899. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml-metal.h +66 -0
  900. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +2192 -0
  901. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/gguf.h +202 -0
  902. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +1440 -0
  903. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Modules/module.modulemap +17 -0
  904. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Resources/Info.plist +32 -0
  905. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  906. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-alloc.h +76 -0
  907. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-backend.h +354 -0
  908. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-blas.h +25 -0
  909. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-cpu.h +143 -0
  910. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml-metal.h +66 -0
  911. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +2192 -0
  912. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/gguf.h +202 -0
  913. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +1440 -0
  914. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Modules/module.modulemap +17 -0
  915. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Resources/Info.plist +32 -0
  916. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  917. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  918. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  919. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  920. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4492 -0
  921. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  922. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  923. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  924. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  925. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  926. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +2192 -0
  927. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/gguf.h +202 -0
  928. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +1440 -0
  929. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Info.plist +35 -0
  930. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Modules/module.modulemap +17 -0
  931. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  932. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  933. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  934. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4513 -0
  935. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3440 -0
  936. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  937. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  938. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  939. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  940. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  941. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  942. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  943. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  944. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Info.plist +35 -0
  945. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  946. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  947. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  948. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  949. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4528 -0
  950. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-alloc.h +76 -0
  951. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-backend.h +354 -0
  952. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-blas.h +25 -0
  953. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-cpu.h +143 -0
  954. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml-metal.h +66 -0
  955. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +2192 -0
  956. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/gguf.h +202 -0
  957. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +1440 -0
  958. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Info.plist +32 -0
  959. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Modules/module.modulemap +17 -0
  960. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  961. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Info.plist +20 -0
  962. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  963. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4549 -0
  964. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3470 -0
  965. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-alloc.h +76 -0
  966. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-backend.h +354 -0
  967. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-blas.h +25 -0
  968. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-cpu.h +143 -0
  969. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml-metal.h +66 -0
  970. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +2192 -0
  971. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/gguf.h +202 -0
  972. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +1440 -0
  973. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Info.plist +32 -0
  974. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Modules/module.modulemap +17 -0
  975. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  976. package/lib/module/NativeRNLlamaCpp.js +35 -0
  977. package/lib/module/NativeRNLlamaCpp.js.map +1 -0
  978. package/lib/module/index.js +20 -0
  979. package/lib/module/index.js.map +1 -0
  980. package/lib/module/package.json +1 -0
  981. package/lib/typescript/package.json +1 -0
  982. package/lib/typescript/src/NativeRNLlamaCpp.d.ts +222 -0
  983. package/lib/typescript/src/NativeRNLlamaCpp.d.ts.map +1 -0
  984. package/lib/typescript/src/index.d.ts +5 -0
  985. package/lib/typescript/src/index.d.ts.map +1 -0
  986. package/package.json +161 -0
  987. package/react-native.config.js +15 -0
  988. package/src/NativeRNLlamaCpp.ts +282 -0
  989. package/src/index.tsx +54 -0
@@ -0,0 +1,1219 @@
1
+ #include "llama-grammar.h"
2
+
3
+ #include "llama-impl.h"
4
+ #include "llama-vocab.h"
5
+ #include "llama-sampling.h"
6
+
7
+ #include <cmath>
8
+ #include <algorithm>
9
+ #include <stdexcept>
10
+
11
+ //
12
+ // helpers
13
+ //
14
+
15
+ // NOTE: assumes valid utf8 (but checks for overrun)
16
+ static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
17
+ static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
18
+ uint8_t first_byte = static_cast<uint8_t>(*src);
19
+ uint8_t highbits = first_byte >> 4;
20
+ int len = lookup[highbits];
21
+ uint8_t mask = (1 << (8 - len)) - 1;
22
+ uint32_t value = first_byte & mask;
23
+ const char * end = src + len; // may overrun!
24
+ const char * pos = src + 1;
25
+ for ( ; pos < end && *pos; pos++) {
26
+ value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
27
+ }
28
+ return std::make_pair(value, pos);
29
+ }
30
+
31
+ static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
32
+ const std::string & src,
33
+ llama_partial_utf8 partial_start) {
34
+ static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
35
+ const char * pos = src.c_str();
36
+ std::vector<uint32_t> code_points;
37
+
38
+ // common english strings have the same number of codepoints and bytes. `+ 1` for the terminating 0.
39
+ code_points.reserve(src.size() + 1);
40
+ uint32_t value = partial_start.value;
41
+ int n_remain = partial_start.n_remain;
42
+
43
+ // continue previous decode, if applicable
44
+ while (*pos != 0 && n_remain > 0) {
45
+ uint8_t next_byte = static_cast<uint8_t>(*pos);
46
+ if ((next_byte >> 6) != 2) {
47
+ // invalid sequence, abort
48
+ code_points.push_back(0);
49
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, -1 });
50
+ }
51
+ value = (value << 6) + (next_byte & 0x3F);
52
+ ++pos;
53
+ --n_remain;
54
+ }
55
+
56
+ if (partial_start.n_remain > 0 && n_remain == 0) {
57
+ code_points.push_back(value);
58
+ }
59
+
60
+ // decode any subsequent utf-8 sequences, which may end in an incomplete one
61
+ while (*pos != 0) {
62
+ uint8_t first_byte = static_cast<uint8_t>(*pos);
63
+ uint8_t highbits = first_byte >> 4;
64
+ n_remain = lookup[highbits] - 1;
65
+
66
+ if (n_remain < 0) {
67
+ // invalid sequence, abort
68
+ code_points.clear();
69
+ code_points.push_back(0);
70
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ 0, n_remain });
71
+ }
72
+
73
+ uint8_t mask = (1 << (7 - n_remain)) - 1;
74
+ value = first_byte & mask;
75
+
76
+ ++pos;
77
+ while (*pos != 0 && n_remain > 0) {
78
+ value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
79
+ ++pos;
80
+ --n_remain;
81
+ }
82
+ if (n_remain == 0) {
83
+ code_points.push_back(value);
84
+ }
85
+ }
86
+ code_points.push_back(0);
87
+
88
+ return std::make_pair(std::move(code_points), llama_partial_utf8{ value, n_remain });
89
+ }
90
+
91
+ static bool is_digit_char(char c) {
92
+ return '0' <= c && c <= '9';
93
+ }
94
+
95
+ static bool is_word_char(char c) {
96
+ return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || is_digit_char(c);
97
+ }
98
+
99
+ static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
100
+ const char * pos = src;
101
+ const char * end = src + size;
102
+ uint32_t value = 0;
103
+ for ( ; pos < end && *pos; pos++) {
104
+ value <<= 4;
105
+ char c = *pos;
106
+ if ('a' <= c && c <= 'f') {
107
+ value += c - 'a' + 10;
108
+ } else if ('A' <= c && c <= 'F') {
109
+ value += c - 'A' + 10;
110
+ } else if ('0' <= c && c <= '9') {
111
+ value += c - '0';
112
+ } else {
113
+ break;
114
+ }
115
+ }
116
+ if (pos != end) {
117
+ throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
118
+ }
119
+ return std::make_pair(value, pos);
120
+ }
121
+
122
+ static const char * parse_space(const char * src, bool newline_ok) {
123
+ const char * pos = src;
124
+ while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
125
+ (newline_ok && (*pos == '\r' || *pos == '\n'))) {
126
+ if (*pos == '#') {
127
+ while (*pos && *pos != '\r' && *pos != '\n') {
128
+ pos++;
129
+ }
130
+ } else {
131
+ pos++;
132
+ }
133
+ }
134
+ return pos;
135
+ }
136
+
137
+ static const char * parse_name(const char * src) {
138
+ const char * pos = src;
139
+ while (is_word_char(*pos)) {
140
+ pos++;
141
+ }
142
+ if (pos == src) {
143
+ throw std::runtime_error(std::string("expecting name at ") + src);
144
+ }
145
+ return pos;
146
+ }
147
+
148
+ static const char * parse_int(const char * src) {
149
+ const char * pos = src;
150
+ while (is_digit_char(*pos)) {
151
+ pos++;
152
+ }
153
+ if (pos == src) {
154
+ throw std::runtime_error(std::string("expecting integer at ") + src);
155
+ }
156
+ return pos;
157
+ }
158
+
159
+ static std::pair<uint32_t, const char *> parse_char(const char * src) {
160
+ if (*src == '\\') {
161
+ switch (src[1]) {
162
+ case 'x': return parse_hex(src + 2, 2);
163
+ case 'u': return parse_hex(src + 2, 4);
164
+ case 'U': return parse_hex(src + 2, 8);
165
+ case 't': return std::make_pair('\t', src + 2);
166
+ case 'r': return std::make_pair('\r', src + 2);
167
+ case 'n': return std::make_pair('\n', src + 2);
168
+ case '\\':
169
+ case '"':
170
+ case '[':
171
+ case ']':
172
+ return std::make_pair(src[1], src + 2);
173
+ default:
174
+ throw std::runtime_error(std::string("unknown escape at ") + src);
175
+ }
176
+ } else if (*src) {
177
+ return decode_utf8(src);
178
+ }
179
+ throw std::runtime_error("unexpected end of input");
180
+ }
181
+
182
+ static void print_grammar_char(FILE * file, uint32_t c) {
183
+ if (0x20 <= c && c <= 0x7f) {
184
+ fprintf(file, "%c", static_cast<char>(c));
185
+ } else {
186
+ // cop out of encoding UTF-8
187
+ fprintf(file, "<U+%04X>", c);
188
+ }
189
+ }
190
+
191
+ static bool is_char_element(llama_grammar_element elem) {
192
+ switch (elem.type) {
193
+ case LLAMA_GRETYPE_CHAR: return true;
194
+ case LLAMA_GRETYPE_CHAR_NOT: return true;
195
+ case LLAMA_GRETYPE_CHAR_ALT: return true;
196
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
197
+ case LLAMA_GRETYPE_CHAR_ANY: return true;
198
+ default: return false;
199
+ }
200
+ }
201
+
202
+ static void print_rule_binary(FILE * file, const llama_grammar_rule & rule) {
203
+ for (auto elem : rule) {
204
+ switch (elem.type) {
205
+ case LLAMA_GRETYPE_END: fprintf(file, "END"); break;
206
+ case LLAMA_GRETYPE_ALT: fprintf(file, "ALT"); break;
207
+ case LLAMA_GRETYPE_RULE_REF: fprintf(file, "RULE_REF"); break;
208
+ case LLAMA_GRETYPE_CHAR: fprintf(file, "CHAR"); break;
209
+ case LLAMA_GRETYPE_CHAR_NOT: fprintf(file, "CHAR_NOT"); break;
210
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
211
+ case LLAMA_GRETYPE_CHAR_ALT: fprintf(file, "CHAR_ALT"); break;
212
+ case LLAMA_GRETYPE_CHAR_ANY: fprintf(file, "CHAR_ANY"); break;
213
+ }
214
+ switch (elem.type) {
215
+ case LLAMA_GRETYPE_END:
216
+ case LLAMA_GRETYPE_ALT:
217
+ case LLAMA_GRETYPE_RULE_REF:
218
+ fprintf(file, "(%u) ", elem.value);
219
+ break;
220
+ case LLAMA_GRETYPE_CHAR:
221
+ case LLAMA_GRETYPE_CHAR_NOT:
222
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
223
+ case LLAMA_GRETYPE_CHAR_ALT:
224
+ case LLAMA_GRETYPE_CHAR_ANY:
225
+ fprintf(file, "(\"");
226
+ print_grammar_char(file, elem.value);
227
+ fprintf(file, "\") ");
228
+ break;
229
+ }
230
+ }
231
+ fprintf(file, "\n");
232
+ }
233
+
234
+ static void print_rule(
235
+ FILE * file,
236
+ uint32_t rule_id,
237
+ const llama_grammar_rule & rule,
238
+ const std::map<uint32_t, std::string> & symbol_id_names) {
239
+ if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
240
+ throw std::runtime_error(
241
+ "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
242
+ }
243
+ fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
244
+ for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
245
+ llama_grammar_element elem = rule[i];
246
+ switch (elem.type) {
247
+ case LLAMA_GRETYPE_END:
248
+ throw std::runtime_error(
249
+ "unexpected end of rule: " + std::to_string(rule_id) + "," +
250
+ std::to_string(i));
251
+ case LLAMA_GRETYPE_ALT:
252
+ fprintf(file, "| ");
253
+ break;
254
+ case LLAMA_GRETYPE_RULE_REF:
255
+ fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
256
+ break;
257
+ case LLAMA_GRETYPE_CHAR:
258
+ fprintf(file, "[");
259
+ print_grammar_char(file, elem.value);
260
+ break;
261
+ case LLAMA_GRETYPE_CHAR_NOT:
262
+ fprintf(file, "[^");
263
+ print_grammar_char(file, elem.value);
264
+ break;
265
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
266
+ if (i == 0 || !is_char_element(rule[i - 1])) {
267
+ throw std::runtime_error(
268
+ "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
269
+ std::to_string(rule_id) + "," + std::to_string(i));
270
+ }
271
+ fprintf(file, "-");
272
+ print_grammar_char(file, elem.value);
273
+ break;
274
+ case LLAMA_GRETYPE_CHAR_ALT:
275
+ if (i == 0 || !is_char_element(rule[i - 1])) {
276
+ throw std::runtime_error(
277
+ "LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
278
+ std::to_string(rule_id) + "," + std::to_string(i));
279
+ }
280
+ print_grammar_char(file, elem.value);
281
+ break;
282
+ case LLAMA_GRETYPE_CHAR_ANY:
283
+ fprintf(file, ".");
284
+ break;
285
+ }
286
+ if (is_char_element(elem)) {
287
+ switch (rule[i + 1].type) {
288
+ case LLAMA_GRETYPE_CHAR_ALT:
289
+ case LLAMA_GRETYPE_CHAR_RNG_UPPER:
290
+ case LLAMA_GRETYPE_CHAR_ANY:
291
+ break;
292
+ default:
293
+ fprintf(file, "] ");
294
+ }
295
+ }
296
+ }
297
+ fprintf(file, "\n");
298
+ }
299
+
300
+ //
301
+ // implementation
302
+ //
303
+
304
+ uint32_t llama_grammar_parser::get_symbol_id(const char * src, size_t len) {
305
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
306
+ auto result = symbol_ids.emplace(std::string(src, len), next_id);
307
+ return result.first->second;
308
+ }
309
+
310
+ uint32_t llama_grammar_parser::generate_symbol_id(const std::string & base_name) {
311
+ uint32_t next_id = static_cast<uint32_t>(symbol_ids.size());
312
+ symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
313
+ return next_id;
314
+ }
315
+
316
+ void llama_grammar_parser::add_rule(uint32_t rule_id, const llama_grammar_rule & rule) {
317
+ if (rules.size() <= rule_id) {
318
+ rules.resize(rule_id + 1);
319
+ }
320
+ rules[rule_id] = rule;
321
+ }
322
+
323
+ const char * llama_grammar_parser::parse_alternates(
324
+ const char * src,
325
+ const std::string & rule_name,
326
+ uint32_t rule_id,
327
+ bool is_nested) {
328
+ llama_grammar_rule rule;
329
+ const char * pos = parse_sequence(src, rule_name, rule, is_nested);
330
+ while (*pos == '|') {
331
+ rule.push_back({LLAMA_GRETYPE_ALT, 0});
332
+ pos = parse_space(pos + 1, true);
333
+ pos = parse_sequence(pos, rule_name, rule, is_nested);
334
+ }
335
+ rule.push_back({LLAMA_GRETYPE_END, 0});
336
+ add_rule(rule_id, rule);
337
+ return pos;
338
+ }
339
+
340
+ const char * llama_grammar_parser::parse_sequence(
341
+ const char * src,
342
+ const std::string & rule_name,
343
+ llama_grammar_rule & rule,
344
+ bool is_nested) {
345
+ size_t last_sym_start = rule.size();
346
+ const char * pos = src;
347
+
348
+ auto handle_repetitions = [&](int min_times, int max_times) {
349
+
350
+ if (last_sym_start == rule.size()) {
351
+ throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
352
+ }
353
+
354
+ // apply transformation to previous symbol (last_sym_start to end) according to
355
+ // the following rewrite rules:
356
+ // S{m,n} --> S S S (m times) S'(n-m)
357
+ // S'(x) ::= S S'(x-1) |
358
+ // (... n-m definitions of these S' rules ...)
359
+ // S'(1) ::= S |
360
+ // S{m,} --> S S S (m times) S'
361
+ // S' ::= S S' |
362
+ // S* --> S{0,}
363
+ // --> S' ::= S S' |
364
+ // S+ --> S{1,}
365
+ // --> S S'
366
+ // S' ::= S S' |
367
+ // S? --> S{0,1}
368
+ // --> S'
369
+ // S' ::= S |
370
+
371
+ llama_grammar_rule prev_rule(rule.begin() + last_sym_start, rule.end());
372
+ if (min_times == 0) {
373
+ rule.resize(last_sym_start);
374
+ } else {
375
+ // Repeat the previous elements (min_times - 1) times
376
+ for (int i = 1; i < min_times; i++) {
377
+ rule.insert(rule.end(), prev_rule.begin(), prev_rule.end());
378
+ }
379
+ }
380
+
381
+ uint32_t last_rec_rule_id = 0;
382
+ auto n_opt = max_times < 0 ? 1 : max_times - min_times;
383
+
384
+ llama_grammar_rule rec_rule(prev_rule);
385
+ for (int i = 0; i < n_opt; i++) {
386
+ rec_rule.resize(prev_rule.size());
387
+ uint32_t rec_rule_id = generate_symbol_id( rule_name);
388
+ if (i > 0 || max_times < 0) {
389
+ rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
390
+ }
391
+ rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
392
+ rec_rule.push_back({LLAMA_GRETYPE_END, 0});
393
+ add_rule( rec_rule_id, rec_rule);
394
+ last_rec_rule_id = rec_rule_id;
395
+ }
396
+ if (n_opt > 0) {
397
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
398
+ }
399
+ };
400
+
401
+ while (*pos) {
402
+ if (*pos == '"') { // literal string
403
+ pos++;
404
+ last_sym_start = rule.size();
405
+ while (*pos != '"') {
406
+ if (!*pos) {
407
+ throw std::runtime_error("unexpected end of input");
408
+ }
409
+ auto char_pair = parse_char(pos);
410
+ pos = char_pair.second;
411
+ rule.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
412
+ }
413
+ pos = parse_space(pos + 1, is_nested);
414
+ } else if (*pos == '[') { // char range(s)
415
+ pos++;
416
+ enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
417
+ if (*pos == '^') {
418
+ pos++;
419
+ start_type = LLAMA_GRETYPE_CHAR_NOT;
420
+ }
421
+ last_sym_start = rule.size();
422
+ while (*pos != ']') {
423
+ if (!*pos) {
424
+ throw std::runtime_error("unexpected end of input");
425
+ }
426
+ auto char_pair = parse_char(pos);
427
+ pos = char_pair.second;
428
+ enum llama_gretype type = last_sym_start < rule.size()
429
+ ? LLAMA_GRETYPE_CHAR_ALT
430
+ : start_type;
431
+
432
+ rule.push_back({type, char_pair.first});
433
+ if (pos[0] == '-' && pos[1] != ']') {
434
+ if (!pos[1]) {
435
+ throw std::runtime_error("unexpected end of input");
436
+ }
437
+ auto endchar_pair = parse_char(pos + 1);
438
+ pos = endchar_pair.second;
439
+ rule.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
440
+ }
441
+ }
442
+ pos = parse_space(pos + 1, is_nested);
443
+ } else if (is_word_char(*pos)) { // rule reference
444
+ const char * name_end = parse_name(pos);
445
+ uint32_t ref_rule_id = get_symbol_id(pos, name_end - pos);
446
+ pos = parse_space(name_end, is_nested);
447
+ last_sym_start = rule.size();
448
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
449
+ } else if (*pos == '(') { // grouping
450
+ // parse nested alternates into synthesized rule
451
+ pos = parse_space(pos + 1, true);
452
+ uint32_t sub_rule_id = generate_symbol_id(rule_name);
453
+ pos = parse_alternates(pos, rule_name, sub_rule_id, true);
454
+ last_sym_start = rule.size();
455
+ // output reference to synthesized rule
456
+ rule.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
457
+ if (*pos != ')') {
458
+ throw std::runtime_error(std::string("expecting ')' at ") + pos);
459
+ }
460
+ pos = parse_space(pos + 1, is_nested);
461
+ } else if (*pos == '.') { // any char
462
+ last_sym_start = rule.size();
463
+ rule.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
464
+ pos = parse_space(pos + 1, is_nested);
465
+ } else if (*pos == '*') {
466
+ pos = parse_space(pos + 1, is_nested);
467
+ handle_repetitions(0, -1);
468
+ } else if (*pos == '+') {
469
+ pos = parse_space(pos + 1, is_nested);
470
+ handle_repetitions(1, -1);
471
+ } else if (*pos == '?') {
472
+ pos = parse_space(pos + 1, is_nested);
473
+ handle_repetitions(0, 1);
474
+ } else if (*pos == '{') {
475
+ pos = parse_space(pos + 1, is_nested);
476
+
477
+ if (!is_digit_char(*pos)) {
478
+ throw std::runtime_error(std::string("expecting an int at ") + pos);
479
+ }
480
+ const char * int_end = parse_int(pos);
481
+ int min_times = std::stoul(std::string(pos, int_end - pos));
482
+ pos = parse_space(int_end, is_nested);
483
+
484
+ int max_times = -1;
485
+
486
+ if (*pos == '}') {
487
+ max_times = min_times;
488
+ pos = parse_space(pos + 1, is_nested);
489
+ } else if (*pos == ',') {
490
+ pos = parse_space(pos + 1, is_nested);
491
+
492
+ if (is_digit_char(*pos)) {
493
+ const char * int_end = parse_int(pos);
494
+ max_times = std::stoul(std::string(pos, int_end - pos));
495
+ pos = parse_space(int_end, is_nested);
496
+ }
497
+
498
+ if (*pos != '}') {
499
+ throw std::runtime_error(std::string("expecting '}' at ") + pos);
500
+ }
501
+ pos = parse_space(pos + 1, is_nested);
502
+ } else {
503
+ throw std::runtime_error(std::string("expecting ',' at ") + pos);
504
+ }
505
+ handle_repetitions(min_times, max_times);
506
+ } else {
507
+ break;
508
+ }
509
+ }
510
+ return pos;
511
+ }
512
+
513
+ const char * llama_grammar_parser::parse_rule(const char * src) {
514
+ const char * name_end = parse_name(src);
515
+ const char * pos = parse_space(name_end, false);
516
+ size_t name_len = name_end - src;
517
+ uint32_t rule_id = get_symbol_id(src, name_len);
518
+ const std::string name(src, name_len);
519
+
520
+ if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
521
+ throw std::runtime_error(std::string("expecting ::= at ") + pos);
522
+ }
523
+ pos = parse_space(pos + 3, true);
524
+
525
+ pos = parse_alternates(pos, name, rule_id, false);
526
+
527
+ if (*pos == '\r') {
528
+ pos += pos[1] == '\n' ? 2 : 1;
529
+ } else if (*pos == '\n') {
530
+ pos++;
531
+ } else if (*pos) {
532
+ throw std::runtime_error(std::string("expecting newline or end at ") + pos);
533
+ }
534
+ return parse_space(pos, true);
535
+ }
536
+
537
+ bool llama_grammar_parser::parse(const char * src) {
538
+ try {
539
+ const char * pos = parse_space(src, true);
540
+ while (*pos) {
541
+ pos = parse_rule(pos);
542
+ }
543
+ // Validate the state to ensure that all rules are defined
544
+ for (const auto & rule : rules) {
545
+ if (rule.empty()) {
546
+ throw std::runtime_error("Undefined rule");
547
+ }
548
+ for (const auto & elem : rule) {
549
+ if (elem.type == LLAMA_GRETYPE_RULE_REF) {
550
+ // Ensure that the rule at that location exists
551
+ if (elem.value >= rules.size() || rules[elem.value].empty()) {
552
+ // Get the name of the rule that is missing
553
+ for (const auto & kv : symbol_ids) {
554
+ if (kv.second == elem.value) {
555
+ throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
556
+ }
557
+ }
558
+ }
559
+ }
560
+ }
561
+ }
562
+ } catch (const std::exception & err) {
563
+ fprintf(stderr, "%s: error parsing grammar: %s\n\n%s\n", __func__, err.what(), src);
564
+ rules.clear();
565
+ return false;
566
+ }
567
+
568
+ return true;
569
+ }
570
+
571
+ void llama_grammar_parser::print(FILE * file) {
572
+ try {
573
+ std::map<uint32_t, std::string> symbol_id_names;
574
+ for (const auto & kv : symbol_ids) {
575
+ symbol_id_names[kv.second] = kv.first;
576
+ }
577
+ for (size_t i = 0, end = rules.size(); i < end; i++) {
578
+ // fprintf(file, "%zu: ", i);
579
+ // print_rule_binary(file, rules[i]);
580
+ print_rule(file, uint32_t(i), rules[i], symbol_id_names);
581
+ // fprintf(file, "\n");
582
+ }
583
+ } catch (const std::exception & err) {
584
+ fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
585
+ }
586
+ }
587
+
588
+ llama_grammar_stack llama_grammar_parser::c_rules() const {
589
+ llama_grammar_stack ret;
590
+ ret.reserve(rules.size());
591
+ for (const auto & rule : rules) {
592
+ ret.push_back(rule.data());
593
+ }
594
+ return ret;
595
+ }
596
+
597
+ // returns true iff pos points to the end of one of the definitions of a rule
598
+ static bool llama_grammar_is_end_of_sequence(const llama_grammar_element * pos) {
599
+ switch (pos->type) {
600
+ case LLAMA_GRETYPE_END: return true; // NOLINT
601
+ case LLAMA_GRETYPE_ALT: return true; // NOLINT
602
+ default: return false;
603
+ }
604
+ }
605
+
606
+ // returns true iff chr satisfies the char range at pos (regular or inverse range)
607
+ // asserts that pos is pointing to a char range element
608
+ static std::pair<bool, const llama_grammar_element *> llama_grammar_match_char(
609
+ const llama_grammar_element * pos,
610
+ const uint32_t chr) {
611
+ bool found = false;
612
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
613
+
614
+ GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT); // NOLINT
615
+
616
+ do {
617
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
618
+ // inclusive range, e.g. [a-z]
619
+ found = found || (pos->value <= chr && chr <= pos[1].value);
620
+ pos += 2;
621
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
622
+ // Any character matches "."
623
+ found = true;
624
+ pos += 1;
625
+ } else {
626
+ // exact char match, e.g. [a] or "a"
627
+ found = found || pos->value == chr;
628
+ pos += 1;
629
+ }
630
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
631
+
632
+ return std::make_pair(found == is_positive_char, pos);
633
+ }
634
+
635
+ // returns true iff some continuation of the given partial UTF-8 sequence could satisfy the char
636
+ // range at pos (regular or inverse range)
637
+ // asserts that pos is pointing to a char range element
638
+ static bool llama_grammar_match_partial_char(
639
+ const llama_grammar_element * pos,
640
+ const llama_partial_utf8 partial_utf8) {
641
+ bool is_positive_char = pos->type == LLAMA_GRETYPE_CHAR || pos->type == LLAMA_GRETYPE_CHAR_ANY;
642
+ GGML_ASSERT(is_positive_char || pos->type == LLAMA_GRETYPE_CHAR_NOT);
643
+
644
+ uint32_t partial_value = partial_utf8.value;
645
+ int n_remain = partial_utf8.n_remain;
646
+
647
+ // invalid sequence or 7-bit char split across 2 bytes (overlong)
648
+ if (n_remain < 0 || (n_remain == 1 && partial_value < 2)) {
649
+ return false;
650
+ }
651
+
652
+ // range of possible code points this partial UTF-8 sequence could complete to
653
+ uint32_t low = partial_value << (n_remain * 6);
654
+ uint32_t high = low | ((1 << (n_remain * 6)) - 1);
655
+
656
+ if (low == 0) {
657
+ if (n_remain == 2) {
658
+ low = 1 << 11;
659
+ } else if (n_remain == 3) {
660
+ low = 1 << 16;
661
+ }
662
+ }
663
+
664
+ do {
665
+ if (pos[1].type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
666
+ // inclusive range, e.g. [a-z]
667
+ if (pos->value <= high && low <= pos[1].value) {
668
+ return is_positive_char;
669
+ }
670
+ pos += 2;
671
+ } else if (pos->type == LLAMA_GRETYPE_CHAR_ANY) {
672
+ // Any character matches "."
673
+ return true;
674
+ } else {
675
+ // exact char match, e.g. [a] or "a"
676
+ if (low <= pos->value && pos->value <= high) {
677
+ return is_positive_char;
678
+ }
679
+ pos += 1;
680
+ }
681
+ } while (pos->type == LLAMA_GRETYPE_CHAR_ALT);
682
+
683
+ return !is_positive_char;
684
+ }
685
+
686
+ // transforms a grammar pushdown stack into N possible stacks, all ending
687
+ // at a character range (terminal element)
688
+ static void llama_grammar_advance_stack(
689
+ const llama_grammar_rules & rules,
690
+ const llama_grammar_stack & stack,
691
+ llama_grammar_stacks & new_stacks) {
692
+ if (stack.empty()) {
693
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
694
+ new_stacks.emplace_back(stack);
695
+ }
696
+ return;
697
+ }
698
+
699
+ const llama_grammar_element * pos = stack.back();
700
+
701
+ switch (pos->type) {
702
+ case LLAMA_GRETYPE_RULE_REF: {
703
+ const size_t rule_id = static_cast<size_t>(pos->value);
704
+ const llama_grammar_element * subpos = rules[rule_id].data();
705
+ do {
706
+ // init new stack without the top (pos)
707
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
708
+ if (!llama_grammar_is_end_of_sequence(pos + 1)) {
709
+ // if this rule ref is followed by another element, add that to stack
710
+ new_stack.push_back(pos + 1);
711
+ }
712
+ if (!llama_grammar_is_end_of_sequence(subpos)) {
713
+ // if alternate is nonempty, add to stack
714
+ new_stack.push_back(subpos);
715
+ }
716
+ llama_grammar_advance_stack(rules, new_stack, new_stacks);
717
+ while (!llama_grammar_is_end_of_sequence(subpos)) {
718
+ // scan to end of alternate def
719
+ subpos++;
720
+ }
721
+ if (subpos->type == LLAMA_GRETYPE_ALT) {
722
+ // there's another alternate def of this rule to process
723
+ subpos++;
724
+ } else {
725
+ break;
726
+ }
727
+ } while (true);
728
+ break;
729
+ }
730
+ case LLAMA_GRETYPE_CHAR:
731
+ case LLAMA_GRETYPE_CHAR_NOT:
732
+ case LLAMA_GRETYPE_CHAR_ANY:
733
+ if (std::find(new_stacks.begin(), new_stacks.end(), stack) == new_stacks.end()) {
734
+ // only add the stack if it's not a duplicate of one we already have
735
+ new_stacks.emplace_back(stack);
736
+ }
737
+ break;
738
+ default:
739
+ // end of alternate (LLAMA_GRETYPE_END, LLAMA_GRETYPE_ALT) or middle of char range
740
+ // (LLAMA_GRETYPE_CHAR_ALT, LLAMA_GRETYPE_CHAR_RNG_UPPER); stack should never be left on
741
+ // those
742
+ GGML_ABORT("fatal error");
743
+ }
744
+ }
745
+
746
+ static llama_grammar_candidates llama_grammar_reject_candidates(
747
+ const llama_grammar_rules & rules,
748
+ const llama_grammar_stacks & stacks,
749
+ const llama_grammar_candidates & candidates) {
750
+ GGML_ASSERT(!stacks.empty()); // REVIEW
751
+
752
+ if (candidates.empty()) {
753
+ return {};
754
+ }
755
+
756
+ auto rejects = llama_grammar_reject_candidates_for_stack(rules, stacks.front(), candidates);
757
+
758
+ for (size_t i = 1, size = stacks.size(); i < size; ++i) {
759
+ rejects = llama_grammar_reject_candidates_for_stack(rules, stacks[i], rejects);
760
+ }
761
+
762
+ return rejects;
763
+ }
764
+
765
+ static bool llama_grammar_detect_left_recursion(
766
+ const llama_grammar_rules & rules,
767
+ size_t rule_index,
768
+ std::vector<bool> * rules_visited,
769
+ std::vector<bool> * rules_in_progress,
770
+ std::vector<bool> * rules_may_be_empty) {
771
+ if ((*rules_in_progress)[rule_index]) {
772
+ return true;
773
+ }
774
+
775
+ (*rules_in_progress)[rule_index] = true;
776
+
777
+ const llama_grammar_rule & rule = rules[rule_index];
778
+
779
+ // First check if the rule might produce the empty string. This could be done combined with the second
780
+ // step but it's more readable as two steps.
781
+ bool at_rule_start = true;
782
+ for (size_t i = 0; i < rule.size(); i++) {
783
+ if (llama_grammar_is_end_of_sequence(&rule[i])) {
784
+ if (at_rule_start) {
785
+ (*rules_may_be_empty)[rule_index] = true;
786
+ break;
787
+ }
788
+ at_rule_start = true;
789
+ } else {
790
+ at_rule_start = false;
791
+ }
792
+ }
793
+
794
+ // Second, recurse into leftmost nonterminals (or next-leftmost as long as the previous nonterminal may
795
+ // be empty)
796
+ bool recurse_into_nonterminal = true;
797
+ for (size_t i = 0; i < rule.size(); i++) {
798
+ if (rule[i].type == LLAMA_GRETYPE_RULE_REF && recurse_into_nonterminal) {
799
+ if (llama_grammar_detect_left_recursion(rules, (size_t)rule[i].value, rules_visited, rules_in_progress, rules_may_be_empty)) {
800
+ return true;
801
+ }
802
+ if (!((*rules_may_be_empty)[(size_t)rule[i].value])) {
803
+ recurse_into_nonterminal = false;
804
+ }
805
+ } else if (llama_grammar_is_end_of_sequence(&rule[i])) {
806
+ recurse_into_nonterminal = true;
807
+ } else {
808
+ recurse_into_nonterminal = false;
809
+ }
810
+ }
811
+
812
+ (*rules_in_progress)[rule_index] = false;
813
+ (*rules_visited)[rule_index] = true;
814
+
815
+ return false;
816
+ }
817
+
818
+ const llama_grammar_rules & llama_grammar_get_rules(const struct llama_grammar * grammar) {
819
+ return grammar->rules;
820
+ }
821
+
822
+ llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar) {
823
+ return grammar->stacks;
824
+ }
825
+
826
+ void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
827
+ llama_grammar_stacks stacks_new;
828
+ stacks_new.reserve(grammar->stacks.size());
829
+
830
+ for (const auto & stack : grammar->stacks) {
831
+ if (stack.empty()) {
832
+ continue;
833
+ }
834
+
835
+ auto match = llama_grammar_match_char(stack.back(), chr);
836
+ if (match.first) {
837
+ const llama_grammar_element * pos = match.second;
838
+
839
+ // update top of stack to next element, if any
840
+ llama_grammar_stack new_stack(stack.begin(), stack.end() - 1);
841
+ if (!llama_grammar_is_end_of_sequence(pos)) {
842
+ new_stack.push_back(pos);
843
+ }
844
+ llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
845
+ }
846
+ }
847
+
848
+ grammar->stacks = std::move(stacks_new);
849
+ }
850
+
851
+ llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
852
+ const llama_grammar_rules & rules,
853
+ const llama_grammar_stack & stack,
854
+ const llama_grammar_candidates & candidates) {
855
+
856
+ llama_grammar_candidates rejects;
857
+ rejects.reserve(candidates.size());
858
+
859
+ if (stack.empty()) {
860
+ for (const auto & tok : candidates) {
861
+ if (*tok.code_points != 0 || tok.partial_utf8.n_remain != 0) {
862
+ rejects.push_back(tok);
863
+ }
864
+ }
865
+ return rejects;
866
+ }
867
+
868
+ const llama_grammar_element * stack_pos = stack.back();
869
+
870
+ llama_grammar_candidates next_candidates;
871
+ next_candidates.reserve(candidates.size());
872
+
873
+ for (const auto & tok : candidates) {
874
+ if (*tok.code_points == 0) {
875
+ // reached end of full codepoints in token, reject iff it ended in a partial sequence
876
+ // that cannot satisfy this position in grammar
877
+ if (tok.partial_utf8.n_remain != 0 &&
878
+ !llama_grammar_match_partial_char(stack_pos, tok.partial_utf8)) {
879
+ rejects.push_back(tok);
880
+ }
881
+ } else if (llama_grammar_match_char(stack_pos, *tok.code_points).first) {
882
+ next_candidates.push_back({ tok.index, tok.code_points + 1, tok.partial_utf8 });
883
+ } else {
884
+ rejects.push_back(tok);
885
+ }
886
+ }
887
+
888
+ const auto * stack_pos_after = llama_grammar_match_char(stack_pos, 0).second;
889
+
890
+ // update top of stack to next element, if any
891
+ llama_grammar_stack stack_after(stack.begin(), stack.end() - 1);
892
+ if (!llama_grammar_is_end_of_sequence(stack_pos_after)) {
893
+ stack_after.push_back(stack_pos_after);
894
+ }
895
+ llama_grammar_stacks next_stacks;
896
+ llama_grammar_advance_stack(rules, stack_after, next_stacks);
897
+
898
+ auto next_rejects = llama_grammar_reject_candidates(rules, next_stacks, next_candidates);
899
+ for (const auto & tok : next_rejects) {
900
+ rejects.push_back({ tok.index, tok.code_points - 1, tok.partial_utf8 });
901
+ }
902
+
903
+ return rejects;
904
+ }
905
+
906
+ ////////////////////
907
+
908
+ struct llama_grammar * llama_grammar_init_impl(
909
+ const struct llama_vocab * vocab,
910
+ const llama_grammar_element ** rules,
911
+ size_t n_rules,
912
+ size_t start_rule_index) {
913
+ const llama_grammar_element * pos;
914
+
915
+ // copy rule definitions into vectors
916
+ llama_grammar_rules vec_rules(n_rules);
917
+ for (size_t i = 0; i < n_rules; i++) {
918
+ for (pos = rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
919
+ vec_rules[i].push_back(*pos);
920
+ }
921
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
922
+ }
923
+
924
+ // Check for left recursion
925
+ std::vector<bool> rules_visited(n_rules);
926
+ std::vector<bool> rules_in_progress(n_rules);
927
+ std::vector<bool> rules_may_be_empty(n_rules);
928
+ for (size_t i = 0; i < n_rules; i++) {
929
+ if (rules_visited[i]) {
930
+ continue;
931
+ }
932
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
933
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
934
+ return nullptr;
935
+ }
936
+ }
937
+
938
+ // loop over alternates of start rule to build initial stacks
939
+ llama_grammar_stacks stacks;
940
+ pos = vec_rules[start_rule_index].data();
941
+ do {
942
+ llama_grammar_stack stack;
943
+ if (!llama_grammar_is_end_of_sequence(pos)) {
944
+ // if alternate is nonempty, add to stack
945
+ stack.push_back(pos);
946
+ }
947
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
948
+ while (!llama_grammar_is_end_of_sequence(pos)) {
949
+ // scan to end of alternate def
950
+ pos++;
951
+ }
952
+ if (pos->type == LLAMA_GRETYPE_ALT) {
953
+ // there's another alternate def of this rule to process
954
+ pos++;
955
+ } else {
956
+ break;
957
+ }
958
+ } while (true);
959
+
960
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
961
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
962
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
963
+ return new llama_grammar {
964
+ vocab,
965
+ std::move(vec_rules),
966
+ std::move(stacks),
967
+ /* .partial_utf8 = */ {},
968
+ /* .lazy =*/ false,
969
+ /* .awaiting_trigger = */ false,
970
+ /* .trigger_buffer = */ "",
971
+ /* .trigger_tokens = */ {},
972
+ /* .trigger_patterns = */ {},
973
+ };
974
+ }
975
+
976
+ struct llama_grammar * llama_grammar_init_impl(
977
+ const struct llama_vocab * vocab,
978
+ const char * grammar_str,
979
+ const char * grammar_root,
980
+ bool lazy,
981
+ const char ** trigger_patterns,
982
+ size_t num_trigger_patterns,
983
+ const llama_token * trigger_tokens,
984
+ size_t num_trigger_tokens) {
985
+ llama_grammar_parser parser;
986
+
987
+ // if there is a grammar, parse it
988
+ // rules will be empty (default) if there are parse errors
989
+ if (!parser.parse(grammar_str) || parser.rules.empty()) {
990
+ fprintf(stderr, "%s: failed to parse grammar\n", __func__);
991
+ return nullptr;
992
+ }
993
+
994
+ // Ensure that there is a "root" node.
995
+ if (parser.symbol_ids.find("root") == parser.symbol_ids.end()) {
996
+ fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
997
+ return nullptr;
998
+ }
999
+
1000
+ std::vector<const llama_grammar_element *> grammar_rules(parser.c_rules());
1001
+
1002
+ const size_t n_rules = grammar_rules.size();
1003
+ const size_t start_rule_index = parser.symbol_ids.at(grammar_root);
1004
+
1005
+ const llama_grammar_element * pos;
1006
+
1007
+ // copy rule definitions into vectors
1008
+ llama_grammar_rules vec_rules(n_rules);
1009
+ for (size_t i = 0; i < n_rules; i++) {
1010
+ for (pos = grammar_rules[i]; pos->type != LLAMA_GRETYPE_END; pos++) {
1011
+ vec_rules[i].push_back(*pos);
1012
+ }
1013
+ vec_rules[i].push_back({LLAMA_GRETYPE_END, 0});
1014
+ }
1015
+
1016
+ // Check for left recursion
1017
+ std::vector<bool> rules_visited(n_rules);
1018
+ std::vector<bool> rules_in_progress(n_rules);
1019
+ std::vector<bool> rules_may_be_empty(n_rules);
1020
+ for (size_t i = 0; i < n_rules; i++) {
1021
+ if (rules_visited[i]) {
1022
+ continue;
1023
+ }
1024
+ if (llama_grammar_detect_left_recursion(vec_rules, i, &rules_visited, &rules_in_progress, &rules_may_be_empty)) {
1025
+ LLAMA_LOG_ERROR("unsupported grammar, left recursion detected for nonterminal at index %zu", i);
1026
+ return nullptr;
1027
+ }
1028
+ }
1029
+
1030
+ // loop over alternates of start rule to build initial stacks
1031
+ llama_grammar_stacks stacks;
1032
+ pos = vec_rules[start_rule_index].data();
1033
+ do {
1034
+ llama_grammar_stack stack;
1035
+ if (!llama_grammar_is_end_of_sequence(pos)) {
1036
+ // if alternate is nonempty, add to stack
1037
+ stack.push_back(pos);
1038
+ }
1039
+ llama_grammar_advance_stack(vec_rules, stack, stacks);
1040
+ while (!llama_grammar_is_end_of_sequence(pos)) {
1041
+ // scan to end of alternate def
1042
+ pos++;
1043
+ }
1044
+ if (pos->type == LLAMA_GRETYPE_ALT) {
1045
+ // there's another alternate def of this rule to process
1046
+ pos++;
1047
+ } else {
1048
+ break;
1049
+ }
1050
+ } while (true);
1051
+
1052
+ std::vector<llama_token> vec_trigger_tokens;
1053
+ std::vector<llama_grammar_trigger_pattern> vec_trigger_patterns;
1054
+ for (size_t i = 0; i < num_trigger_tokens; i++) {
1055
+ GGML_ASSERT(trigger_tokens != nullptr);
1056
+ vec_trigger_tokens.push_back(trigger_tokens[i]);
1057
+ }
1058
+ for (size_t i = 0; i < num_trigger_patterns; i++) {
1059
+ GGML_ASSERT(trigger_patterns != nullptr);
1060
+ auto & trigger = vec_trigger_patterns.emplace_back();
1061
+ trigger.pattern = trigger_patterns[i];
1062
+ trigger.regex = std::regex(trigger.pattern);
1063
+ }
1064
+
1065
+ // Important: vec_rules has to be moved here, not copied, because stacks contains
1066
+ // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
1067
+ // then the pointers would be invalidated when the local vec_rules goes out of scope.
1068
+ return new llama_grammar {
1069
+ vocab,
1070
+ std::move(vec_rules),
1071
+ std::move(stacks),
1072
+ /* .partial_utf8 = */ {},
1073
+ /* .lazy = */ lazy,
1074
+ /* .awaiting_trigger = */ lazy,
1075
+ /* .trigger_buffer = */ "",
1076
+ std::move(vec_trigger_tokens),
1077
+ std::move(vec_trigger_patterns),
1078
+ };
1079
+ }
1080
+
1081
+ void llama_grammar_free_impl(struct llama_grammar * grammar) {
1082
+ if (grammar == nullptr) {
1083
+ return;
1084
+ }
1085
+
1086
+ delete grammar;
1087
+ }
1088
+
1089
+ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
1090
+ auto * result = new llama_grammar {
1091
+ grammar.vocab,
1092
+ grammar.rules,
1093
+ grammar.stacks,
1094
+ grammar.partial_utf8,
1095
+ grammar.lazy,
1096
+ grammar.awaiting_trigger,
1097
+ grammar.trigger_buffer,
1098
+ grammar.trigger_tokens,
1099
+ grammar.trigger_patterns,
1100
+ };
1101
+
1102
+ // redirect elements in stacks to point to new rules
1103
+ for (size_t is = 0; is < result->stacks.size(); is++) {
1104
+ for (size_t ie = 0; ie < result->stacks[is].size(); ie++) {
1105
+ for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
1106
+ for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
1107
+ if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
1108
+ result->stacks[is][ie] = &result->rules[ir0][ir1];
1109
+ }
1110
+ }
1111
+ }
1112
+ }
1113
+ }
1114
+
1115
+ return result;
1116
+ }
1117
+
1118
+ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
1119
+ GGML_ASSERT(grammar.vocab != nullptr);
1120
+
1121
+ if (grammar.awaiting_trigger) {
1122
+ return;
1123
+ }
1124
+
1125
+ bool allow_eog = false;
1126
+ for (const auto & stack : grammar.stacks) {
1127
+ if (stack.empty()) {
1128
+ allow_eog = true;
1129
+ break;
1130
+ }
1131
+ }
1132
+
1133
+ std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
1134
+ candidates_decoded.reserve(cur_p->size);
1135
+
1136
+ llama_grammar_candidates candidates_grammar;
1137
+ candidates_grammar.reserve(cur_p->size);
1138
+
1139
+ for (size_t i = 0; i < cur_p->size; ++i) {
1140
+ const llama_token id = cur_p->data[i].id;
1141
+ const std::string & piece = grammar.vocab->token_to_piece(id);
1142
+
1143
+ if (grammar.vocab->is_eog(id)) {
1144
+ if (!allow_eog) {
1145
+ cur_p->data[i].logit = -INFINITY;
1146
+ }
1147
+ } else if (piece.empty() || piece[0] == 0) {
1148
+ cur_p->data[i].logit = -INFINITY;
1149
+ } else {
1150
+ candidates_decoded.push_back(decode_utf8(piece, grammar.partial_utf8));
1151
+ candidates_grammar.push_back({ i, candidates_decoded.back().first.data(), candidates_decoded.back().second });
1152
+ }
1153
+ }
1154
+
1155
+ const auto rejects = llama_grammar_reject_candidates(grammar.rules, grammar.stacks, candidates_grammar);
1156
+ for (const auto & reject : rejects) {
1157
+ cur_p->data[reject.index].logit = -INFINITY;
1158
+ }
1159
+ }
1160
+
1161
+ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
1162
+ GGML_ASSERT(grammar.vocab != nullptr);
1163
+
1164
+ const auto & piece = grammar.vocab->token_to_piece(token);
1165
+
1166
+ if (grammar.awaiting_trigger) {
1167
+ if (std::find(grammar.trigger_tokens.begin(), grammar.trigger_tokens.end(), token) != grammar.trigger_tokens.end()) {
1168
+ grammar.awaiting_trigger = false;
1169
+ grammar.trigger_buffer.clear();
1170
+ llama_grammar_accept_str(grammar, piece);
1171
+ LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
1172
+ return;
1173
+ } else {
1174
+ grammar.trigger_buffer += piece;
1175
+
1176
+ std::smatch match;
1177
+ for (const auto & trigger_pattern : grammar.trigger_patterns) {
1178
+ if (std::regex_match(grammar.trigger_buffer, match, trigger_pattern.regex)) {
1179
+ grammar.awaiting_trigger = false;
1180
+ // get from the first match to the end of the string
1181
+ auto constrained_str = grammar.trigger_buffer.substr(match.position(1));
1182
+ // std::string constrained_str(match[1].first, grammar.trigger_buffer.end());
1183
+ grammar.trigger_buffer.clear();
1184
+ llama_grammar_accept_str(grammar, constrained_str);
1185
+ LLAMA_LOG_DEBUG("Grammar triggered on regex: '%s'\n", constrained_str.c_str());
1186
+ return;
1187
+ }
1188
+ }
1189
+ LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
1190
+ return;
1191
+ }
1192
+ }
1193
+
1194
+ if (grammar.vocab->is_eog(token)) {
1195
+ for (const auto & stack : grammar.stacks) {
1196
+ if (stack.empty()) {
1197
+ return;
1198
+ }
1199
+ }
1200
+ GGML_ABORT("fatal error");
1201
+ }
1202
+
1203
+ llama_grammar_accept_str(grammar, piece);
1204
+ }
1205
+
1206
+ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
1207
+ // Note terminating 0 in decoded string
1208
+ const auto decoded = decode_utf8(piece, grammar.partial_utf8);
1209
+ const auto & code_points = decoded.first;
1210
+
1211
+ for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
1212
+ llama_grammar_accept(&grammar, *it);
1213
+ }
1214
+
1215
+ grammar.partial_utf8 = decoded.second;
1216
+ if (grammar.stacks.empty()) {
1217
+ throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
1218
+ }
1219
+ }