@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/LlamaContext.cpp CHANGED
@@ -12,6 +12,91 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+// Helper function for formatted strings (for console logs)
+template<typename ... Args>
+static std::string format_string(const std::string& format, Args ... args) {
+  int size_s = std::snprintf(nullptr, 0, format.c_str(), args ...) + 1; // +1 for null terminator
+  if (size_s <= 0) { return "Error formatting string"; }
+  auto size = static_cast<size_t>(size_s);
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), args ...);
+  return std::string(buf.get(), buf.get() + size - 1); // -1 to exclude null terminator
+}
+
+// Computes FNV-1a hash of the data
+static std::string fnv_hash(const uint8_t* data, size_t len) {
+  const uint64_t fnv_prime = 0x100000001b3ULL;
+  uint64_t hash = 0xcbf29ce484222325ULL;
+
+  for (size_t i = 0; i < len; ++i) {
+    hash ^= data[i];
+    hash *= fnv_prime;
+  }
+  return std::to_string(hash);
+}
+
+static const std::string base64_chars =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789+/";
+
+// Base64 decoding function
+static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
+  std::vector<uint8_t> decoded;
+  int in_len = encoded_string.size();
+  int i = 0;
+  int j = 0;
+  int in_ = 0;
+  unsigned char char_array_4[4], char_array_3[3];
+
+  while (in_len-- && (encoded_string[in_] != '=')) {
+    if (isspace(encoded_string[in_])) {
+      in_++;
+      continue;
+    }
+
+    if (encoded_string[in_] == '=' || base64_chars.find(encoded_string[in_]) == std::string::npos) {
+      break;
+    }
+
+    char_array_4[i++] = encoded_string[in_]; in_++;
+    if (i == 4) {
+      for (i = 0; i < 4; i++) {
+        char_array_4[i] = base64_chars.find(char_array_4[i]);
+      }
+
+      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+      for (i = 0; i < 3; i++) {
+        decoded.push_back(char_array_3[i]);
+      }
+      i = 0;
+    }
+  }
+
+  if (i) {
+    for (j = i; j < 4; j++) {
+      char_array_4[j] = 0;
+    }
+
+    for (j = 0; j < 4; j++) {
+      char_array_4[j] = base64_chars.find(char_array_4[j]);
+    }
+
+    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+    for (j = 0; j < i - 1; j++) {
+      decoded.push_back(char_array_3[j]);
+    }
+  }
+
+  return decoded;
+}
+
 using json = nlohmann::ordered_json;
 
 // loadModelInfo(path: string): object
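For reference, the native fnv_hash helper added above is easy to reproduce on the TypeScript side. This is a sketch, not part of the diff; it assumes only the constants visible in the C++ and uses BigInt to get 64-bit wraparound semantics:

```ts
// Sketch: FNV-1a 64-bit hash mirroring the native fnv_hash() helper.
// BigInt arithmetic is masked to 64 bits to match uint64_t overflow.
function fnv1a64(data: Uint8Array): string {
  const FNV_PRIME = 0x100000001b3n;
  const MASK64 = 0xffffffffffffffffn;
  let hash = 0xcbf29ce484222325n; // FNV offset basis
  for (const byte of data) {
    hash ^= BigInt(byte);
    hash = (hash * FNV_PRIME) & MASK64;
  }
  return hash.toString(); // decimal string, like std::to_string(uint64_t)
}

// Example: fnv1a64(new TextEncoder().encode('hello'))
```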
@@ -116,6 +201,15 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
           InstanceMethod<&LlamaContext::GetLoadedLoraAdapters>(
               "getLoadedLoraAdapters",
               static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::InitMultimodal>(
+              "initMultimodal",
+              static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::IsMultimodalEnabled>(
+              "isMultimodalEnabled",
+              static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::ReleaseMultimodal>(
+              "releaseMultimodal",
+              static_cast<napi_property_attributes>(napi_enumerable)),
           InstanceMethod<&LlamaContext::Release>(
               "release", static_cast<napi_property_attributes>(napi_enumerable)),
           StaticMethod<&LlamaContext::ModelInfo>(
@@ -173,8 +267,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   auto options = info[0].As<Napi::Object>();
 
   common_params params;
-  params.model = get_option<std::string>(options, "model", "");
-  if (params.model.empty()) {
+  params.model.path = get_option<std::string>(options, "model", "");
+  if (params.model.path.empty()) {
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
   }
 
@@ -211,6 +305,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.flash_attn = get_option<bool>(options, "flash_attn", false);
   params.cache_type_k = kv_cache_type_from_str(get_option<std::string>(options, "cache_type_k", "f16").c_str());
   params.cache_type_v = kv_cache_type_from_str(get_option<std::string>(options, "cache_type_v", "f16").c_str());
+  params.ctx_shift = get_option<bool>(options, "ctx_shift", true);
 
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
@@ -447,7 +542,6 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
   auto messages = json_stringify(info[0].As<Napi::Array>());
-  printf("messages: %s\n", messages.c_str());
   auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
 
   auto has_params = info.Length() >= 2;
@@ -544,6 +638,25 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }
 
+  // Process image_paths parameter
+  std::vector<std::string> image_paths;
+  if (options.Has("image_paths")) {
+    if (options.Get("image_paths").IsArray()) {
+      auto image_paths_array = options.Get("image_paths").As<Napi::Array>();
+      for (size_t i = 0; i < image_paths_array.Length(); i++) {
+        image_paths.push_back(image_paths_array.Get(i).ToString().Utf8Value());
+      }
+    } else if (options.Get("image_paths").IsString()) {
+      image_paths.push_back(options.Get("image_paths").ToString().Utf8Value());
+    }
+  }
+
+  // Check if multimodal is enabled when image_paths are provided
+  if (!image_paths.empty() && !(_has_multimodal && _mtmd_ctx != nullptr)) {
+    Napi::Error::New(env, "Multimodal support must be enabled via initMultimodal to use image_paths").ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+
   int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
 
   common_params params = _sess->params();
@@ -726,17 +839,17 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format, image_paths);
   worker->Queue();
   _wip = worker;
-  worker->onComplete([this]() { _wip = nullptr; });
+  worker->OnComplete([this]() { _wip = nullptr; });
   return worker->Promise();
 }
 
 // stopCompletion(): void
 void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
   if (_wip != nullptr) {
-    _wip->Stop();
+    _wip->SetStop();
   }
 }
 
@@ -889,14 +1002,110 @@ Napi::Value LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info)
 Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   auto env = info.Env();
   if (_wip != nullptr) {
-    _wip->Stop();
+    _wip->SetStop();
   }
   if (_sess == nullptr) {
     auto promise = Napi::Promise::Deferred(env);
     promise.Resolve(env.Undefined());
     return promise.Promise();
   }
+
+  // Clear the mtmd context reference in the session
+  if (_mtmd_ctx != nullptr) {
+    _sess->set_mtmd_ctx(nullptr);
+  }
+
   auto *worker = new DisposeWorker(info, std::move(_sess));
   worker->Queue();
   return worker->Promise();
 }
+
+LlamaContext::~LlamaContext() {
+  if (_mtmd_ctx != nullptr) {
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+}
+
+// initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
+Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+
+  if (info.Length() < 1 || !info[0].IsObject()) {
+    Napi::TypeError::New(env, "Object expected for mmproj path").ThrowAsJavaScriptException();
+  }
+
+  auto options = info[0].As<Napi::Object>();
+  auto mmproj_path = options.Get("path").ToString().Utf8Value();
+  auto use_gpu = options.Get("use_gpu").ToBoolean().Value();
+
+  if (mmproj_path.empty()) {
+    Napi::TypeError::New(env, "mmproj path is required").ThrowAsJavaScriptException();
+  }
+
+  console_log(env, "Initializing multimodal with mmproj path: " + mmproj_path);
+
+  auto model = _sess->model();
+  auto ctx = _sess->context();
+  if (model == nullptr) {
+    Napi::Error::New(env, "Model not loaded").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  if (_mtmd_ctx != nullptr) {
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+
+  // Initialize mtmd context
+  mtmd_context_params mtmd_params = mtmd_context_params_default();
+  mtmd_params.use_gpu = use_gpu;
+  mtmd_params.print_timings = false;
+  mtmd_params.n_threads = _sess->params().cpuparams.n_threads;
+  mtmd_params.verbosity = (ggml_log_level)GGML_LOG_LEVEL_INFO;
+
+  console_log(env, format_string("Initializing mtmd context with threads=%d, use_gpu=%d",
+                                 mtmd_params.n_threads, mtmd_params.use_gpu ? 1 : 0));
+
+  _mtmd_ctx = mtmd_init_from_file(mmproj_path.c_str(), model, mtmd_params);
+  if (_mtmd_ctx == nullptr) {
+    Napi::Error::New(env, "Failed to initialize multimodal context").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  _has_multimodal = true;
+
+  // Share the mtmd context with the session
+  _sess->set_mtmd_ctx(_mtmd_ctx);
+
+  // Check if the model uses M-RoPE or non-causal attention
+  bool uses_mrope = mtmd_decode_use_mrope(_mtmd_ctx);
+  bool uses_non_causal = mtmd_decode_use_non_causal(_mtmd_ctx);
+  console_log(env, format_string("Model multimodal properties: uses_mrope=%d, uses_non_causal=%d",
+                                 uses_mrope ? 1 : 0, uses_non_causal ? 1 : 0));
+
+  console_log(env, "Multimodal context initialized successfully with mmproj: " + mmproj_path);
+  return Napi::Boolean::New(env, true);
+}
+
+// isMultimodalEnabled(): boolean
+Napi::Value LlamaContext::IsMultimodalEnabled(const Napi::CallbackInfo &info) {
+  return Napi::Boolean::New(info.Env(), _has_multimodal && _mtmd_ctx != nullptr);
+}
+
+// releaseMultimodal(): void
+void LlamaContext::ReleaseMultimodal(const Napi::CallbackInfo &info) {
+  if (_mtmd_ctx != nullptr) {
+    // Clear the mtmd context reference in the session
+    if (_sess != nullptr) {
+      _sess->set_mtmd_ctx(nullptr);
+    }
+
+    // Free the mtmd context
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+}
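Taken together, these hunks define the new multimodal surface of the native binding. A minimal consumer-side sketch follows; it assumes the TypeScript wrapper in package/lib/index.ts forwards the native methods under the same names, and the loadModel entry point and result shape are illustrative assumptions rather than confirmed API:

```ts
// Hedged sketch of the 0.4.0 multimodal flow. Only initMultimodal,
// isMultimodalEnabled, releaseMultimodal, ctx_shift, and image_paths
// appear in this diff; loadModel() and result.text are assumptions.
import { loadModel } from '@fugood/llama.node'

async function describeImage(): Promise<void> {
  const context = await loadModel({ model: 'model.gguf', ctx_shift: true })

  // initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
  const ok = context.initMultimodal({ path: 'mmproj.gguf', use_gpu: true })
  if (!ok || !context.isMultimodalEnabled()) {
    throw new Error('Failed to initialize multimodal context')
  }

  // image_paths accepts a single string or an array of paths; without a
  // prior initMultimodal() call the native layer rejects the request.
  const result = await context.completion({
    prompt: 'Describe this image.',
    image_paths: ['photo.jpg'],
  })
  console.log(result.text)

  context.releaseMultimodal()
  await context.release()
}
```

Note the ownership contract implied by the C++ above: the mtmd context is owned by LlamaContext (freed in the destructor and in releaseMultimodal), while the session only holds a borrowed pointer that is nulled out before disposal.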
package/src/LlamaContext.h CHANGED
@@ -1,10 +1,13 @@
 #include "common.hpp"
+#include "tools/mtmd/mtmd.h"
+#include "tools/mtmd/clip.h"
 
 class LlamaCompletionWorker;
 
 class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
 public:
   LlamaContext(const Napi::CallbackInfo &info);
+  ~LlamaContext();
   static void ToggleNativeLog(const Napi::CallbackInfo &info);
   static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
   static void Init(Napi::Env env, Napi::Object &exports);
@@ -24,6 +27,11 @@ private:
   void RemoveLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
+
+  // Multimodal methods
+  Napi::Value InitMultimodal(const Napi::CallbackInfo &info);
+  Napi::Value IsMultimodalEnabled(const Napi::CallbackInfo &info);
+  void ReleaseMultimodal(const Napi::CallbackInfo &info);
 
   std::string _info;
   Napi::Object _meta;
@@ -31,4 +39,8 @@ private:
   common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
+
+  // Multimodal support
+  mtmd_context *_mtmd_ctx = nullptr;
+  bool _has_multimodal = false;
 };
package/src/common.hpp CHANGED
@@ -4,6 +4,7 @@
 #include "common/sampling.h"
 #include "chat.h"
 #include "llama.h"
+#include "tools/mtmd/mtmd.h"
 #include <memory>
 #include <mutex>
 #include <napi.h>
@@ -82,10 +83,23 @@ public:
   inline const common_params &params() const { return params_; }
 
   inline std::mutex &get_mutex() { return mutex; }
+
+  // Getter for the multimodal context
+  inline const mtmd_context* get_mtmd_ctx() const {
+    return _mtmd_ctx;
+  }
+
+  // Setter for the multimodal context
+  inline void set_mtmd_ctx(mtmd_context* ctx) {
+    _mtmd_ctx = ctx;
+  }
 
   void dispose() {
     std::lock_guard<std::mutex> lock(mutex);
     tokens_.clear();
+
+    // mtmd_ctx is owned by LlamaContext, so we don't free it here
+    _mtmd_ctx = nullptr;
   }
 
 private:
@@ -93,6 +107,7 @@ private:
   const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;
+  mtmd_context* _mtmd_ctx = nullptr;
 };
 
 typedef std::shared_ptr<LlamaSession> LlamaSessionPtr;
package/src/llama.cpp/.github/workflows/build-linux-cross.yml ADDED
@@ -0,0 +1,233 @@
+name: Build on Linux using cross-compiler
+on:
+  workflow_dispatch:
+  workflow_call:
+
+jobs:
+  ubuntu-24-riscv64-cpu-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Riscv
+        run: |
+          sudo dpkg --add-architecture riscv64
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+                  build-essential \
+                  gcc-14-riscv64-linux-gnu \
+                  g++-14-riscv64-linux-gnu \
+                  libcurl4-openssl-dev:riscv64
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                         -DGGML_OPENMP=OFF \
+                         -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
+                         -DLLAMA_BUILD_TESTS=OFF \
+                         -DCMAKE_SYSTEM_NAME=Linux \
+                         -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
+                         -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+                         -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
+                         -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+                         -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-riscv64-vulkan-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Riscv
+        run: |
+          sudo dpkg --add-architecture riscv64
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+                  build-essential \
+                  glslc \
+                  gcc-14-riscv64-linux-gnu \
+                  g++-14-riscv64-linux-gnu \
+                  libvulkan-dev:riscv64 \
+                  libcurl4-openssl-dev:riscv64
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                         -DGGML_VULKAN=ON \
+                         -DGGML_OPENMP=OFF \
+                         -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
+                         -DLLAMA_BUILD_TESTS=OFF \
+                         -DCMAKE_SYSTEM_NAME=Linux \
+                         -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
+                         -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
+                         -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
+                         -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+                         -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-arm64-vulkan-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup Arm64
+        run: |
+          sudo dpkg --add-architecture arm64
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+                  build-essential \
+                  glslc \
+                  crossbuild-essential-arm64 \
+                  libvulkan-dev:arm64 \
+                  libcurl4-openssl-dev:arm64
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                         -DGGML_VULKAN=ON \
+                         -DGGML_OPENMP=OFF \
+                         -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
+                         -DLLAMA_BUILD_TESTS=OFF \
+                         -DCMAKE_SYSTEM_NAME=Linux \
+                         -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
+                         -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \
+                         -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
+                         -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+                         -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-cpu-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+                  build-essential \
+                  gcc-14-powerpc64le-linux-gnu \
+                  g++-14-powerpc64le-linux-gnu \
+                  libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                         -DGGML_OPENMP=OFF \
+                         -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
+                         -DLLAMA_BUILD_TESTS=OFF \
+                         -DCMAKE_SYSTEM_NAME=Linux \
+                         -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+                         -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+                         -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+                         -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+                         -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-vulkan-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+                  build-essential \
+                  glslc \
+                  gcc-14-powerpc64le-linux-gnu \
+                  g++-14-powerpc64le-linux-gnu \
+                  libvulkan-dev:ppc64el \
+                  libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+                         -DGGML_VULKAN=ON \
+                         -DGGML_OPENMP=OFF \
+                         -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
+                         -DLLAMA_BUILD_TESTS=OFF \
+                         -DCMAKE_SYSTEM_NAME=Linux \
+                         -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+                         -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+                         -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+                         -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+                         -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+                         -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)