@fugood/llama.node 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/examples/llava/clip.h (deleted)
@@ -1,118 +0,0 @@
- #ifndef CLIP_H
- #define CLIP_H
-
- #include <stddef.h>
- #include <stdint.h>
-
- #ifdef LLAMA_SHARED
- #    if defined(_WIN32) && !defined(__MINGW32__)
- #        ifdef LLAMA_BUILD
- #            define CLIP_API __declspec(dllexport)
- #        else
- #            define CLIP_API __declspec(dllimport)
- #        endif
- #    else
- #        define CLIP_API __attribute__ ((visibility ("default")))
- #    endif
- #else
- #    define CLIP_API
- #endif
-
- #ifdef __cplusplus
- extern "C" {
- #endif
-
- struct clip_ctx;
-
- struct clip_image_size {
-     int width;
-     int height;
- };
-
- struct clip_image_u8_batch {
-     struct clip_image_u8 * data;
-     size_t size;
- };
-
- struct clip_image_f32_batch {
-     struct clip_image_f32 * data;
-     size_t size;
- };
-
- struct clip_context_params {
-     bool use_gpu;
-     int verbosity;
- };
-
- // deprecated, use clip_init
- CLIP_API struct clip_ctx * clip_model_load(const char * fname, int verbosity);
-
- CLIP_API struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_params);
-
- CLIP_API void clip_free(struct clip_ctx * ctx);
-
- CLIP_API size_t clip_embd_nbytes(const struct clip_ctx * ctx);
- CLIP_API size_t clip_embd_nbytes_by_img(const struct clip_ctx * ctx, int img_h, int img_w);
-
- CLIP_API int32_t clip_image_size (const struct clip_ctx * ctx);
- CLIP_API int32_t clip_patch_size (const struct clip_ctx * ctx);
- CLIP_API int32_t clip_hidden_size(const struct clip_ctx * ctx);
-
- // TODO: should be enum, not string
- CLIP_API const char * clip_patch_merge_type(const struct clip_ctx * ctx);
-
- CLIP_API const int32_t * clip_image_grid(const struct clip_ctx * ctx);
- CLIP_API size_t get_clip_image_grid_size(const struct clip_ctx * ctx);
-
- CLIP_API int clip_n_patches        (const struct clip_ctx * ctx);
- CLIP_API int clip_n_patches_by_img (const struct clip_ctx * ctx, struct clip_image_f32 * img);
- CLIP_API int clip_n_mmproj_embd    (const struct clip_ctx * ctx);
-
- CLIP_API int clip_uhd_num_image_embeds_col(struct clip_ctx * ctx_clip);
- CLIP_API void clip_add_load_image_size(struct clip_ctx * ctx_clip, struct clip_image_size * load_image_size);
- CLIP_API struct clip_image_size * clip_get_load_image_size(struct clip_ctx * ctx_clip);
-
- CLIP_API struct clip_image_size * clip_image_size_init();
- CLIP_API struct clip_image_u8  * clip_image_u8_init ();
- CLIP_API struct clip_image_f32 * clip_image_f32_init();
-
- CLIP_API void clip_image_u8_free (struct clip_image_u8  * img);
- CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
- CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch  * batch);
- CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
-
- /**
-  * Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
-  * The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes
-  */
- CLIP_API void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny, struct clip_image_u8 * img);
-
- CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
-
- /** interpret bytes as an image file with length bytes_length, and use the result to populate img */
- CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
-
- /** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
- CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
-
- CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
-
- CLIP_API bool clip_image_encode      (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
- CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
-
- CLIP_API bool clip_model_quantize(const char * fname_inp, const char * fname_out, int itype);
-
- CLIP_API int clip_is_minicpmv(const struct clip_ctx * ctx);
- CLIP_API bool clip_is_glm(const struct clip_ctx * ctx);
- CLIP_API bool clip_is_qwen2vl(const struct clip_ctx * ctx);
-
- CLIP_API int get_deepest_feature_layer(const struct clip_ctx * ctx);
-
- CLIP_API bool clip_encode_float_image (struct clip_ctx * ctx, int n_threads, float * img, int h, int w, float * vec);
-
-
- #ifdef __cplusplus
- }
- #endif
-
- #endif // CLIP_H
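
For reference while reading the two deletions: the removed clip.h above exposed a small C API, and the removed gemma3-cli.cpp below drove it directly. The following is a minimal sketch of the typical call sequence, reconstructed only from the declarations above; the file paths and thread count are illustrative placeholders, not values from the package.

    #include <stdbool.h>
    #include <stdlib.h>
    #include "clip.h"

    int encode_one_image(void) {
        struct clip_context_params cparams = { /*use_gpu =*/ true, /*verbosity =*/ 1 };
        struct clip_ctx * ctx = clip_init("mmproj.gguf", cparams); // placeholder path
        if (!ctx) return 1;

        struct clip_image_u8 * img = clip_image_u8_init();
        if (!clip_image_load_from_file("input.jpg", img)) {        // placeholder path
            clip_image_u8_free(img);
            clip_free(ctx);
            return 1;
        }

        // preprocessing may split the image into several f32 tiles
        struct clip_image_f32_batch batch = { NULL, 0 };
        if (clip_image_preprocess(ctx, img, &batch)) {
            // embedding buffer size is reported by the context
            float * embd = malloc(clip_embd_nbytes(ctx));
            clip_image_batch_encode(ctx, /*n_threads =*/ 4, &batch, embd);
            // ... feed embd to the language model as an embedding batch ...
            free(embd);
        }

        clip_image_f32_batch_free(&batch);
        clip_image_u8_free(img);
        clip_free(ctx);
        return 0;
    }

The same load / preprocess / batch-encode / free sequence appears in eval_image() in the deleted gemma3-cli.cpp that follows.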
package/src/llama.cpp/examples/llava/gemma3-cli.cpp (deleted)
@@ -1,341 +0,0 @@
- #include "arg.h"
- #include "log.h"
- #include "common.h"
- #include "sampling.h"
- #include "clip.h"
- #include "stb_image.h"
- #include "llama.h"
- #include "ggml.h"
- #include "console.h"
-
- #include <vector>
- #include <limits.h>
- #include <inttypes.h>
-
- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
- #include <signal.h>
- #include <unistd.h>
- #elif defined (_WIN32)
- #define WIN32_LEAN_AND_MEAN
- #ifndef NOMINMAX
- #define NOMINMAX
- #endif
- #include <windows.h>
- #include <signal.h>
- #endif
-
- static bool g_is_generating = false;
-
- /**
-  * Please note that this is NOT a production-ready stuff.
-  * It is a playground for trying Gemma 3 vision capabilities.
-  * For contributors: please keep this code simple and easy to understand.
-  */
-
- static void show_additional_info(int /*argc*/, char ** argv) {
-     LOG(
-         "Experimental CLI for using Gemma 3 vision model\n\n"
-         "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
-         " -m and --mmproj are required\n"
-         " --image and -p are optional, if NOT provided, the CLI will run in chat mode\n",
-         argv[0]
-     );
- }
-
- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
- static void sigint_handler(int signo) {
-     if (signo == SIGINT) {
-         if (g_is_generating) {
-             g_is_generating = false;
-         } else {
-             console::cleanup();
-             LOG("\nInterrupted by user\n");
-             _exit(130);
-         }
-     }
- }
- #endif
-
- struct gemma3_context {
-     struct clip_ctx * ctx_clip = NULL;
-     common_init_result llama_init;
-
-     llama_model * model;
-     llama_context * lctx;
-     const llama_vocab * vocab;
-     llama_batch batch;
-
-     int n_threads = 1;
-     llama_pos n_past = 0;
-
-     gemma3_context(common_params & params) : llama_init(common_init_from_params(params)) {
-         model = llama_init.model.get();
-         lctx = llama_init.context.get();
-         vocab = llama_model_get_vocab(model);
-         n_threads = params.cpuparams.n_threads;
-         batch = llama_batch_init(params.n_batch, 0, 1);
-         init_clip_model(params);
-     }
-
-     void init_clip_model(common_params & params) {
-         const char * clip_path = params.mmproj.c_str();
-         ctx_clip = clip_model_load(clip_path, params.verbosity > 1);
-     }
-
-     ~gemma3_context() {
-         clip_free(ctx_clip);
-     }
- };
-
- struct decode_embd_batch {
-     std::vector<llama_pos> pos;
-     std::vector<int32_t> n_seq_id;
-     std::vector<llama_seq_id> seq_id_0;
-     std::vector<llama_seq_id *> seq_ids;
-     std::vector<int8_t> logits;
-     llama_batch batch;
-     decode_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
-         pos     .resize(n_tokens);
-         n_seq_id.resize(n_tokens);
-         seq_ids .resize(n_tokens + 1);
-         logits  .resize(n_tokens);
-         seq_id_0.resize(1);
-         seq_id_0[0] = seq_id;
-         seq_ids [n_tokens] = nullptr;
-         batch = {
-             /*n_tokens =*/ n_tokens,
-             /*tokens   =*/ nullptr,
-             /*embd     =*/ embd,
-             /*pos      =*/ pos.data(),
-             /*n_seq_id =*/ n_seq_id.data(),
-             /*seq_id   =*/ seq_ids.data(),
-             /*logits   =*/ logits.data(),
-         };
-         for (int i = 0; i < n_tokens; i++) {
-             batch.pos     [i] = pos_0 + i;
-             batch.n_seq_id[i] = 1;
-             batch.seq_id  [i] = seq_id_0.data();
-             batch.logits  [i] = false;
-         }
-     }
- };
-
- static int eval_text(gemma3_context & ctx, std::string input, bool logits_last = false) {
-     llama_tokens tokens = common_tokenize(ctx.lctx, input, false, true);
-     common_batch_clear(ctx.batch);
-     for (llama_token & t : tokens) {
-         common_batch_add(ctx.batch, t, ctx.n_past++, {0}, false);
-     }
-     if (logits_last) {
-         ctx.batch.logits[ctx.batch.n_tokens - 1] = true;
-     }
-     // LOG("eval_text (n_tokens = %d): %s\n", (int)tokens.size(), input.c_str());
-     if (llama_decode(ctx.lctx, ctx.batch)) {
-         LOG_ERR("Failed to decode text\n");
-         return 1;
-     }
-     return 0;
- }
-
- static int eval_image(gemma3_context & ctx, std::string & fname) {
-     std::vector<float> image_embd_v;
-     int n_embd = llama_model_n_embd(ctx.model);
-     int n_tokens = 256;
-     image_embd_v.resize(n_tokens * n_embd);
-
-     bool ok;
-     struct clip_image_u8 * img_u8 = clip_image_u8_init();
-     ok = clip_image_load_from_file(fname.c_str(), img_u8);
-     if (!ok) {
-         LOG_ERR("Unable to load image %s\n", fname.c_str());
-         clip_image_u8_free(img_u8);
-         return 2; // non-fatal error
-     }
-
-     clip_image_f32_batch batch_f32;
-     ok = clip_image_preprocess(ctx.ctx_clip, img_u8, &batch_f32);
-     if (!ok) {
-         LOG_ERR("Unable to preprocess image\n");
-         clip_image_f32_batch_free(&batch_f32);
-         clip_image_u8_free(img_u8);
-         return 1;
-     }
-
-     int64_t t0 = ggml_time_ms();
-     LOG("Encoding image %s\n", fname.c_str());
-     ok = clip_image_batch_encode(ctx.ctx_clip, ctx.n_threads, &batch_f32, image_embd_v.data());
-     if (!ok) {
-         LOG_ERR("Unable to encode image\n");
-         clip_image_f32_batch_free(&batch_f32);
-         clip_image_u8_free(img_u8);
-         return 1;
-     }
-     LOG("Image encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
-
-     clip_image_f32_batch_free(&batch_f32);
-     clip_image_u8_free(img_u8);
-
-     // decode image embeddings
-     int64_t t1 = ggml_time_ms();
-     eval_text(ctx, "<start_of_image>");
-     llama_set_causal_attn(ctx.lctx, false);
-     decode_embd_batch batch_img(image_embd_v.data(), n_tokens, ctx.n_past, 0);
-     if (llama_decode(ctx.lctx, batch_img.batch)) {
-         LOG_ERR("failed to decode image\n");
-         return 1;
-     }
-     ctx.n_past += n_tokens;
-     llama_set_causal_attn(ctx.lctx, true);
-     eval_text(ctx, "<end_of_image>");
-     LOG("Image decoded in %" PRId64 " ms\n", ggml_time_ms() - t1);
-     return 0;
- }
-
- static int generate_response(gemma3_context & ctx, common_sampler * smpl, int n_predict) {
-     for (int i = 0; i < n_predict; i++) {
-         if (i > n_predict || !g_is_generating) {
-             printf("\n");
-             break;
-         }
-
-         llama_token token_id = common_sampler_sample(smpl, ctx.lctx, -1);
-         common_sampler_accept(smpl, token_id, true);
-
-         if (llama_vocab_is_eog(ctx.vocab, token_id)) {
-             printf("\n");
-             break; // end of generation
-         }
-
-         printf("%s", common_token_to_piece(ctx.lctx, token_id).c_str());
-         fflush(stdout);
-
-         // eval the token
-         common_batch_clear(ctx.batch);
-         common_batch_add(ctx.batch, token_id, ctx.n_past++, {0}, true);
-         if (llama_decode(ctx.lctx, ctx.batch)) {
-             LOG_ERR("failed to decode token\n");
-             return 1;
-         }
-     }
-     return 0;
- }
-
- int main(int argc, char ** argv) {
-     ggml_time_init();
-
-     common_params params;
-     params.sampling.temp = 0.2; // lower temp by default for better quality
-
-     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
-         return 1;
-     }
-
-     common_init();
-
-     if (params.mmproj.empty()) {
-         show_additional_info(argc, argv);
-         return 1;
-     }
-
-     gemma3_context ctx(params);
-     printf("%s: %s\n", __func__, params.model.c_str());
-
-     bool is_single_turn = !params.prompt.empty() && !params.image.empty();
-
-     struct common_sampler * smpl = common_sampler_init(ctx.model, params.sampling);
-     int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
-
-     // ctrl+C handling
-     {
- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
-         struct sigaction sigint_action;
-         sigint_action.sa_handler = sigint_handler;
-         sigemptyset (&sigint_action.sa_mask);
-         sigint_action.sa_flags = 0;
-         sigaction(SIGINT, &sigint_action, NULL);
- #elif defined (_WIN32)
-         auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
-             return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
-         };
-         SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
- #endif
-     }
-
-     if (eval_text(ctx, "<bos>")) {
-         return 1;
-     }
-
-     if (is_single_turn) {
-         g_is_generating = true;
-         if (eval_text(ctx, "<start_of_turn>user\n")) {
-             return 1;
-         }
-         for (auto & fname : params.image) {
-             if (eval_image(ctx, fname)) {
-                 return 1;
-             }
-         }
-         if (eval_text(ctx, params.prompt + "<end_of_turn><start_of_turn>model\n", true)) {
-             return 1;
-         }
-         if (generate_response(ctx, smpl, n_predict)) {
-             return 1;
-         }
-
-     } else {
-         LOG("\n Running in chat mode, available commands:");
-         LOG("\n /image <path> load an image");
-         LOG("\n /clear clear the chat history");
-         LOG("\n /quit or /exit exit the program");
-         LOG("\n");
-
-         if (eval_text(ctx, "<start_of_turn>user\n")) {
-             return 1;
-         }
-
-         while (true) {
-             g_is_generating = false;
-             LOG("\n> ");
-             console::set_display(console::user_input);
-             std::string line;
-             console::readline(line, false);
-             console::set_display(console::reset);
-             line = string_strip(line);
-             if (line.empty()) {
-                 continue;
-             }
-             if (line == "/quit" || line == "/exit") {
-                 break;
-             }
-             if (line == "/clear") {
-                 ctx.n_past = 0;
-                 llama_kv_self_seq_rm(ctx.lctx, 0, 1, -1); // keep BOS
-                 LOG("Chat history cleared\n\n");
-                 continue;
-             }
-             g_is_generating = true;
-             if (line.find("/image") == 0) {
-                 std::string image = line.substr(7);
-                 int res = eval_image(ctx, image);
-                 if (res == 2) {
-                     continue; // image not found
-                 }
-                 if (res) {
-                     return 1;
-                 }
-                 continue;
-             }
-             if (eval_text(ctx, line + "<end_of_turn><start_of_turn>model\n", true)) {
-                 return 1;
-             }
-             if (generate_response(ctx, smpl, n_predict)) {
-                 return 1;
-             }
-             if (eval_text(ctx, "<end_of_turn><start_of_turn>user\n")) {
-                 return 1;
-             }
-         }
-     }
-
-     return 0;
- }