@fugood/llama.node 0.3.16 → 0.4.0

This diff shows the published contents of the two package versions as they appear in their public registries; it is provided for informational purposes only.
Files changed (281)
  1. package/CMakeLists.txt +6 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +44 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +374 -19
  24. package/src/LlamaCompletionWorker.h +31 -10
  25. package/src/LlamaContext.cpp +216 -7
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +233 -0
  29. package/src/llama.cpp/.github/workflows/build.yml +89 -767
  30. package/src/llama.cpp/.github/workflows/docker.yml +9 -6
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +19 -23
  33. package/src/llama.cpp/CMakeLists.txt +11 -1
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +35 -4
  37. package/src/llama.cpp/common/arg.cpp +844 -121
  38. package/src/llama.cpp/common/arg.h +9 -0
  39. package/src/llama.cpp/common/chat.cpp +129 -107
  40. package/src/llama.cpp/common/chat.h +2 -0
  41. package/src/llama.cpp/common/common.cpp +64 -518
  42. package/src/llama.cpp/common/common.h +35 -45
  43. package/src/llama.cpp/common/json-schema-to-grammar.cpp +3 -0
  44. package/src/llama.cpp/common/llguidance.cpp +31 -47
  45. package/src/llama.cpp/common/minja/chat-template.hpp +23 -11
  46. package/src/llama.cpp/common/minja/minja.hpp +186 -127
  47. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  48. package/src/llama.cpp/common/regex-partial.h +56 -0
  49. package/src/llama.cpp/common/sampling.cpp +60 -50
  50. package/src/llama.cpp/docs/build.md +122 -7
  51. package/src/llama.cpp/examples/CMakeLists.txt +2 -32
  52. package/src/llama.cpp/examples/batched/batched.cpp +1 -1
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +9 -12
  54. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  55. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  56. package/src/llama.cpp/examples/parallel/parallel.cpp +89 -15
  57. package/src/llama.cpp/examples/passkey/passkey.cpp +1 -1
  58. package/src/llama.cpp/examples/speculative/speculative.cpp +1 -1
  59. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +1 -1
  60. package/src/llama.cpp/examples/sycl/build.sh +2 -2
  61. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +2 -2
  62. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  64. package/src/llama.cpp/ggml/CMakeLists.txt +35 -2
  65. package/src/llama.cpp/ggml/cmake/GitVars.cmake +22 -0
  66. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  67. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  68. package/src/llama.cpp/ggml/include/ggml-cpu.h +5 -0
  69. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  70. package/src/llama.cpp/ggml/include/ggml-rpc.h +6 -1
  71. package/src/llama.cpp/ggml/include/ggml.h +76 -106
  72. package/src/llama.cpp/ggml/src/CMakeLists.txt +11 -8
  73. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  74. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  75. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -2
  76. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +8 -4
  77. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +5 -5
  78. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +692 -1534
  79. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +613 -122
  80. package/src/llama.cpp/ggml/src/ggml-cann/common.h +135 -1
  81. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +507 -137
  82. package/src/llama.cpp/ggml/src/ggml-common.h +12 -6
  83. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +66 -33
  84. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp +158 -0
  85. package/src/llama.cpp/ggml/src/ggml-cpu/binary-ops.h +16 -0
  86. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +72 -0
  87. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +1 -1
  88. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +896 -194
  89. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +2 -21
  90. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +1060 -410
  91. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1008 -13533
  92. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +31 -16
  93. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +90 -12
  94. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -13
  95. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +266 -72
  96. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1034 -88
  97. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +8796 -0
  98. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +110 -0
  99. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +892 -0
  100. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +186 -0
  101. package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +28 -0
  102. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +252 -0
  103. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +802 -0
  104. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  105. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +7 -0
  106. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +1 -0
  107. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -4
  108. package/src/llama.cpp/ggml/src/ggml-impl.h +52 -18
  109. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +106 -14
  110. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +67 -119
  111. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +1023 -262
  112. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  113. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +307 -40
  115. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +125 -45
  116. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +10 -8
  117. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +239 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +39 -0
  119. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -35
  120. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +9 -307
  121. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  122. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  123. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  124. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  125. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +79 -90
  126. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +944 -438
  127. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +22 -23
  128. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  129. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +24 -20
  130. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +1 -4
  131. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +507 -411
  132. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +84 -74
  133. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +1 -3
  134. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  135. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +37 -49
  136. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +7 -22
  137. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +4 -14
  138. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  139. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +204 -118
  140. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +1 -3
  141. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  142. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +83 -49
  143. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1278 -282
  144. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +32 -0
  145. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +133 -30
  146. package/src/llama.cpp/ggml/src/ggml.c +170 -265
  147. package/src/llama.cpp/ggml/src/gguf.cpp +34 -33
  148. package/src/llama.cpp/include/llama.h +82 -22
  149. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +112 -0
  150. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +46 -0
  151. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +112 -0
  152. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +46 -0
  153. package/src/llama.cpp/requirements/requirements-all.txt +5 -3
  154. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +3 -0
  155. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  156. package/src/llama.cpp/src/CMakeLists.txt +4 -2
  157. package/src/llama.cpp/src/llama-adapter.cpp +43 -1
  158. package/src/llama.cpp/src/llama-arch.cpp +163 -17
  159. package/src/llama.cpp/src/llama-arch.h +16 -0
  160. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  161. package/src/llama.cpp/src/llama-batch.h +2 -1
  162. package/src/llama.cpp/src/llama-chat.cpp +91 -16
  163. package/src/llama.cpp/src/llama-chat.h +7 -2
  164. package/src/llama.cpp/src/llama-context.cpp +479 -575
  165. package/src/llama.cpp/src/llama-context.h +44 -33
  166. package/src/llama.cpp/src/llama-cparams.h +1 -0
  167. package/src/llama.cpp/src/llama-graph.cpp +209 -157
  168. package/src/llama.cpp/src/llama-graph.h +38 -14
  169. package/src/llama.cpp/src/llama-hparams.h +13 -0
  170. package/src/llama.cpp/src/llama-kv-cache.cpp +1604 -543
  171. package/src/llama.cpp/src/llama-kv-cache.h +283 -171
  172. package/src/llama.cpp/src/llama-memory.h +12 -2
  173. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  174. package/src/llama.cpp/src/llama-model-loader.cpp +34 -20
  175. package/src/llama.cpp/src/llama-model-loader.h +5 -3
  176. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  177. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  178. package/src/llama.cpp/src/llama-model.cpp +1803 -330
  179. package/src/llama.cpp/src/llama-model.h +21 -2
  180. package/src/llama.cpp/src/llama-quant.cpp +33 -10
  181. package/src/llama.cpp/src/llama-sampling.cpp +25 -7
  182. package/src/llama.cpp/src/llama-vocab.cpp +86 -10
  183. package/src/llama.cpp/src/llama-vocab.h +6 -0
  184. package/src/llama.cpp/src/llama.cpp +15 -1
  185. package/src/llama.cpp/tests/CMakeLists.txt +52 -31
  186. package/src/llama.cpp/tests/test-arg-parser.cpp +51 -4
  187. package/src/llama.cpp/tests/test-backend-ops.cpp +189 -90
  188. package/src/llama.cpp/tests/test-chat-template.cpp +26 -6
  189. package/src/llama.cpp/tests/test-chat.cpp +15 -3
  190. package/src/llama.cpp/{examples/gbnf-validator/gbnf-validator.cpp → tests/test-gbnf-validator.cpp} +2 -2
  191. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -2
  192. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +63 -2
  193. package/src/llama.cpp/tests/test-grammar-parser.cpp +3 -1
  194. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -1
  195. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -1
  196. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  197. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  198. package/src/llama.cpp/{examples/quantize-stats/quantize-stats.cpp → tests/test-quantize-stats.cpp} +3 -1
  199. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  200. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  201. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +2 -1
  202. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +2 -1
  203. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  204. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +3 -3
  205. package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +1 -1
  206. package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +15 -16
  207. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  208. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +623 -274
  209. package/src/llama.cpp/{examples → tools}/main/main.cpp +22 -14
  210. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +47 -0
  211. package/src/llama.cpp/tools/mtmd/clip-impl.h +365 -0
  212. package/src/llama.cpp/tools/mtmd/clip.cpp +3646 -0
  213. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  214. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +22 -0
  215. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +370 -0
  216. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  217. package/src/llama.cpp/tools/mtmd/mtmd.cpp +678 -0
  218. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  219. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +21 -5
  220. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +53 -3
  221. package/src/llama.cpp/tools/rpc/CMakeLists.txt +4 -0
  222. package/src/llama.cpp/tools/rpc/rpc-server.cpp +322 -0
  223. package/src/llama.cpp/tools/run/CMakeLists.txt +16 -0
  224. package/src/llama.cpp/{examples → tools}/run/run.cpp +30 -30
  225. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  226. package/src/llama.cpp/{examples → tools}/server/httplib.h +313 -247
  227. package/src/llama.cpp/{examples → tools}/server/server.cpp +529 -215
  228. package/src/llama.cpp/{examples → tools}/server/utils.hpp +427 -6
  229. package/src/llama.cpp/{examples → tools}/tts/tts.cpp +6 -9
  230. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  231. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +0 -5
  232. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  233. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  234. package/src/llama.cpp/examples/llava/CMakeLists.txt +0 -66
  235. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  236. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  237. package/src/llama.cpp/examples/llava/clip.cpp +0 -3206
  238. package/src/llama.cpp/examples/llava/clip.h +0 -118
  239. package/src/llama.cpp/examples/llava/gemma3-cli.cpp +0 -341
  240. package/src/llama.cpp/examples/llava/llava-cli.cpp +0 -332
  241. package/src/llama.cpp/examples/llava/llava.cpp +0 -574
  242. package/src/llama.cpp/examples/llava/llava.h +0 -49
  243. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +0 -354
  244. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +0 -584
  245. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +0 -6
  246. package/src/llama.cpp/examples/rpc/CMakeLists.txt +0 -2
  247. package/src/llama.cpp/examples/rpc/rpc-server.cpp +0 -171
  248. package/src/llama.cpp/examples/run/CMakeLists.txt +0 -5
  249. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +0 -30
  250. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +0 -19
  251. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +0 -234
  252. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +0 -197
  253. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +0 -190
  254. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +0 -204
  255. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +0 -191
  256. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +0 -218
  257. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +0 -216
  258. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +0 -295
  259. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  260. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  261. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  262. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  263. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  264. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  265. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  266. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  267. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  268. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  269. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  270. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  271. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  272. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  273. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  274. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  275. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  276. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  277. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  278. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  279. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  280. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  281. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
package/src/llama.cpp/examples/infill/infill.cpp (deleted)
@@ -1,590 +0,0 @@
- #include "arg.h"
- #include "common.h"
- #include "console.h"
- #include "sampling.h"
- #include "log.h"
- #include "llama.h"
-
- #include <cassert>
- #include <cinttypes>
- #include <cmath>
- #include <cstdio>
- #include <cstring>
- #include <ctime>
- #include <fstream>
- #include <iostream>
- #include <sstream>
- #include <string>
- #include <vector>
-
- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
- #include <signal.h>
- #include <unistd.h>
- #elif defined (_WIN32)
- #define WIN32_LEAN_AND_MEAN
- #ifndef NOMINMAX
- #define NOMINMAX
- #endif
- #include <windows.h>
- #include <signal.h>
- #endif
-
- #if defined(_MSC_VER)
- #pragma warning(disable: 4244 4267) // possible loss of data
- #endif
-
- static llama_context ** g_ctx;
- static llama_model ** g_model;
- static common_sampler ** g_smpl;
- static common_params * g_params;
- static std::vector<llama_token> * g_input_tokens;
- static std::ostringstream * g_output_ss;
- static std::vector<llama_token> * g_output_tokens;
-
- static bool is_interacting = false;
-
- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
- static void sigint_handler(int signo) {
- if (signo == SIGINT) {
- if (!is_interacting) {
- is_interacting = true;
- } else {
- console::cleanup();
- LOG("\n");
- common_perf_print(*g_ctx, *g_smpl);
-
- // make sure all logs are flushed
- LOG("Interrupted by user\n");
- common_log_pause(common_log_main());
-
- _exit(130);
- }
- }
- }
- #endif
-
- int main(int argc, char ** argv) {
- common_params params;
- g_params = &params;
-
- if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_INFILL)) {
- return 1;
- }
-
- common_init();
-
- auto & sparams = params.sampling;
-
- console::init(params.simple_io, params.use_color);
- atexit([]() { console::cleanup(); });
-
- if (params.logits_all) {
- LOG_ERR("\n************\n");
- LOG_ERR("%s: please use the 'perplexity' tool for perplexity calculations\n", __func__);
- LOG_ERR("************\n\n");
-
- return 0;
- }
-
- if (params.embedding) {
- LOG_ERR("\n************\n");
- LOG_ERR("%s: please use the 'embedding' tool for embedding calculations\n", __func__);
- LOG_ERR("************\n\n");
-
- return 0;
- }
-
- if (params.n_ctx != 0 && params.n_ctx < 8) {
- LOG_WRN("%s: minimum context size is 8, using minimum size.\n", __func__);
- params.n_ctx = 8;
- }
-
- if (!params.interactive_first && (params.input_prefix.empty() && params.input_suffix.empty())) {
- LOG_ERR("\n************\n");
- LOG_ERR("%s: please use '--interactive_first' or specify '--in_prefix' and/or '--in_suffix'\n", __func__);
- LOG_ERR("************\n\n");
-
- return 0;
- }
-
- if (params.rope_freq_base != 0.0) {
- LOG_WRN("%s: changing RoPE frequency base to %g.\n", __func__, params.rope_freq_base);
- }
-
- if (params.rope_freq_scale != 0.0) {
- LOG_WRN("%s: scaling RoPE frequency by %g.\n", __func__, params.rope_freq_scale);
- }
-
- LOG_INF("%s: llama backend init\n", __func__);
- llama_backend_init();
- llama_numa_init(params.numa);
-
- llama_model * model = nullptr;
- llama_context * ctx = nullptr;
- common_sampler * smpl = nullptr;
-
- g_model = &model;
- g_ctx = &ctx;
- g_smpl = &smpl;
-
- // load the model and apply lora adapter, if any
- LOG_INF("%s: load the model and apply lora adapter, if any\n", __func__);
- common_init_result llama_init = common_init_from_params(params);
-
- model = llama_init.model.get();
- ctx = llama_init.context.get();
-
- if (model == NULL) {
- LOG_ERR("%s: unable to load model\n", __func__);
- return 1;
- }
-
- const llama_vocab * vocab = llama_model_get_vocab(model);
-
- const int n_ctx_train = llama_model_n_ctx_train(model);
- const int n_ctx = llama_n_ctx(ctx);
- LOG_DBG("n_ctx: %d\n", n_ctx);
-
- if (n_ctx > n_ctx_train) {
- LOG_WRN("%s: model was trained on only %d context tokens (%d specified)\n", __func__, n_ctx_train, n_ctx);
- }
-
- // print system information
- {
- LOG_INF("\n");
- LOG_INF("%s\n", common_params_get_system_info(params).c_str());
- }
- const bool add_bos = llama_vocab_get_add_bos(vocab);
- GGML_ASSERT(!llama_vocab_get_add_eos(vocab));
-
- std::vector<llama_token> embd_inp;
- std::vector<llama_token> embd_end;
- std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
- std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);
-
- GGML_ASSERT(llama_vocab_fim_pre(vocab) >= 0);
- GGML_ASSERT(llama_vocab_fim_suf(vocab) >= 0);
-
- inp_pfx.insert(inp_pfx.begin(), llama_vocab_fim_pre(vocab));
- inp_sfx.insert(inp_sfx.begin(), llama_vocab_fim_suf(vocab));
-
- embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
- embd_end = params.spm_infill ? inp_pfx : inp_sfx;
- if (add_bos) {
- embd_inp.insert(embd_inp.begin(), llama_vocab_bos(vocab));
- }
- embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end());
-
- const llama_token middle_token = llama_vocab_fim_mid(vocab);
- if (middle_token >= 0) {
- embd_inp.push_back(middle_token);
- }
-
- LOG_DBG("add_bos: %d\n", add_bos);
- LOG_DBG("prefix: \"%s\"\n", params.input_prefix.c_str());
- LOG_DBG("suffix: \"%s\"\n", params.input_suffix.c_str());
- LOG_DBG("tokens: %s\n", string_from(ctx, embd_inp).c_str());
-
- // Should not run without any tokens
- if (embd_inp.empty()) {
- embd_inp.push_back(llama_vocab_bos(vocab));
- LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
- }
-
- if ((int) embd_inp.size() > n_ctx - 4) {
- LOG_ERR("%s: prompt is too long (%d tokens, max %d)\n", __func__, (int) embd_inp.size(), n_ctx - 4);
- return 1;
- }
-
- // number of tokens to keep when resetting context
- if (params.n_keep < 0 || params.n_keep > (int) embd_inp.size()) {
- params.n_keep = (int)embd_inp.size();
- }
-
- LOG_INF("inp_pfx: %s\n", string_from(ctx, inp_pfx).c_str());
- LOG_INF("inp_sfx: %s\n", string_from(ctx, inp_sfx).c_str());
-
- // enable interactive mode if interactive start is specified
- if (params.interactive_first) {
- params.interactive = true;
- }
-
- if (params.verbose_prompt) {
- LOG_INF("\n");
- LOG_INF("%s: prompt: '%s'\n", __func__, params.prompt.c_str());
- LOG_INF("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
- for (int i = 0; i < (int) embd_inp.size(); i++) {
- LOG_INF("%6d -> '%s'\n", embd_inp[i], common_token_to_piece(ctx, embd_inp[i]).c_str());
- }
-
- if (params.n_keep > 0) {
- LOG_INF("%s: static prompt based on n_keep: '", __func__);
- for (int i = 0; i < params.n_keep; i++) {
- LOG_CNT("%s", common_token_to_piece(ctx, embd_inp[i]).c_str());
- }
- LOG_CNT("'\n");
- }
- LOG_INF("\n");
- }
-
- if (params.interactive) {
- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
- struct sigaction sigint_action;
- sigint_action.sa_handler = sigint_handler;
- sigemptyset (&sigint_action.sa_mask);
- sigint_action.sa_flags = 0;
- sigaction(SIGINT, &sigint_action, NULL);
- #elif defined (_WIN32)
- auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
- return (ctrl_type == CTRL_C_EVENT) ? (sigint_handler(SIGINT), true) : false;
- };
- SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
- #endif
-
- LOG_INF("%s: interactive mode on.\n", __func__);
-
- if (params.input_prefix_bos) {
- LOG_INF("Input prefix with BOS\n");
- }
-
- if (!params.input_prefix.empty()) {
- LOG_INF("Input prefix: '%s'\n", params.input_prefix.c_str());
- }
-
- if (!params.input_suffix.empty()) {
- LOG_INF("Input suffix: '%s'\n", params.input_suffix.c_str());
- }
- }
- smpl = common_sampler_init(model, sparams);
-
- LOG_INF("sampler seed: %u\n", common_sampler_get_seed(smpl));
- LOG_INF("sampler params: \n%s\n", sparams.print().c_str());
- LOG_INF("sampler chain: %s\n", common_sampler_print(smpl).c_str());
-
- LOG_INF("generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep);
-
- LOG_INF("\n");
- LOG_INF("\n##### Infill mode #####\n\n");
- if (params.interactive) {
- const char *control_message;
- if (params.multiline_input) {
- control_message = " - To return control to LLaMA, end your input with '\\'.\n"
- " - To return control without starting a new line, end your input with '/'.\n";
- } else {
- control_message = " - Press Return to return control to LLaMA.\n"
- " - To return control without starting a new line, end your input with '/'.\n"
- " - If you want to submit another line, end your input with '\\'.\n";
- }
- LOG_INF("== Running in interactive mode. ==\n");
- #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
- LOG_INF( " - Press Ctrl+C to interject at any time.\n");
- #endif
- LOG_INF( "%s\n", control_message);
-
- is_interacting = params.interactive_first;
- }
-
- bool input_echo = true;
-
- int n_past = 0;
- int n_remain = params.n_predict;
- int n_consumed = 0;
-
- std::vector<int> input_tokens; g_input_tokens = &input_tokens;
- std::vector<int> output_tokens; g_output_tokens = &output_tokens;
- std::ostringstream output_ss; g_output_ss = &output_ss;
-
- // the first thing we will do is to output the prompt, so set color accordingly
- console::set_display(console::prompt);
-
- std::vector<llama_token> embd;
-
- while (n_remain != 0 || params.interactive) {
- // predict
- if (!embd.empty()) {
- // Note: n_ctx - 4 here is to match the logic for commandline prompt handling via
- // --prompt or --file which uses the same value.
- int max_embd_size = n_ctx - 4;
-
- // Ensure the input doesn't exceed the context size by truncating embd if necessary.
- if ((int) embd.size() > max_embd_size) {
- const int skipped_tokens = (int) embd.size() - max_embd_size;
- embd.resize(max_embd_size);
-
- console::set_display(console::error);
- LOG_WRN("<<input too long: skipped %d token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
- console::set_display(console::reset);
- }
-
- // infinite text generation via context swapping
- // if we run out of context:
- // - take the n_keep first tokens from the original prompt (via n_past)
- // - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
- if (n_past + (int) embd.size() > n_ctx) {
- if (params.n_predict == -2) {
- LOG_DBG("\n\n%s: context full and n_predict == -%d => stopping\n", __func__, params.n_predict);
- break;
- }
-
- const int n_left = n_past - params.n_keep - 1;
- const int n_discard = n_left/2;
-
- LOG_DBG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
- n_past, n_left, n_ctx, params.n_keep, n_discard);
-
- llama_kv_self_seq_rm (ctx, 0, params.n_keep + 1 , params.n_keep + n_discard + 1);
- llama_kv_self_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
-
- n_past -= n_discard;
-
- LOG_DBG("after swap: n_past = %d\n", n_past);
-
- LOG_DBG("embd: %s\n", string_from(ctx, embd).c_str());
-
- }
-
- // evaluate tokens in batches
- // embd is typically prepared beforehand to fit within a batch, but not always
- for (int i = 0; i < (int) embd.size(); i += params.n_batch) {
- int n_eval = (int) embd.size() - i;
- if (n_eval > params.n_batch) {
- n_eval = params.n_batch;
- }
-
- LOG_DBG("eval: %s\n", string_from(ctx, embd).c_str());
-
- if (llama_decode(ctx, llama_batch_get_one(&embd[i], n_eval))) {
- LOG_ERR("%s : failed to eval\n", __func__);
- return 1;
- }
-
- n_past += n_eval;
-
- LOG_DBG("n_past = %d\n", n_past);
- }
-
- }
-
- embd.clear();
-
- if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
- const llama_token id = common_sampler_sample(smpl, ctx, -1);
-
- common_sampler_accept(smpl, id, true);
-
- // LOG_DBG("last: %s\n", string_from(ctx, smpl->prev.to_vector()).c_str());
-
- embd.push_back(id);
-
- // echo this to console
- input_echo = true;
-
- // decrement remaining sampling budget
- --n_remain;
-
- LOG_DBG("n_remain: %d\n", n_remain);
- } else {
- // some user input remains from prompt or interaction, forward it to processing
- LOG_DBG("embd_inp.size(): %d, n_consumed: %d\n", (int) embd_inp.size(), n_consumed);
- while ((int) embd_inp.size() > n_consumed) {
- embd.push_back(embd_inp[n_consumed]);
-
- // push the prompt in the sampling context in order to apply repetition penalties later
- // for the prompt, we don't apply grammar rules
- common_sampler_accept(smpl, embd_inp[n_consumed], false);
-
- ++n_consumed;
- if ((int) embd.size() >= params.n_batch) {
- break;
- }
- }
- }
-
- // display text
- if (input_echo) {
- for (auto id : embd) {
- const std::string token_str = common_token_to_piece(ctx, id);
- LOG("%s", token_str.c_str());
-
- if (embd.size() > 1) {
- input_tokens.push_back(id);
- } else {
- output_tokens.push_back(id);
- output_ss << token_str;
- }
- }
- }
- // reset color to default if we there is no pending user input
- if (input_echo && (int) embd_inp.size() == n_consumed) {
- console::set_display(console::reset);
- }
-
- // if not currently processing queued inputs;
- if ((int) embd_inp.size() <= n_consumed) {
- // deal with eot token in infill mode
- if ((common_sampler_last(smpl) == llama_vocab_eot(vocab) || is_interacting) && params.interactive){
- if (is_interacting && !params.interactive_first) {
- // print an eot token
- LOG("%s", common_token_to_piece(ctx, llama_vocab_eot(vocab)).c_str());
- }
- LOG("\n");
- console::set_display(console::user_input);
- std::string buffer;
- std::string line;
- bool another_line=true;
- // set a new prefix via stdin
- do {
- another_line = console::readline(line, params.multiline_input);
- buffer += line;
- } while (another_line);
- // check if we got an empty line, if so we use the old input
- if (!buffer.empty() && !(buffer.length() == 1 && buffer[0] == '\n')) {
- params.input_prefix = buffer;
- }
- buffer.clear();
- // set a new suffix via stdin
- do {
- another_line = console::readline(line, params.multiline_input);
- buffer += line;
- } while (another_line);
- // check if we got an empty line
- if (!buffer.empty() && !(buffer.length() == 1 && buffer[0] == '\n')) {
- params.input_suffix = buffer;
- }
- buffer.clear();
- // done taking input, reset color
- console::set_display(console::reset);
-
- if (params.escape) {
- //process escape sequences, for the initial prompt this is done in common.cpp when we load the params, but for the interactive mode we need to do it here
- string_process_escapes(params.input_prefix);
- string_process_escapes(params.input_suffix);
- }
-
- // tokenize new prefix and suffix
- std::vector<llama_token> inp_pfx = common_tokenize(ctx, params.input_prefix, false);
- std::vector<llama_token> inp_sfx = common_tokenize(ctx, params.input_suffix, false);
-
- inp_pfx.insert(inp_pfx.begin(), llama_vocab_fim_pre(vocab));
- inp_sfx.insert(inp_sfx.begin(), llama_vocab_fim_suf(vocab));
-
- embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
- embd_end = params.spm_infill ? inp_pfx : inp_sfx;
- if (add_bos) {
- embd_inp.insert(embd_inp.begin(), llama_vocab_bos(vocab));
- }
- embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end());
-
- if (middle_token >= 0) {
- embd_inp.push_back(middle_token);
- }
-
- embd.clear();
- n_remain = params.n_predict;
- n_past = 0;
- n_consumed = 0;
- is_interacting = false;
- }
- // deal with end of generation tokens in interactive mode
- else if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
- LOG_DBG("found EOS token\n");
-
- if (params.interactive) {
-
- is_interacting = true;
- LOG("\n");
- console::set_display(console::user_input);
- }
- }
-
- if (n_past > 0 && is_interacting && !params.interactive) {
- LOG_DBG("waiting for user input\n");
-
- if (params.input_prefix_bos) {
- LOG_DBG("adding input prefix BOS token\n");
- embd_inp.push_back(llama_vocab_bos(vocab));
- }
-
- std::string buffer;
- if (!params.input_prefix.empty()) {
- LOG_DBG("appending input prefix: '%s'\n", params.input_prefix.c_str());
- buffer += params.input_prefix;
- LOG("%s", buffer.c_str());
- }
-
- std::string line;
- bool another_line = true;
- do {
- another_line = console::readline(line, params.multiline_input);
- buffer += line;
- } while (another_line);
-
- // done taking input, reset color
- console::set_display(console::reset);
-
- // Add tokens to embd only if the input buffer is non-empty
- // Entering a empty line lets the user pass control back
- if (buffer.length() > 1) {
- // append input suffix if any
- if (!params.input_suffix.empty()) {
- LOG_DBG("appending input suffix: '%s'\n", params.input_suffix.c_str());
- buffer += params.input_suffix;
- LOG("%s", params.input_suffix.c_str());
- }
-
- LOG_DBG("buffer: '%s'\n", buffer.c_str());
-
- const size_t original_size = embd_inp.size();
-
- const auto line_inp = common_tokenize(ctx, buffer, false);
- LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str());
-
- embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end());
-
- for (size_t i = original_size; i < embd_inp.size(); ++i) {
- const llama_token token = embd_inp[i];
- output_tokens.push_back(token);
- output_ss << common_token_to_piece(ctx, token);
- }
-
- n_remain -= line_inp.size();
- LOG_DBG("n_remain: %d\n", n_remain);
- } else {
- LOG_DBG("empty line, passing control back\n");
- }
-
- input_echo = false; // do not echo this again
- }
-
- if (n_past > 0) {
- if (is_interacting) {
- common_sampler_reset(smpl);
- }
- is_interacting = false;
- }
- }
-
- // end of generation
- if (!embd.empty() && llama_vocab_is_eog(vocab, embd.back()) && !params.interactive) {
- break;
- }
-
- // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
- // We skip this logic when n_predict == -1 (infinite) or -2 (stop at context size).
- if (params.interactive && n_remain <= 0 && params.n_predict >= 0) {
- n_remain = params.n_predict;
- is_interacting = true;
- }
- }
- if (!params.interactive && n_remain <= 0) {
- LOG("%s", common_token_to_piece(ctx, llama_vocab_eot(vocab)).c_str());
- }
-
- LOG("\n");
- common_perf_print(ctx, smpl);
-
- common_sampler_free(smpl);
- llama_backend_free();
-
- return 0;
- }
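
The deleted example above is also the clearest illustration in this diff of how llama.cpp assembles a fill-in-the-middle (FIM) prompt: the tokenized prefix and suffix are each introduced by the vocabulary's FIM special tokens, concatenated in prefix-suffix or suffix-prefix order depending on `spm_infill`, optionally preceded by BOS, and terminated with the FIM "middle" token so generation continues at the gap. The following is a minimal sketch of that construction using only calls that appear in the removed code; the helper name `build_fim_prompt` and the surrounding model/context setup are assumptions for illustration, not part of this package.

#include "common.h"
#include "llama.h"

#include <string>
#include <vector>

// Sketch: assemble a FIM prompt the way the removed infill example did.
// Assumes `ctx` and `vocab` come from a loaded model whose vocabulary
// defines FIM special tokens (prefix/suffix/middle).
static std::vector<llama_token> build_fim_prompt(
        llama_context * ctx, const llama_vocab * vocab,
        const std::string & prefix, const std::string & suffix, bool spm_infill) {
    std::vector<llama_token> inp_pfx = common_tokenize(ctx, prefix, false);
    std::vector<llama_token> inp_sfx = common_tokenize(ctx, suffix, false);

    // each segment starts with its FIM marker token
    inp_pfx.insert(inp_pfx.begin(), llama_vocab_fim_pre(vocab));
    inp_sfx.insert(inp_sfx.begin(), llama_vocab_fim_suf(vocab));

    // PSM order: <PRE>prefix<SUF>suffix<MID>; SPM order swaps the two segments
    std::vector<llama_token> prompt = spm_infill ? inp_sfx : inp_pfx;
    const std::vector<llama_token> & tail = spm_infill ? inp_pfx : inp_sfx;

    if (llama_vocab_get_add_bos(vocab)) {
        prompt.insert(prompt.begin(), llama_vocab_bos(vocab));
    }
    prompt.insert(prompt.end(), tail.begin(), tail.end());

    const llama_token middle_token = llama_vocab_fim_mid(vocab);
    if (middle_token >= 0) {
        prompt.push_back(middle_token); // generation fills in from here
    }
    return prompt;
}
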
package/src/llama.cpp/examples/llava/CMakeLists.txt (deleted)
@@ -1,66 +0,0 @@
- add_library(llava OBJECT
- llava.cpp
- llava.h
- clip.cpp
- clip.h
- )
-
- target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
-
- target_include_directories(llava PUBLIC .)
- target_include_directories(llava PUBLIC ../..)
- target_include_directories(llava PUBLIC ../../common)
-
- target_compile_features(llava PRIVATE cxx_std_17)
-
- add_library(llava_static STATIC $<TARGET_OBJECTS:llava>)
- if (BUILD_SHARED_LIBS)
- set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON)
- target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD)
- add_library(llava_shared SHARED $<TARGET_OBJECTS:llava>)
- target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT})
- install(TARGETS llava_shared LIBRARY)
- endif()
-
- if (NOT MSVC)
- target_compile_options(llava PRIVATE -Wno-cast-qual) # stb_image.h
- endif()
-
- if(TARGET BUILD_INFO)
- add_dependencies(llava BUILD_INFO)
- endif()
-
- set(TARGET llama-llava-cli)
- add_executable(${TARGET} llava-cli.cpp)
- set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-cli)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
- set(TARGET llama-minicpmv-cli)
- add_executable(${TARGET} minicpmv-cli.cpp)
- set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-minicpmv-cli)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
- set(TARGET llama-qwen2vl-cli)
- add_executable(${TARGET} qwen2vl-cli.cpp)
- set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-qwen2vl-cli)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
- set(TARGET llama-gemma3-cli)
- add_executable(${TARGET} gemma3-cli.cpp)
- set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-gemma3-cli)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
- set(TARGET llama-llava-clip-quantize-cli)
- add_executable(${TARGET} clip-quantize-cli.cpp)
- set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME llama-llava-clip-quantize-cli)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
package/src/llama.cpp/examples/llava/android/build_64.sh (deleted)
@@ -1,8 +0,0 @@
- #!/bin/bash
- cmake ../../../../ \
- -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
- -DCMAKE_BUILD_TYPE=Release \
- -DANDROID_ABI="arm64-v8a" \
- -DANDROID_PLATFORM=android-23 $1
-
- make -j4