@fugood/llama.node 0.3.2 → 0.3.4

This diff shows the changes between publicly available package versions as they appear in their public registry; it is provided for informational purposes only.
Files changed (286)
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -20,7 +20,7 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
  if (n_eval > n_batch) {
  n_eval = n_batch;
  }
- if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
+ if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
  LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
  return false;
  }
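The hunk above reflects an upstream API change: llama_batch_get_one() now takes only a token pointer and a count, and the decode position is tracked by the context instead of being passed per call. A minimal sketch of the new call shape, with an illustrative wrapper function that is not part of this diff:

    #include "llama.h"
    #include <vector>

    // Feed one chunk of tokens with the two-argument llama_batch_get_one().
    // The old call also passed the current position (*n_past) and a sequence id.
    static bool eval_chunk(llama_context * ctx, std::vector<llama_token> & tokens,
                           int i, int n_eval) {
        llama_batch batch = llama_batch_get_one(&tokens[i], n_eval);
        return llama_decode(ctx, batch) == 0; // 0 indicates success
    }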
@@ -37,21 +37,21 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 
  static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
  std::string str2 = str;
- std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
+ std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
  eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
  return true;
  }
 
- static const char * sample(struct gpt_sampler * smpl,
+ static const char * sample(struct common_sampler * smpl,
  struct llama_context * ctx_llama,
  int * n_past) {
- const llama_token id = gpt_sampler_sample(smpl, ctx_llama, -1);
- gpt_sampler_accept(smpl, id, true);
+ const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
+ common_sampler_accept(smpl, id, true);
  static std::string ret;
  if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
  ret = "</s>";
  } else {
- ret = llama_token_to_piece(ctx_llama, id);
+ ret = common_token_to_piece(ctx_llama, id);
  }
  eval_id(ctx_llama, id, n_past);
  return ret.c_str();
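These renames are part of the common/ API move from gpt_* to common_* symbols; the call sequence itself is unchanged. A minimal sketch of drawing one token with the renamed sampler API, assuming the usual common/ headers and an already initialized model, context, and common_params:

    #include "common.h"
    #include "sampling.h"
    #include "llama.h"
    #include <string>

    // Sample a single token and convert it to text with the common_sampler_* API.
    static std::string sample_one(llama_model * model, llama_context * ctx,
                                  const common_params & params) {
        common_sampler * smpl = common_sampler_init(model, params.sampling);
        const llama_token id  = common_sampler_sample(smpl, ctx, -1); // sample from the last logits
        common_sampler_accept(smpl, id, true);                        // update the sampler state
        std::string piece = common_token_to_piece(ctx, id);
        common_sampler_free(smpl);
        return piece;
    }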
@@ -120,7 +120,7 @@ static void print_usage(int, char ** argv) {
  LOG("\n note: a lower temperature value like 0.1 is recommended for better quality.\n");
  }
 
- static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_params * params, const std::string & fname) {
+ static struct llava_image_embed * load_image(llava_context * ctx_llava, common_params * params, const std::string & fname) {
 
  // load and preprocess the image
  llava_image_embed * embed = NULL;
@@ -146,7 +146,7 @@ static struct llava_image_embed * load_image(llava_context * ctx_llava, gpt_para
  return embed;
  }
 
- static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed, gpt_params * params, const std::string & prompt) {
+ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_embed * image_embed, common_params * params, const std::string & prompt) {
  int n_past = 0;
 
  const int max_tgt_len = params->n_predict < 0 ? 256 : params->n_predict;
@@ -159,16 +159,16 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
  user_prompt = prompt.substr(image_pos + std::string("<image>").length());
  LOG_INF("system_prompt: %s\n", system_prompt.c_str());
  if (params->verbose_prompt) {
- auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
+ auto tmp = common_tokenize(ctx_llava->ctx_llama, system_prompt, true, true);
  for (int i = 0; i < (int) tmp.size(); i++) {
- LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+ LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
  }
  }
  LOG_INF("user_prompt: %s\n", user_prompt.c_str());
  if (params->verbose_prompt) {
- auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+ auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
  for (int i = 0; i < (int) tmp.size(); i++) {
- LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+ LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
  }
  }
  } else {
@@ -176,9 +176,9 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
  system_prompt = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.\nUSER:";
  user_prompt = prompt + "\nASSISTANT:";
  if (params->verbose_prompt) {
- auto tmp = ::llama_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
+ auto tmp = common_tokenize(ctx_llava->ctx_llama, user_prompt, true, true);
  for (int i = 0; i < (int) tmp.size(); i++) {
- LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
+ LOG_INF("%6d -> '%s'\n", tmp[i], common_token_to_piece(ctx_llava->ctx_llama, tmp[i]).c_str());
  }
  }
  }
@@ -191,7 +191,7 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
 
  LOG("\n");
 
- struct gpt_sampler * smpl = gpt_sampler_init(ctx_llava->model, params->sparams);
+ struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
  if (!smpl) {
  LOG_ERR("%s: failed to initialize sampling subsystem\n", __func__);
  exit(1);
@@ -211,15 +211,15 @@ static void process_prompt(struct llava_context * ctx_llava, struct llava_image_
  fflush(stdout);
  }
 
- gpt_sampler_free(smpl);
+ common_sampler_free(smpl);
  LOG("\n");
  }
 
- static struct llama_model * llava_init(gpt_params * params) {
+ static struct llama_model * llava_init(common_params * params) {
  llama_backend_init();
  llama_numa_init(params->numa);
 
- llama_model_params model_params = llama_model_params_from_gpt_params(*params);
+ llama_model_params model_params = common_model_params_to_llama(*params);
 
  llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
  if (model == NULL) {
@@ -229,7 +229,7 @@ static struct llama_model * llava_init(gpt_params * params) {
  return model;
  }
 
- static struct llava_context * llava_init_context(gpt_params * params, llama_model * model) {
+ static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
  const char * clip_path = params->mmproj.c_str();
 
  auto prompt = params->prompt;
@@ -240,7 +240,7 @@ static struct llava_context * llava_init_context(gpt_params * params, llama_mode
  auto ctx_clip = clip_model_load(clip_path, /*verbosity=*/ 1);
 
 
- llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
+ llama_context_params ctx_params = common_context_params_to_llama(*params);
  ctx_params.n_ctx = params->n_ctx < 2048 ? 2048 : params->n_ctx; // we need a longer context size to process image embeddings
 
  llama_context * ctx_llama = llama_new_context_with_model(model, ctx_params);
@@ -272,13 +272,13 @@ static void llava_free(struct llava_context * ctx_llava) {
  int main(int argc, char ** argv) {
  ggml_time_init();
 
- gpt_params params;
+ common_params params;
 
- if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
+ if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
  return 1;
  }
 
- gpt_init();
+ common_init();
 
  if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
  print_usage(argc, argv);
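Taken together, the hunks above show the renamed initialization path used by the examples. A minimal end-to-end sketch of that flow, assuming the usual common/ headers and a model path in params.model; this is illustrative, not an excerpt from llava-cli:

    #include "arg.h"
    #include "common.h"
    #include "llama.h"

    static void print_usage(int, char **) { /* usage text elided */ }

    int main(int argc, char ** argv) {
        common_params params;
        if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, print_usage)) {
            return 1;
        }
        common_init();                // shared logging / runtime setup
        llama_backend_init();
        llama_numa_init(params.numa);

        llama_model_params mparams = common_model_params_to_llama(params);
        llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);

        llama_context_params cparams = common_context_params_to_llama(params);
        llama_context * ctx = llama_new_context_with_model(model, cparams);

        // ... run the example ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }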
@@ -11,13 +11,17 @@
  #include <limits>
  #include <vector>
 
- #define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0)
- #define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)
-
- #define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
- #define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
- #define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
- #define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+ #if defined(LLAVA_LOG_OFF)
+ # define LOG_INF(...)
+ # define LOG_WRN(...)
+ # define LOG_ERR(...)
+ # define LOG_DBG(...)
+ #else // defined(LLAVA_LOG_OFF)
+ # define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+ # define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+ # define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
+ # define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
+ #endif // defined(LLAVA_LOG_OFF)
 
  // RGB uint8 image
  struct clip_image_u8 {
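With this change the llava helpers route all output through these LOG_* macros, so logging can be compiled out by defining LLAVA_LOG_OFF (for example -DLLAVA_LOG_OFF). A standalone sketch of the same pattern, outside the actual llava sources:

    #include <cstdio>

    // When LLAVA_LOG_OFF is defined, logging statements compile to nothing.
    #if defined(LLAVA_LOG_OFF)
    #  define LOG_INF(...)
    #else
    #  define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0)
    #endif

    int main() {
        LOG_INF("encoded %d image patches\n", 576); // no-op under -DLLAVA_LOG_OFF
        return 0;
    }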
@@ -255,25 +255,33 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
 
  const char * mm_patch_merge_type = clip_patch_merge_type(ctx_clip);
 
- if (clip_is_minicpmv(ctx_clip)) {
+ if (clip_is_minicpmv(ctx_clip) || clip_is_qwen2vl(ctx_clip)) {
  std::vector<float *> image_embd_v;
  image_embd_v.resize(img_res_v.size);
  struct clip_image_size * load_image_size = clip_image_size_init();
+
  for (size_t i = 0; i < img_res_v.size; i++) {
  const int64_t t_img_enc_step_start_us = ggml_time_us();
- image_embd_v[i] = (float *)malloc(clip_embd_nbytes(ctx_clip));
+ image_embd_v[i] = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny));
  int patch_size=14;
  load_image_size->width = img_res_v.data[i].nx;
  load_image_size->height = img_res_v.data[i].ny;
  clip_add_load_image_size(ctx_clip, load_image_size);
+
  bool encoded = false;
- int has_minicpmv_projector = clip_is_minicpmv(ctx_clip);
- if (has_minicpmv_projector == 2) {
- encoded = clip_image_encode(ctx_clip, n_threads, only_v2_5_reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
- }
- else if (has_minicpmv_projector == 3) {
+ if (clip_is_qwen2vl(ctx_clip)) {
  encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
  }
+ else {
+ int has_minicpmv_projector = clip_is_minicpmv(ctx_clip);
+ if (has_minicpmv_projector == 2) {
+ encoded = clip_image_encode(ctx_clip, n_threads, only_v2_5_reshape_by_patch(&img_res_v.data[i], patch_size), image_embd_v[i]);
+ }
+ else if (has_minicpmv_projector == 3) {
+ encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]);
+ }
+ }
+
  if (!encoded) {
  LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
  return false;
@@ -286,8 +298,11 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
 
  int n_img_pos_out = 0;
  for (size_t i = 0; i < image_embd_v.size(); i++) {
- std::memcpy(image_embd + n_img_pos_out * clip_n_mmproj_embd(ctx_clip), image_embd_v[i], clip_embd_nbytes(ctx_clip));
- n_img_pos_out += clip_n_patches(ctx_clip);
+ std::memcpy(
+ image_embd + n_img_pos_out * clip_n_mmproj_embd(ctx_clip),
+ image_embd_v[i],
+ clip_embd_nbytes_by_img(ctx_clip, img_res_v.data[i].nx, img_res_v.data[i].ny));
+ n_img_pos_out += clip_n_patches_by_img(ctx_clip, &img_res_v.data[i]);
  }
  *n_img_pos = n_img_pos_out;
  for (size_t i = 0; i < image_embd_v.size(); i++) {
@@ -383,7 +398,13 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co
  if (clip_is_minicpmv(ctx_clip)) {
  num_max_patches = 10;
  }
- float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*num_max_patches); // TODO: base on gridsize/llava model
+ float * image_embd;
+ if (clip_is_qwen2vl(ctx_clip)) {
+ // qwen2vl don't split image into chunks, so `num_max_patches` is not needed.
+ image_embd = (float *)malloc(clip_embd_nbytes_by_img(ctx_clip, img->nx, img->ny));
+ } else {
+ image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*num_max_patches); // TODO: base on gridsize/llava model
+ }
  if (!image_embd) {
  LOG_ERR("Unable to allocate memory for image embeddings\n");
  return false;
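The per-image sizing helpers appear because, with Qwen2-VL, the number of patches depends on the input resolution rather than being fixed per model. A small sketch of sizing one embedding buffer, mirroring the clip_embd_nbytes_by_img(ctx, nx, ny) call shown above; the wrapper function is illustrative:

    #include "clip.h"
    #include <cstdlib>

    // Allocate an embedding buffer sized for one preprocessed image of dimensions nx x ny.
    static float * alloc_image_embd(clip_ctx * ctx_clip, int nx, int ny) {
        size_t nbytes = clip_embd_nbytes_by_img(ctx_clip, nx, ny);
        return (float *) malloc(nbytes);
    }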
@@ -401,6 +422,39 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co
  return true;
  }
 
+ struct llava_embd_batch {
+ std::vector<llama_pos> pos;
+ std::vector<int32_t> n_seq_id;
+ std::vector<llama_seq_id> seq_id_0;
+ std::vector<llama_seq_id *> seq_ids;
+ std::vector<int8_t> logits;
+ llama_batch batch;
+ llava_embd_batch(float * embd, int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) {
+ pos .resize(n_tokens);
+ n_seq_id.resize(n_tokens);
+ seq_ids .resize(n_tokens + 1);
+ logits .resize(n_tokens);
+ seq_id_0.resize(1);
+ seq_id_0[0] = seq_id;
+ seq_ids [n_tokens] = nullptr;
+ batch = {
+ /*n_tokens =*/ n_tokens,
+ /*tokens =*/ nullptr,
+ /*embd =*/ embd,
+ /*pos =*/ pos.data(),
+ /*n_seq_id =*/ n_seq_id.data(),
+ /*seq_id =*/ seq_ids.data(),
+ /*logits =*/ logits.data(),
+ };
+ for (int i = 0; i < n_tokens; i++) {
+ batch.pos [i] = pos_0 + i;
+ batch.n_seq_id[i] = 1;
+ batch.seq_id [i] = seq_id_0.data();
+ batch.logits [i] = false;
+ }
+ }
+ };
+
  bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_embed * image_embed, int n_batch, int * n_past) {
  int n_embd = llama_n_embd(llama_get_model(ctx_llama));
 
@@ -409,8 +463,9 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
  if (n_eval > n_batch) {
  n_eval = n_batch;
  }
- llama_batch batch = {int32_t(n_eval), nullptr, (image_embed->embed+i*n_embd), nullptr, nullptr, nullptr, nullptr, *n_past, 1, 0, };
- if (llama_decode(ctx_llama, batch)) {
+ float * embd = image_embed->embed+i*n_embd;
+ llava_embd_batch llava_batch = llava_embd_batch(embd, n_eval, *n_past, 0);
+ if (llama_decode(ctx_llama, llava_batch.batch)) {
  LOG_ERR("%s : failed to eval\n", __func__);
  return false;
  }
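The helper exists because llama_batch no longer carries the all_pos_0/all_seq_id shortcut fields, so an embeddings-only batch must spell out per-token positions and sequence ids; that is what llava_embd_batch does. A condensed sketch of the same step, assuming the surrounding variables of llava_eval_image_embed (i, n_embd, n_eval, n_past, image_embed, ctx_llama):

    // Decode n_eval image-embedding "tokens" starting at position *n_past.
    // llava_embd_batch (defined above) fills pos[k] = *n_past + k for each token k,
    // sets n_seq_id[k] = 1, and points every seq_id[k] at the single sequence id 0.
    float * embd = image_embed->embed + i * n_embd;
    llava_embd_batch wrapped(embd, n_eval, *n_past, /*seq_id=*/0);
    if (llama_decode(ctx_llama, wrapped.batch) != 0) {
        return false; // decode failed
    }
    *n_past += n_eval; // advance the position for the next chunk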
@@ -432,7 +487,7 @@ struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * c
  bool image_embed_result = llava_image_embed_make_with_clip_img(ctx_clip, n_threads, img, &image_embed, &n_image_pos);
  if (!image_embed_result) {
  clip_image_u8_free(img);
- LOG_ERR("%s: coulnd't embed the image\n", __func__);
+ LOG_ERR("%s: couldn't embed the image\n", __func__);
  return NULL;
  }
 
@@ -464,10 +519,16 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long
  errno = 0;
  size_t ret = fread(buffer, 1, fileSize, file); // Read the file into the buffer
  if (ferror(file)) {
- die_fmt("read error: %s", strerror(errno));
+ LOG_ERR("read error: %s", strerror(errno));
+ free(buffer);
+ fclose(file);
+ return false;
  }
  if (ret != (size_t) fileSize) {
- die("unexpectedly reached end of file");
+ LOG_ERR("unexpectedly reached end of file");
+ free(buffer);
+ fclose(file);
+ return false;
  }
  fclose(file); // Close the file
 
@@ -25,11 +25,11 @@ static void show_additional_info(int /*argc*/, char ** argv) {
  LOG("\nnote: a lower temperature value like 0.1 is recommended for better quality.\n");
  }
 
- static struct llama_model * llava_init(gpt_params * params) {
+ static struct llama_model * llava_init(common_params * params) {
  llama_backend_init();
  llama_numa_init(params->numa);
 
- llama_model_params model_params = llama_model_params_from_gpt_params(*params);
+ llama_model_params model_params = common_model_params_to_llama(*params);
 
  llama_model * model = llama_load_model_from_file(params->model.c_str(), model_params);
  if (model == NULL) {
@@ -39,13 +39,13 @@ static struct llama_model * llava_init(gpt_params * params) {
  return model;
  }
 
- static struct llava_context * llava_init_context(gpt_params * params, llama_model * model) {
+ static struct llava_context * llava_init_context(common_params * params, llama_model * model) {
  auto prompt = params->prompt;
  if (prompt.empty()) {
  prompt = "describe the image in detail.";
  }
 
- llama_context_params ctx_params = llama_context_params_from_gpt_params(*params);
+ llama_context_params ctx_params = common_context_params_to_llama(*params);
  if (params->n_ctx < 2048) {
  // warn user here, "Image processing requires at least 2048 context, setting context to 2048"
  LOG_WRN("%s: Image processing requires at least 2048 context, setting context to 2048\n" , __func__);
@@ -79,7 +79,7 @@ static void llava_free(struct llava_context * ctx_llava) {
  llama_backend_free();
  }
 
- static struct clip_ctx * clip_init_context(gpt_params * params) {
+ static struct clip_ctx * clip_init_context(common_params * params) {
  const char * clip_path = params->mmproj.c_str();
 
  auto prompt = params->prompt;
@@ -97,7 +97,7 @@ static bool eval_tokens(struct llama_context * ctx_llama, std::vector<llama_toke
  if (n_eval > n_batch) {
  n_eval = n_batch;
  }
- if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval, *n_past, 0))) {
+ if (llama_decode(ctx_llama, llama_batch_get_one(&tokens[i], n_eval))) {
  LOG_ERR("%s : failed to eval. token %d/%d (batch size %d, n_past %d)\n", __func__, i, N, n_batch, *n_past);
  return false;
  }
@@ -114,7 +114,7 @@ static bool eval_id(struct llama_context * ctx_llama, int id, int * n_past) {
 
  static bool eval_string(struct llama_context * ctx_llama, const char* str, int n_batch, int * n_past, bool add_bos){
  std::string str2 = str;
- std::vector<llama_token> embd_inp = ::llama_tokenize(ctx_llama, str2, add_bos, true);
+ std::vector<llama_token> embd_inp = common_tokenize(ctx_llama, str2, add_bos, true);
  return eval_tokens(ctx_llama, embd_inp, n_batch, n_past);
  }
 
@@ -129,7 +129,7 @@ static void process_eval_image_embed(struct llava_context * ctx_llava, const str
  llava_image_embed_free(slice_embed);
  }
 
- static void process_image(struct llava_context * ctx_llava, struct llava_image_embed * embeds, gpt_params * params, int &n_past) {
+ static void process_image(struct llava_context * ctx_llava, struct llava_image_embed * embeds, common_params * params, int &n_past) {
  std::string system_prompt;
  int idx = 0;
  int num_image_embeds = embeds->n_image_pos / clip_n_patches(ctx_llava->ctx_clip);
@@ -162,22 +162,22 @@ static void process_image(struct llava_context * ctx_llava, struct llava_image_e
  LOG_INF("%s: image token past: %d\n", __func__, n_past);
  }
 
- static const char * sample(struct gpt_sampler * smpl,
+ static const char * sample(struct common_sampler * smpl,
  struct llama_context * ctx_llama,
  int * n_past) {
- const llama_token id = gpt_sampler_sample(smpl, ctx_llama, -1);
- gpt_sampler_accept(smpl, id, true);
+ const llama_token id = common_sampler_sample(smpl, ctx_llama, -1);
+ common_sampler_accept(smpl, id, true);
  static std::string ret;
  if (llama_token_is_eog(llama_get_model(ctx_llama), id)) {
  ret = "</s>";
  } else {
- ret = llama_token_to_piece(ctx_llama, id);
+ ret = common_token_to_piece(ctx_llama, id);
  }
  eval_id(ctx_llama, id, n_past);
  return ret.c_str();
  }
 
- static struct llava_context * minicpmv_init(gpt_params * params, const std::string & fname, int &n_past){
+ static struct llava_context * minicpmv_init(common_params * params, const std::string & fname, int &n_past){
  auto * ctx_clip = clip_init_context(params);
  auto * embeds = llava_image_embed_make_with_filename(ctx_clip, params->cpuparams.n_threads, fname.c_str());
  if (!embeds) {
@@ -213,7 +213,7 @@ static struct llava_context * minicpmv_init(gpt_params * params, const std::stri
  return ctx_llava;
  }
 
- static struct gpt_sampler * llama_init(struct llava_context * ctx_llava, gpt_params * params, const std::string & prompt, int & n_past, bool is_first = false){
+ static struct common_sampler * llama_init(struct llava_context * ctx_llava, common_params * params, const std::string & prompt, int & n_past, bool is_first = false){
  std::string user_prompt = prompt;
  int has_minicpmv_projector = clip_is_minicpmv(ctx_llava->ctx_clip);
  if (!is_first) {
@@ -237,11 +237,11 @@ static struct gpt_sampler * llama_init(struct llava_context * ctx_llava, gpt_par
 
  LOG_INF("\n");
 
- struct gpt_sampler * smpl = gpt_sampler_init(ctx_llava->model, params->sparams);
+ struct common_sampler * smpl = common_sampler_init(ctx_llava->model, params->sampling);
  return smpl;
  }
 
- static const char * llama_loop(struct llava_context * ctx_llava,struct gpt_sampler * smpl, int &n_past){
+ static const char * llama_loop(struct llava_context * ctx_llava,struct common_sampler * smpl, int &n_past){
 
  const char * tmp = sample(smpl, ctx_llava->ctx_llama, &n_past);
  return tmp;
@@ -250,13 +250,13 @@ static const char * llama_loop(struct llava_context * ctx_llava,struct gpt_sampl
  int main(int argc, char ** argv) {
  ggml_time_init();
 
- gpt_params params;
+ common_params params;
 
- if (!gpt_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
+ if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
  return 1;
  }
 
- gpt_init();
+ common_init();
 
  if (params.mmproj.empty() || (params.image.empty())) {
  show_additional_info(argc, argv);
@@ -290,7 +290,7 @@ int main(int argc, char ** argv) {
 
  fflush(stdout);
  }
- gpt_sampler_free(smpl);
+ common_sampler_free(smpl);
  }else {
  while (true) {
  LOG("<user>");
@@ -309,7 +309,7 @@ int main(int argc, char ** argv) {
  if (strstr(response.c_str(), "<user>")) break; // minicpm-v
  fflush(stdout);
  }
- gpt_sampler_free(smpl);
+ common_sampler_free(smpl);
  }
  }
  printf("\n");