@fugood/llama.node 0.6.3 → 1.0.0-beta.2

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (377)
  1. package/CMakeLists.txt +40 -30
  2. package/README.md +4 -1
  3. package/lib/binding.js +41 -29
  4. package/lib/binding.ts +26 -25
  5. package/package.json +40 -7
  6. package/scripts/build.js +47 -0
  7. package/scripts/llama.cpp.patch +109 -0
  8. package/src/anyascii.c +22223 -0
  9. package/src/anyascii.h +42 -0
  10. package/src/tts_utils.cpp +20 -7
  11. package/src/tts_utils.h +2 -0
  12. package/bin/darwin/arm64/llama-node.node +0 -0
  13. package/bin/darwin/x64/llama-node.node +0 -0
  14. package/bin/linux/arm64/llama-node.node +0 -0
  15. package/bin/linux/x64/llama-node.node +0 -0
  16. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  17. package/bin/linux-cuda/x64/llama-node.node +0 -0
  18. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  19. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  20. package/bin/win32/x64/llama-node.node +0 -0
  21. package/bin/win32/x64/node.lib +0 -0
  22. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  23. package/bin/win32-vulkan/arm64/node.lib +0 -0
  24. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  25. package/bin/win32-vulkan/x64/node.lib +0 -0
  26. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +0 -233
  27. package/src/llama.cpp/.github/workflows/build.yml +0 -1078
  28. package/src/llama.cpp/.github/workflows/close-issue.yml +0 -28
  29. package/src/llama.cpp/.github/workflows/docker.yml +0 -178
  30. package/src/llama.cpp/.github/workflows/editorconfig.yml +0 -29
  31. package/src/llama.cpp/.github/workflows/gguf-publish.yml +0 -44
  32. package/src/llama.cpp/.github/workflows/labeler.yml +0 -17
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +0 -33
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +0 -30
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +0 -40
  36. package/src/llama.cpp/.github/workflows/release.yml +0 -739
  37. package/src/llama.cpp/.github/workflows/server.yml +0 -237
  38. package/src/llama.cpp/.github/workflows/winget.yml +0 -42
  39. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +0 -16
  40. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +0 -16
  41. package/src/llama.cpp/cmake/build-info.cmake +0 -64
  42. package/src/llama.cpp/cmake/common.cmake +0 -35
  43. package/src/llama.cpp/cmake/git-vars.cmake +0 -22
  44. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -5
  45. package/src/llama.cpp/common/build-info.cpp.in +0 -4
  46. package/src/llama.cpp/docs/build.md +0 -561
  47. package/src/llama.cpp/examples/CMakeLists.txt +0 -43
  48. package/src/llama.cpp/examples/batched/CMakeLists.txt +0 -5
  49. package/src/llama.cpp/examples/batched/batched.cpp +0 -246
  50. package/src/llama.cpp/examples/chat-13B.bat +0 -57
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +0 -5
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +0 -941
  53. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +0 -35
  54. package/src/llama.cpp/examples/embedding/CMakeLists.txt +0 -5
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +0 -323
  56. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +0 -10
  57. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +0 -194
  58. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +0 -5
  59. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +0 -83
  60. package/src/llama.cpp/examples/gguf/CMakeLists.txt +0 -5
  61. package/src/llama.cpp/examples/gguf/gguf.cpp +0 -265
  62. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +0 -22
  63. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +0 -46
  64. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +0 -295
  65. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +0 -52
  66. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +0 -221
  67. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +0 -24
  68. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +0 -42
  69. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +0 -7093
  70. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +0 -694
  71. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +0 -5
  72. package/src/llama.cpp/examples/gritlm/gritlm.cpp +0 -229
  73. package/src/llama.cpp/examples/jeopardy/questions.txt +0 -100
  74. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +0 -65
  75. package/src/llama.cpp/examples/llama.android/build.gradle.kts +0 -6
  76. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +0 -71
  77. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +0 -53
  78. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +0 -452
  79. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +0 -18
  80. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +0 -5
  81. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -472
  82. package/src/llama.cpp/examples/lookup/CMakeLists.txt +0 -23
  83. package/src/llama.cpp/examples/lookup/lookup-create.cpp +0 -40
  84. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +0 -47
  85. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +0 -157
  86. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -242
  87. package/src/llama.cpp/examples/parallel/CMakeLists.txt +0 -5
  88. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -492
  89. package/src/llama.cpp/examples/passkey/CMakeLists.txt +0 -5
  90. package/src/llama.cpp/examples/passkey/passkey.cpp +0 -277
  91. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +0 -5
  92. package/src/llama.cpp/examples/retrieval/retrieval.cpp +0 -304
  93. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +0 -5
  94. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +0 -246
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +0 -5
  96. package/src/llama.cpp/examples/simple/simple.cpp +0 -206
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +0 -5
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +0 -206
  99. package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +0 -11
  100. package/src/llama.cpp/examples/speculative/CMakeLists.txt +0 -5
  101. package/src/llama.cpp/examples/speculative/speculative.cpp +0 -644
  102. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +0 -5
  103. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +0 -261
  104. package/src/llama.cpp/examples/sycl/CMakeLists.txt +0 -9
  105. package/src/llama.cpp/examples/sycl/build.sh +0 -23
  106. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +0 -13
  107. package/src/llama.cpp/examples/sycl/run-llama2.sh +0 -27
  108. package/src/llama.cpp/examples/sycl/run-llama3.sh +0 -28
  109. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +0 -33
  110. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +0 -9
  111. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +0 -9
  112. package/src/llama.cpp/examples/training/CMakeLists.txt +0 -5
  113. package/src/llama.cpp/examples/training/finetune.cpp +0 -96
  114. package/src/llama.cpp/ggml/cmake/GitVars.cmake +0 -22
  115. package/src/llama.cpp/ggml/cmake/common.cmake +0 -26
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1042
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -255
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +0 -586
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +0 -2008
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +0 -87
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +0 -517
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +0 -74
  123. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +0 -179
  124. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +0 -258
  125. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +0 -2863
  126. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +0 -1110
  127. package/src/llama.cpp/ggml/src/ggml-cann/common.h +0 -420
  128. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +0 -2570
  129. package/src/llama.cpp/ggml/src/ggml-common.h +0 -1857
  130. package/src/llama.cpp/ggml/src/ggml-cpu/cmake/FindSIMD.cmake +0 -100
  131. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +0 -184
  132. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +0 -15
  133. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +0 -243
  134. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +0 -140
  135. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +0 -131
  136. package/src/llama.cpp/ggml/src/ggml-impl.h +0 -601
  137. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +0 -166
  138. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +0 -2251
  139. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +0 -120
  140. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +0 -622
  141. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +0 -113
  142. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +0 -96
  143. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -5124
  144. package/src/llama.cpp/ggml/src/ggml-opt.cpp +0 -1037
  145. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -5232
  146. package/src/llama.cpp/ggml/src/ggml-quants.h +0 -100
  147. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +0 -9
  148. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +0 -1813
  149. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +0 -189
  150. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +0 -37
  151. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +0 -239
  152. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.hpp +0 -39
  153. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +0 -83
  154. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +0 -493
  155. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +0 -197
  156. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +0 -20
  157. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +0 -100
  158. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +0 -20
  159. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +0 -623
  160. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +0 -34
  161. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +0 -701
  162. package/src/llama.cpp/ggml/src/ggml-sycl/cpy.hpp +0 -11
  163. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +0 -791
  164. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +0 -1160
  165. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +0 -27
  166. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +0 -2957
  167. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -1536
  168. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +0 -75
  169. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +0 -99
  170. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +0 -311
  171. package/src/llama.cpp/ggml/src/ggml-sycl/getrows.hpp +0 -20
  172. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +0 -4443
  173. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +0 -105
  174. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +0 -8
  175. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +0 -136
  176. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +0 -21
  177. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +0 -3030
  178. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +0 -33
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +0 -1108
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +0 -27
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +0 -474
  182. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +0 -26
  183. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +0 -46
  184. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +0 -10
  185. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +0 -74
  186. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +0 -83
  187. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +0 -362
  188. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +0 -20
  189. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +0 -264
  190. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +0 -20
  191. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.cpp +0 -13
  192. package/src/llama.cpp/ggml/src/ggml-sycl/sycl_hw.hpp +0 -23
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +0 -73
  194. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +0 -20
  195. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +0 -1215
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.cpp +0 -305
  197. package/src/llama.cpp/ggml/src/ggml-sycl/wkv.hpp +0 -10
  198. package/src/llama.cpp/ggml/src/ggml-threading.cpp +0 -12
  199. package/src/llama.cpp/ggml/src/ggml-threading.h +0 -14
  200. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +0 -196
  201. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +0 -10699
  202. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -39
  203. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +0 -751
  204. package/src/llama.cpp/ggml/src/ggml.c +0 -6550
  205. package/src/llama.cpp/ggml/src/gguf.cpp +0 -1330
  206. package/src/llama.cpp/models/.editorconfig +0 -1
  207. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  208. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  209. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  210. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +0 -112
  211. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +0 -46
  212. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +0 -112
  213. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +0 -46
  214. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  215. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +0 -112
  216. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +0 -46
  217. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +0 -112
  219. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +0 -46
  220. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  221. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +0 -112
  222. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +0 -46
  223. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +0 -112
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +0 -46
  225. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +0 -112
  227. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +0 -46
  228. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +0 -112
  230. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +0 -46
  231. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.inp +0 -112
  232. package/src/llama.cpp/models/ggml-vocab-gpt-4o.gguf.out +0 -46
  233. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +0 -112
  236. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +0 -46
  237. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +0 -112
  239. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +0 -46
  240. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.inp +0 -112
  241. package/src/llama.cpp/models/ggml-vocab-llama4.gguf.out +0 -46
  242. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +0 -112
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +0 -46
  245. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +0 -112
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +0 -46
  248. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.inp +0 -112
  249. package/src/llama.cpp/models/ggml-vocab-pixtral.gguf.out +0 -46
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +0 -112
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +0 -46
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +0 -112
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +0 -46
  256. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +0 -112
  257. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +0 -46
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  259. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +0 -112
  260. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +0 -46
  261. package/src/llama.cpp/pocs/CMakeLists.txt +0 -14
  262. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +0 -9
  263. package/src/llama.cpp/pocs/vdot/q8dot.cpp +0 -173
  264. package/src/llama.cpp/pocs/vdot/vdot.cpp +0 -311
  265. package/src/llama.cpp/prompts/LLM-questions.txt +0 -49
  266. package/src/llama.cpp/prompts/alpaca.txt +0 -1
  267. package/src/llama.cpp/prompts/assistant.txt +0 -31
  268. package/src/llama.cpp/prompts/chat-with-baichuan.txt +0 -4
  269. package/src/llama.cpp/prompts/chat-with-bob.txt +0 -7
  270. package/src/llama.cpp/prompts/chat-with-qwen.txt +0 -1
  271. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +0 -7
  272. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +0 -7
  273. package/src/llama.cpp/prompts/chat.txt +0 -28
  274. package/src/llama.cpp/prompts/dan-modified.txt +0 -1
  275. package/src/llama.cpp/prompts/dan.txt +0 -1
  276. package/src/llama.cpp/prompts/mnemonics.txt +0 -93
  277. package/src/llama.cpp/prompts/parallel-questions.txt +0 -43
  278. package/src/llama.cpp/prompts/reason-act.txt +0 -18
  279. package/src/llama.cpp/requirements/requirements-all.txt +0 -15
  280. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +0 -2
  281. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +0 -7
  282. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +0 -7
  283. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +0 -5
  284. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +0 -1
  285. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +0 -4
  286. package/src/llama.cpp/requirements/requirements-gguf_editor_gui.txt +0 -3
  287. package/src/llama.cpp/requirements/requirements-pydantic.txt +0 -3
  288. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +0 -1
  289. package/src/llama.cpp/requirements/requirements-tool_bench.txt +0 -12
  290. package/src/llama.cpp/requirements.txt +0 -13
  291. package/src/llama.cpp/scripts/build-info.sh +0 -30
  292. package/src/llama.cpp/scripts/install-oneapi.bat +0 -19
  293. package/src/llama.cpp/scripts/xxd.cmake +0 -16
  294. package/src/llama.cpp/tests/CMakeLists.txt +0 -177
  295. package/src/llama.cpp/tests/get-model.cpp +0 -21
  296. package/src/llama.cpp/tests/get-model.h +0 -2
  297. package/src/llama.cpp/tests/test-arg-parser.cpp +0 -178
  298. package/src/llama.cpp/tests/test-autorelease.cpp +0 -24
  299. package/src/llama.cpp/tests/test-backend-ops.cpp +0 -4793
  300. package/src/llama.cpp/tests/test-barrier.cpp +0 -94
  301. package/src/llama.cpp/tests/test-c.c +0 -7
  302. package/src/llama.cpp/tests/test-chat-template.cpp +0 -417
  303. package/src/llama.cpp/tests/test-chat.cpp +0 -985
  304. package/src/llama.cpp/tests/test-double-float.cpp +0 -57
  305. package/src/llama.cpp/tests/test-gbnf-validator.cpp +0 -109
  306. package/src/llama.cpp/tests/test-gguf.cpp +0 -1338
  307. package/src/llama.cpp/tests/test-grammar-integration.cpp +0 -1308
  308. package/src/llama.cpp/tests/test-grammar-llguidance.cpp +0 -1201
  309. package/src/llama.cpp/tests/test-grammar-parser.cpp +0 -519
  310. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +0 -1304
  311. package/src/llama.cpp/tests/test-llama-grammar.cpp +0 -408
  312. package/src/llama.cpp/tests/test-log.cpp +0 -39
  313. package/src/llama.cpp/tests/test-model-load-cancel.cpp +0 -27
  314. package/src/llama.cpp/tests/test-mtmd-c-api.c +0 -63
  315. package/src/llama.cpp/tests/test-opt.cpp +0 -904
  316. package/src/llama.cpp/tests/test-quantize-fns.cpp +0 -186
  317. package/src/llama.cpp/tests/test-quantize-perf.cpp +0 -365
  318. package/src/llama.cpp/tests/test-quantize-stats.cpp +0 -424
  319. package/src/llama.cpp/tests/test-regex-partial.cpp +0 -288
  320. package/src/llama.cpp/tests/test-rope.cpp +0 -262
  321. package/src/llama.cpp/tests/test-sampling.cpp +0 -399
  322. package/src/llama.cpp/tests/test-tokenizer-0.cpp +0 -312
  323. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +0 -155
  324. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +0 -125
  325. package/src/llama.cpp/tools/CMakeLists.txt +0 -39
  326. package/src/llama.cpp/tools/batched-bench/CMakeLists.txt +0 -5
  327. package/src/llama.cpp/tools/batched-bench/batched-bench.cpp +0 -204
  328. package/src/llama.cpp/tools/cvector-generator/CMakeLists.txt +0 -5
  329. package/src/llama.cpp/tools/cvector-generator/completions.txt +0 -582
  330. package/src/llama.cpp/tools/cvector-generator/cvector-generator.cpp +0 -508
  331. package/src/llama.cpp/tools/cvector-generator/mean.hpp +0 -48
  332. package/src/llama.cpp/tools/cvector-generator/negative.txt +0 -4
  333. package/src/llama.cpp/tools/cvector-generator/pca.hpp +0 -315
  334. package/src/llama.cpp/tools/cvector-generator/positive.txt +0 -4
  335. package/src/llama.cpp/tools/export-lora/CMakeLists.txt +0 -5
  336. package/src/llama.cpp/tools/export-lora/export-lora.cpp +0 -434
  337. package/src/llama.cpp/tools/gguf-split/CMakeLists.txt +0 -5
  338. package/src/llama.cpp/tools/gguf-split/gguf-split.cpp +0 -583
  339. package/src/llama.cpp/tools/imatrix/CMakeLists.txt +0 -5
  340. package/src/llama.cpp/tools/imatrix/imatrix.cpp +0 -667
  341. package/src/llama.cpp/tools/llama-bench/CMakeLists.txt +0 -5
  342. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +0 -2024
  343. package/src/llama.cpp/tools/main/CMakeLists.txt +0 -5
  344. package/src/llama.cpp/tools/main/main.cpp +0 -977
  345. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +0 -58
  346. package/src/llama.cpp/tools/mtmd/clip-impl.h +0 -462
  347. package/src/llama.cpp/tools/mtmd/clip.cpp +0 -4024
  348. package/src/llama.cpp/tools/mtmd/clip.h +0 -101
  349. package/src/llama.cpp/tools/mtmd/deprecation-warning.cpp +0 -22
  350. package/src/llama.cpp/tools/mtmd/miniaudio.h +0 -93468
  351. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +0 -855
  352. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +0 -62
  353. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +0 -377
  354. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +0 -297
  355. package/src/llama.cpp/tools/mtmd/mtmd.cpp +0 -942
  356. package/src/llama.cpp/tools/mtmd/mtmd.h +0 -362
  357. package/src/llama.cpp/tools/mtmd/requirements.txt +0 -5
  358. package/src/llama.cpp/tools/perplexity/CMakeLists.txt +0 -5
  359. package/src/llama.cpp/tools/perplexity/perplexity.cpp +0 -2063
  360. package/src/llama.cpp/tools/quantize/CMakeLists.txt +0 -6
  361. package/src/llama.cpp/tools/quantize/quantize.cpp +0 -519
  362. package/src/llama.cpp/tools/rpc/CMakeLists.txt +0 -4
  363. package/src/llama.cpp/tools/rpc/rpc-server.cpp +0 -322
  364. package/src/llama.cpp/tools/run/CMakeLists.txt +0 -16
  365. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.cpp +0 -1995
  366. package/src/llama.cpp/tools/run/linenoise.cpp/linenoise.h +0 -137
  367. package/src/llama.cpp/tools/run/run.cpp +0 -1261
  368. package/src/llama.cpp/tools/server/CMakeLists.txt +0 -51
  369. package/src/llama.cpp/tools/server/bench/requirements.txt +0 -2
  370. package/src/llama.cpp/tools/server/httplib.h +0 -10506
  371. package/src/llama.cpp/tools/server/server.cpp +0 -4966
  372. package/src/llama.cpp/tools/server/tests/requirements.txt +0 -8
  373. package/src/llama.cpp/tools/server/utils.hpp +0 -1337
  374. package/src/llama.cpp/tools/tokenize/CMakeLists.txt +0 -5
  375. package/src/llama.cpp/tools/tokenize/tokenize.cpp +0 -416
  376. package/src/llama.cpp/tools/tts/CMakeLists.txt +0 -5
  377. package/src/llama.cpp/tools/tts/tts.cpp +0 -1092
package/src/llama.cpp/examples/lookahead/lookahead.cpp
@@ -1,472 +0,0 @@
- #include "arg.h"
- #include "common.h"
- #include "sampling.h"
- #include "log.h"
- #include "llama.h"
-
- #include <cstdio>
- #include <string>
- #include <vector>
- #include <algorithm>
-
- struct ngram_data {
-     bool active = false;
-
-     llama_seq_id seq_id = -1;
-
-     std::vector<int> i_batch;
-
-     std::vector<llama_token> tokens;
- };
-
- // n-gram container
- struct ngram_container {
-     ngram_container(int n_vocab, int N, int G) {
-         cnt.resize(n_vocab);
-         head.resize(n_vocab);
-         tokens.resize(n_vocab * G * (N - 1));
-     }
-
-     int n_total = 0;
-
-     std::vector<int> cnt;
-     std::vector<int> head;
-
-     // [n_vocab][G][N - 1]
-     // for each token of the vocab, keep a ring-buffer of capacity G of n-grams of size N - 1
-     std::vector<llama_token> tokens;
- };
-
- int main(int argc, char ** argv) {
-     common_params params;
-
-     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
-         return 1;
-     }
-
-     common_init();
-
-     const int W = 15; // lookahead window
-     const int N = 5; // n-gram size
-     const int G = 15; // max verification n-grams
-
-     // init llama.cpp
-     llama_backend_init();
-     llama_numa_init(params.numa);
-
-     // load the target model
-     common_init_result llama_init = common_init_from_params(params);
-
-     llama_model * model = llama_init.model.get();
-     llama_context * ctx = llama_init.context.get();
-
-     const llama_vocab * vocab = llama_model_get_vocab(model);
-
-     // Tokenize the prompt
-     std::vector<llama_token> inp;
-     std::vector<llama_token> all;
-
-     inp = common_tokenize(ctx, params.prompt, true, true);
-     all = inp;
-
-     const int max_context_size = llama_n_ctx(ctx);
-     const int max_tokens_list_size = max_context_size - 4;
-
-     if ((int) inp.size() > max_tokens_list_size) {
-         LOG_ERR("%s: prompt too long (%d tokens, max %d)\n", __func__, (int) inp.size(), max_tokens_list_size);
-         return 1;
-     }
-
-     LOG("\n\n");
-
-     for (auto id : inp) {
-         LOG("%s", common_token_to_piece(ctx, id).c_str());
-     }
-
-     fflush(stderr);
-
-     const int n_input = inp.size();
-
-     const auto t_enc_start = ggml_time_us();
-
-     // eval the prompt
-     llama_decode(ctx, llama_batch_get_one( inp.data(), n_input - 1));
-     llama_decode(ctx, llama_batch_get_one(&inp.back(), 1));
-
-     for (int s = 1; s < W + G + 1; ++s) {
-         llama_kv_self_seq_cp(ctx, 0, s, -1, -1);
-     }
-
-     const auto t_enc_end = ggml_time_us();
-
-     int n_predict = 0;
-     int n_accept = 0;
-
-     int n_past = inp.size();
-
-     llama_token id = 0;
-
-     // used to determine end of generation
-     bool has_eos = false;
-
-     // for each decoded batch, we have at most W + G + 1 distinct sequences:
-     // seq_id == 0 : the current input token
-     // seq_id [1, W] : tokens from the past N - 1 Jacobi iterations
-     // seq_id [W + 1, W + G] : verification n-grams
-     llama_batch batch = llama_batch_init(params.n_ctx, 0, W + G + 1);
-
-     // target model sampling context
-     struct common_sampler * smpl = common_sampler_init(model, params.sampling);
-
-     // verification n-grams
-     std::vector<ngram_data> ngrams_cur(G);
-
-     // tokens for the past N - 1 Jacobi iterations
-     std::vector<llama_token> tokens_j_prev(W);
-     std::vector<std::vector<llama_token>> tokens_j(N - 1);
-     for (int j = 0; j < N - 1; j++) {
-         tokens_j[j].resize(W);
-
-         for (int i = 0; i < W; i++) {
-             // there are different ways to init these tokens
-             if (0) {
-                 // initialize randomly from the prompt tokens
-                 tokens_j[j][i] = all[1 + rand() % (all.size() - 1)];
-             } else {
-                 // initialize with a sequence of increasing numbers
-                 tokens_j[j][i] = 100 + i;
-             }
-         }
-     }
-
-     std::vector<llama_seq_id> seq_id_look;
-
-     // the input token belongs both to all sequences
-     std::vector<llama_seq_id> seq_id_all(W + G + 1);
-     for (int i = 0; i < W + G + 1; i++) {
-         seq_id_all[i] = i;
-     }
-
-     // here we keep adding new n-grams as we go
-     ngram_container ngrams_observed(llama_vocab_n_tokens(vocab), N, G);
-
-     const auto t_dec_start = ggml_time_us();
-
-     // sample first token
-     {
-         id = common_sampler_sample(smpl, ctx, 0);
-
-         common_sampler_accept(smpl, id, true);
-
-         {
-             const std::string token_str = common_token_to_piece(ctx, id);
-
-             LOG("%s", token_str.c_str());
-             fflush(stdout);
-         }
-     }
-
-     while (true) {
-         // build the mask from https://lmsys.org/blog/2023-11-21-lookahead-decoding/
-         //
-         // Example for W = 5, N = 4, G = 2:
-         // (I = input, L = lookahead, V = verification)
-         //
-         // Batch:  0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
-         // T:        -2 -2 -2 -2 -1 -1 -1 -1 -1  0  0  0  0  0  0
-         // Info:   I L L L L L L L L L L  L  L  L  L  V  V  V  V  V  V
-         // Pos:    0 1 2 3 4 1 2 3 4 5 2  3  4  5  6  1  2  3  1  2  3   (+ n_past)
-         // Logits: 1 0 0 0 0 0 0 0 0 0 1  1  1  1  1  1  1  1  1  1  1
-         // ---------------------------------------------------------------------
-         // Seq:    0
-         //         1         1         1
-         //         2 2         2          2
-         //         3 3 3         3           3
-         //         4 4 4 4         4            4
-         //         5 5 5 5 5         5             5
-         //         6                                  6  6  6
-         //         7                                           7  7  7
-         // ---------------------------------------------------------------------
-         //                             |  |  |  |  |  |  |  |  |  |  |
-         //                             V  V  V  V  V  |  |  |  |  |  |
-         //                               j_tokens     |  |  |  |  |  |
-         //                                            V  V  V  V  V  V
-         //                                               id
-         {
-             common_batch_clear(batch);
-
-             // current token - first token of the first level
-             common_batch_add(batch, id, n_past, seq_id_all, true);
-
-             // verification n-grams - queue this before the lookahead tokens for less KV cache fragmentation
-             {
-                 const int g_cur = ngrams_observed.cnt[id];
-
-                 ngrams_cur.resize(g_cur);
-                 for (int g = 0; g < g_cur; g++) {
-                     ngrams_cur[g].active = true;
-                     ngrams_cur[g].tokens.resize(N);
-                     ngrams_cur[g].i_batch.resize(N);
-                     ngrams_cur[g].seq_id = W + 1 + g;
-                     ngrams_cur[g].i_batch[0] = 0;
-                     ngrams_cur[g].tokens [0] = id;
-                 }
-
-                 for (int j = 0; j < N - 1; j++) {
-                     for (int g = 0; g < g_cur; g++) {
-                         const int idx = id*(N - 1)*G + g*(N - 1);
-
-                         const llama_token t = ngrams_observed.tokens[idx + j];
-
-                         ngrams_cur[g].tokens [j + 1] = t;
-                         ngrams_cur[g].i_batch[j + 1] = batch.n_tokens;
-
-                         common_batch_add(batch, t, n_past + j + 1, { W + 1 + g }, true);
-                     }
-                 }
-             }
-
-             // fill the remaining W - 1 tokens for the first level
-             for (int i = 1; i < W; i++) {
-                 seq_id_look.resize(W - i);
-                 for (int j = 0; j < W - i; j++) {
-                     seq_id_look[j] = i + j + 1;
-                 }
-
-                 common_batch_add(batch, tokens_j[0][i], n_past + i, seq_id_look, false);
-             }
-
-             // fill the rest of the levels
-             for (int j = 1; j < N - 1; j++) {
-                 for (int i = 0; i < W; i++) {
-                     common_batch_add(batch, tokens_j[j][i], n_past + j + i, { i + 1 }, j == N - 2);
-                 }
-             }
-         }
-
-         if (llama_decode(ctx, batch) != 0) {
-             LOG_ERR("\n\n%s: llama_decode failed - increase KV cache size\n", __func__);
-             return 1;
-         }
-
-         int seq_id_best = 0;
-
-         for (int v = 0; v < N; ++v) {
-             int i_batch = 0;
-
-             // if no active ngrams are left, it means the sampled token does not pass the verification
-             if (v > 0) {
-                 for (int g = 0; g < (int) ngrams_cur.size(); g++) {
-                     if (ngrams_cur[g].active) {
-                         i_batch = ngrams_cur[g].i_batch[v];
-                         seq_id_best = ngrams_cur[g].seq_id;
-
-                         ++n_accept;
-                         break;
-                     }
-                 }
-
-                 // no more matches -> create a new batch
-                 if (i_batch == 0) {
-                     break;
-                 }
-             }
-
-             // sample the next token
-             id = common_sampler_sample(smpl, ctx, i_batch);
-
-             common_sampler_accept(smpl, id, true);
-
-             // print
-             {
-                 const std::string token_str = common_token_to_piece(ctx, id);
-
-                 if (v == 0) {
-                     LOG("%s", token_str.c_str());
-                 } else {
-                     // print light cyan
-                     LOG("\033[0;96m%s\033[0m", token_str.c_str());
-                 }
-                 fflush(stdout);
-
-                 if (llama_vocab_is_eog(vocab, id)) {
-                     has_eos = true;
-                 }
-
-                 all.push_back(id);
-             }
-
-             ++n_predict;
-             ++n_past;
-
-             if ((params.n_predict >= 0 && n_predict > params.n_predict) || has_eos) {
-                 break;
-             }
-
-             // verify across active n-grams
-             for (int g = 0; g < (int) ngrams_cur.size(); g++) {
-                 if (ngrams_cur[g].active) {
-                     if (v == N - 1) {
-                         ngrams_cur[g].active = false;
-                     } else {
-                         if (id != ngrams_cur[g].tokens[v + 1]) {
-                             ngrams_cur[g].active = false;
-                         }
-                     }
-                 }
-             }
-
-             // print known n-grams starting with token id (debug)
-             if (0 && v == 0) {
-                 if (ngrams_observed.cnt[id] > 0) {
-                     LOG("\n - %d n-grams starting with '%s'\n", ngrams_observed.cnt[id], common_token_to_piece(ctx, id).c_str());
-                 }
-
-                 for (int i = 0; i < ngrams_observed.cnt[id]; i++) {
-                     LOG(" - ngram %2d: ", i);
-
-                     const int idx = id*(N - 1)*G + i*(N - 1);
-
-                     for (int j = 0; j < N - 1; j++) {
-                         const std::string token_str = common_token_to_piece(ctx, ngrams_observed.tokens[idx + j]);
-
-                         LOG("%s", token_str.c_str());
-                     }
-
-                     LOG("\n");
-                 }
-             }
-
-             // update lookahead tokens
-             {
-                 for (int i = 0; i < W; i++) {
-                     tokens_j_prev[i] = tokens_j[0][i];
-                 }
-
-                 for (int j = 0; j < N - 2; j++) {
-                     tokens_j[j] = tokens_j[j + 1];
-                 }
-
-                 if (v == 0) {
-                     // sample from the last level
-                     for (int i = 0; i < W; i++) {
-                         tokens_j[N - 2][i] = common_sampler_sample(smpl, ctx, ngrams_cur.size()*(N-1) + W*(N - 2) + i);
-                     }
-                 } else {
-                     for (int i = 0; i < W; i++) {
-                         // there are different ways to init these tokens
-                         if (0) {
-                             // random init
-                             tokens_j[N - 2][i] = all[1 + rand() % (all.size() - 1)];
-                         } else {
-                             // init from the previous level
-                             tokens_j[N - 2][i] = tokens_j[0][i];
-                         }
-                     }
-                 }
-             }
-
-             // update observed ngrams
-             if (v == 0) {
-                 // the first token of the n-gram is determined by the index in the container so it is not stored
-                 std::vector<llama_token> ngram(N - 1);
-
-                 // n-gram generation
-                 // ref: https://github.com/hao-ai-lab/LookaheadDecoding/issues/14#issuecomment-1826198518
-                 for (int f = 0; f < W; ++f) {
-                     const int ft = tokens_j_prev[f]; // first token of the n-gram
-
-                     for (int j = 0; j < N - 1; ++j) {
-                         ngram[j] = tokens_j[j][f];
-                     }
-
-                     // filter-out repeating n-grams
-                     {
-                         bool is_unique = true;
-
-                         for (int k = 0; k < ngrams_observed.cnt[ft]; ++k) {
-                             const int idx = ft*(N - 1)*G + k*(N - 1);
-
-                             bool is_match = true;
-                             for (int j = 0; j < N - 1; ++j) {
-                                 if (ngrams_observed.tokens[idx + j] != ngram[j]) {
-                                     is_match = false;
-                                     break;
-                                 }
-                             }
-
-                             if (is_match) {
-                                 is_unique = false;
-                                 break;
-                             }
-                         }
-
-                         if (!is_unique) {
-                             continue;
-                         }
-                     }
-
-                     const int head = ngrams_observed.head[ft];
-                     const int idx = ft*(N - 1)*G + head*(N - 1);
-
-                     for (int i = 0; i < N - 1; i++) {
-                         ngrams_observed.tokens[idx + i] = ngram[i];
-                     }
-
-                     ngrams_observed.cnt[ft] = std::min(G, ngrams_observed.cnt[ft] + 1);
-                     ngrams_observed.head[ft] = (head + 1) % G;
-
-                     ngrams_observed.n_total++;
-                 }
-             }
-         }
-
-         if ((params.n_predict >= 0 && n_predict > params.n_predict) || has_eos) {
-             break;
-         }
-
-         // KV cache management
-         // if no verification token matched, we simply remove all cells from this batch -> no fragmentation
-         llama_kv_self_seq_rm(ctx, -1, n_past, -1);
-
-         if (seq_id_best != 0) {
-             // if a verification token matched, we keep the best sequence and remove the rest
-             // this leads to some KV cache fragmentation
-             llama_kv_self_seq_keep(ctx, seq_id_best);
-             llama_kv_self_seq_cp (ctx, seq_id_best, 0, -1, -1);
-             llama_kv_self_seq_rm (ctx, seq_id_best, -1, -1);
-
-             for (int s = 1; s < W + G + 1; ++s) {
-                 llama_kv_self_seq_cp(ctx, 0, s, -1, -1);
-             }
-         }
-     }
-
-     auto t_dec_end = ggml_time_us();
-
-     LOG("\n\n");
-
-     LOG_INF("encoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_input, (t_enc_end - t_enc_start) / 1e6f, inp.size() / ((t_enc_end - t_enc_start) / 1e6f));
-     LOG_INF("decoded %4d tokens in %8.3f seconds, speed: %8.3f t/s\n", n_predict, (t_dec_end - t_dec_start) / 1e6f, n_predict / ((t_dec_end - t_dec_start) / 1e6f));
-
-     LOG_INF("\n");
-     LOG_INF("W = %2d\n", W);
-     LOG_INF("N = %2d\n", N);
-     LOG_INF("G = %2d\n", G);
-     LOG_INF("\n");
-     LOG_INF("n_predict = %d\n", n_predict);
-     LOG_INF("n_accept = %d\n", n_accept);
-
-     LOG_INF("\n");
-     common_perf_print(ctx, smpl);
-
-     common_sampler_free(smpl);
-
-     llama_batch_free(batch);
-
-     llama_backend_free();
-
-     LOG("\n\n");
-
-     return 0;
- }
package/src/llama.cpp/examples/lookup/CMakeLists.txt
@@ -1,23 +0,0 @@
- set(TARGET llama-lookup)
- add_executable(${TARGET} lookup.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
- set(TARGET llama-lookup-create)
- add_executable(${TARGET} lookup-create.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
- set(TARGET llama-lookup-merge)
- add_executable(${TARGET} lookup-merge.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
-
- set(TARGET llama-lookup-stats)
- add_executable(${TARGET} lookup-stats.cpp)
- install(TARGETS ${TARGET} RUNTIME)
- target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
- target_compile_features(${TARGET} PRIVATE cxx_std_17)
package/src/llama.cpp/examples/lookup/lookup-create.cpp
@@ -1,40 +0,0 @@
- #include "arg.h"
- #include "common.h"
- #include "ngram-cache.h"
- #include "llama.h"
-
- #include <string>
- #include <vector>
-
- int main(int argc, char ** argv){
-     common_params params;
-
-     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LOOKUP)) {
-         return 1;
-     }
-
-     // init llama.cpp
-     llama_backend_init();
-     llama_numa_init(params.numa);
-
-     // load the model
-     common_init_result llama_init = common_init_from_params(params);
-
-     llama_model_ptr & model = llama_init.model;
-     llama_context_ptr & ctx = llama_init.context;
-
-     GGML_ASSERT(model != nullptr);
-
-     // tokenize the prompt
-     std::vector<llama_token> inp;
-     inp = common_tokenize(ctx.get(), params.prompt, true, true);
-     fprintf(stderr, "%s: tokenization done\n", __func__);
-
-     common_ngram_cache ngram_cache;
-     common_ngram_cache_update(ngram_cache, LLAMA_NGRAM_STATIC, LLAMA_NGRAM_STATIC, inp, inp.size(), true);
-     fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str());
-
-     common_ngram_cache_save(ngram_cache, params.lookup_cache_static);
-
-     return 0;
- }
package/src/llama.cpp/examples/lookup/lookup-merge.cpp
@@ -1,47 +0,0 @@
- #include "ggml.h"
- #include "llama.h"
- #include "common.h"
- #include "ngram-cache.h"
-
- #include <cstdint>
- #include <cstdio>
- #include <fstream>
- #include <iostream>
- #include <string>
- #include <unordered_map>
- #include <vector>
-
- static void print_usage(char* argv0) {
-     fprintf(stderr, "Merges multiple lookup cache files into a single one.\n");
-     fprintf(stderr, "Usage: %s [--help] lookup_part_1.bin lookup_part_2.bin ... lookup_merged.bin\n", argv0);
- }
-
- int main(int argc, char ** argv){
-     if (argc < 3) {
-         print_usage(argv[0]);
-         exit(1);
-     }
-
-     std::vector<std::string> args;
-     args.resize(argc-1);
-     for (int i = 0; i < argc-1; ++i) {
-         args[i] = argv[i+1];
-         if (args[i] == "-h" || args[i] == "--help") {
-             print_usage(argv[0]);
-             exit(0);
-         }
-     }
-
-     fprintf(stderr, "lookup-merge: loading file %s\n", args[0].c_str());
-     common_ngram_cache ngram_cache_merged = common_ngram_cache_load(args[0]);
-
-     for (size_t i = 1; i < args.size()-1; ++i) {
-         fprintf(stderr, "lookup-merge: loading file %s\n", args[i].c_str());
-         common_ngram_cache ngram_cache = common_ngram_cache_load(args[i]);
-
-         common_ngram_cache_merge(ngram_cache_merged, ngram_cache);
-     }
-
-     fprintf(stderr, "lookup-merge: saving file %s\n", args.back().c_str());
-     common_ngram_cache_save(ngram_cache_merged, args.back());
- }