@fugood/llama.node 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319)
  1. package/CMakeLists.txt +6 -3
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +3 -3
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  24. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  25. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  26. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  27. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  28. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  29. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  31. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  32. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  33. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  34. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  35. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  36. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  37. package/src/llama.cpp/CMakeLists.txt +91 -1245
  38. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  39. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  40. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  41. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  42. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  43. package/src/llama.cpp/common/common.cpp +1116 -877
  44. package/src/llama.cpp/common/common.h +191 -77
  45. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  46. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  47. package/src/llama.cpp/common/log.h +1 -1
  48. package/src/llama.cpp/common/ngram-cache.h +10 -3
  49. package/src/llama.cpp/common/sampling.cpp +19 -10
  50. package/src/llama.cpp/docs/build.md +353 -0
  51. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  52. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  54. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  56. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  58. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  60. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  61. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  62. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  63. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  64. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  65. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  66. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  67. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  68. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  69. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  71. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  72. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  73. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  75. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  76. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  77. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  79. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  80. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  87. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  88. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  90. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  91. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  92. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  94. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  95. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  96. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  97. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  98. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  99. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  100. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  102. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  103. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  104. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  105. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  106. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  107. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  108. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  110. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  111. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  112. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  113. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  114. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  115. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  116. package/src/llama.cpp/examples/main/main.cpp +98 -75
  117. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  118. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  119. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  120. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  121. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  122. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  123. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  124. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  125. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  126. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  127. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  128. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  129. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  130. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  131. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  132. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  133. package/src/llama.cpp/examples/server/server.cpp +274 -671
  134. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  135. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  136. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  137. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  138. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  139. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  140. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  141. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  142. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  143. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  144. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  145. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  146. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  147. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  148. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  149. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  150. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  151. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  152. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  153. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  154. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  155. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  156. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  157. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  159. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  160. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  161. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  162. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  163. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  178. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  179. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  180. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  181. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  182. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  183. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  184. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  208. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  209. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  210. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  211. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  212. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  214. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  215. package/src/llama.cpp/models/.editorconfig +1 -0
  216. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  217. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  221. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  224. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  230. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  233. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  237. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  243. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  246. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  249. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  252. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  255. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  258. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  259. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  260. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  261. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  263. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  264. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  265. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  266. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  267. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  268. package/src/llama.cpp/requirements.txt +5 -4
  269. package/src/llama.cpp/scripts/build-info.sh +30 -0
  270. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  271. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  272. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  273. package/src/llama.cpp/src/llama-grammar.h +39 -0
  274. package/src/llama.cpp/src/llama-impl.h +26 -0
  275. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  276. package/src/llama.cpp/src/llama-sampling.h +56 -0
  277. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  278. package/src/llama.cpp/src/llama-vocab.h +130 -0
  279. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  280. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  281. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  282. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  283. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  284. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  285. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  286. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  287. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  289. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  290. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  291. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  292. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  293. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  294. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  295. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  296. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  297. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  298. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  299. package/bin/darwin/arm64/default.metallib +0 -0
  300. package/bin/darwin/x64/default.metallib +0 -0
  301. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  302. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  303. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  304. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  305. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  306. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  307. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  308. package/src/llama.cpp/ggml-opencl.h +0 -36
  309. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  310. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  311. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  314. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  315. /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  316. /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  317. /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  318. /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  319. /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp
@@ -154,7 +154,7 @@ static void test_roundtrip_on_chunk(
     }
 
     if (use_reference) {
-        qfns.from_float_reference(input_scratch, quantized_scratch, chunk_size);
+        qfns.from_float_ref(input_scratch, quantized_scratch, chunk_size);
    } else {
        qfns.from_float(input_scratch, quantized_scratch, chunk_size);
    }
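
This hunk only tracks a field rename in ggml's type-traits table: the scalar reference quantizer is now from_float_ref. A minimal round-trip sketch, assuming the ggml_internal_get_type_traits accessor and the trait fields as this ggml revision exposes them:

    // Sketch: quantize a row of floats to Q8_0 via the renamed reference
    // quantizer, then dequantize it back. Error handling omitted.
    #include "ggml.h"
    #include <cstdint>
    #include <vector>

    static void roundtrip_q8_0(const float * src, float * dst, int64_t n) {
        const ggml_type_traits_t traits = ggml_internal_get_type_traits(GGML_TYPE_Q8_0);
        std::vector<uint8_t> quantized(ggml_row_size(GGML_TYPE_Q8_0, n));
        traits.from_float_ref(src, quantized.data(), n); // scalar reference path
        traits.to_float(quantized.data(), dst, n);       // back to f32
    }

Note that n should be a multiple of the Q8_0 block size (32) for the row size to be exact.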
package/src/llama.cpp/examples/retrieval/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(TARGET retrieval)
+set(TARGET llama-retrieval)
 add_executable(${TARGET} retrieval.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
package/src/llama.cpp/examples/retrieval/retrieval.cpp
@@ -4,72 +4,12 @@
 #include <algorithm>
 #include <fstream>
 
-struct retrieval_params {
-    std::vector<std::string> context_files; // context files to embed
-    int32_t chunk_size = 64; // chunk size for context embedding
-    std::string chunk_separator = "\n"; // chunk separator for context embedding
-};
-
-static void retrieval_params_print_usage(int argc, char ** argv, gpt_params & gpt_params, retrieval_params & params) {
-    gpt_params_print_usage(argc, argv, gpt_params);
-    printf("retrieval options:\n");
-    printf("  --context-file FNAME  file containing context to embed.\n");
-    printf("                        specify multiple files by providing --context-file option multiple times.\n");
-    printf("  --chunk-size N        minimum length of embedded text chunk (default:%d)\n", params.chunk_size);
-    printf("  --chunk-separator STRING\n");
-    printf("                        string to separate chunks (default: \"\\n\")\n");
-    printf("\n");
-}
+static void print_usage(int argc, char ** argv, const gpt_params & params) {
+    gpt_params_print_usage(argc, argv, params);
 
-static void retrieval_params_parse(int argc, char ** argv, gpt_params & gpt_params, retrieval_params & retrieval_params) {
-    int i = 1;
-    std::string arg;
-    while (i < argc) {
-        arg = argv[i];
-        bool invalid_gpt_param = false;
-        if (gpt_params_find_arg(argc, argv, argv[i], gpt_params, i, invalid_gpt_param)) {
-            if (invalid_gpt_param) {
-                fprintf(stderr, "error: invalid argument: %s\n", arg.c_str());
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            // option was parsed by gpt_params_find_arg
-        } else if (arg == "--context-file") {
-            if (++i >= argc) {
-                fprintf(stderr, "error: missing argument for --context-file\n");
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            std::ifstream file(argv[i]);
-            if (!file) {
-                fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            // store the external file name in params
-            retrieval_params.context_files.push_back(argv[i]);
-        } else if (arg == "--chunk-size") {
-            if (++i >= argc) {
-                fprintf(stderr, "error: missing argument for --chunk-size\n");
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            retrieval_params.chunk_size = std::stoi(argv[i]);
-        } else if (arg == "--chunk-separator") {
-            if (++i >= argc) {
-                fprintf(stderr, "error: missing argument for --chunk-separator\n");
-                retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-                exit(1);
-            }
-            retrieval_params.chunk_separator = argv[i];
-        } else {
-            // unknown argument
-            fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
-            retrieval_params_print_usage(argc, argv, gpt_params, retrieval_params);
-            exit(1);
-        }
-        i++;
-    }
+    LOG_TEE("\nexample usage:\n");
+    LOG_TEE("\n    %s --model ./models/bge-base-en-v1.5-f16.gguf --top-k 3 --context-file README.md --context-file License --chunk-size 100 --chunk-separator .\n", argv[0]);
+    LOG_TEE("\n");
 }
 
 struct chunk {
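
With the bespoke retrieval_params struct and hand-rolled parser gone, the three retrieval flags ride on gpt_params and go through the common gpt_params_parse. A short sketch of the migrated fields from calling code (field names are taken from this diff; the values are illustrative only):

    // Sketch: retrieval options now live on gpt_params (common/common.h),
    // so they can be set programmatically as well as via the common parser.
    gpt_params params;
    params.context_files   = { "README.md", "License" }; // files to chunk and embed
    params.chunk_size      = 100;                        // minimum chunk length
    params.chunk_separator = ".";                        // boundary between chunks
    params.embedding       = true;                       // retrieval always embeds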
@@ -133,9 +73,10 @@ static std::vector<chunk> chunk_file(const std::string & filename, int chunk_siz
     return chunks;
 }
 
-static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, int seq_id) {
-    for (size_t i = 0; i < tokens.size(); i++) {
-        llama_batch_add(batch, tokens[i], i, { seq_id }, i == tokens.size() - 1);
+static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, llama_seq_id seq_id) {
+    size_t n_tokens = tokens.size();
+    for (size_t i = 0; i < n_tokens; i++) {
+        llama_batch_add(batch, tokens[i], i, { seq_id }, true);
     }
 }
 
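The helper now takes a proper llama_seq_id and requests output for every position (last argument true) rather than only the final token, which the pooled-embedding path needs. A sketch of how it packs several chunks into one batch, assuming the llama_batch_init/llama_batch_add helpers of this revision:

    // Sketch: pack two tokenized chunks into a single batch, one sequence
    // each; a single llama_decode() then produces one pooled embedding
    // per sequence.
    llama_batch batch = llama_batch_init(/*n_tokens=*/512, /*embd=*/0, /*n_seq_max=*/2);
    batch_add_seq(batch, tokens_a, /*seq_id=*/0);
    batch_add_seq(batch, tokens_b, /*seq_id=*/1);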
@@ -171,33 +112,35 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 
 int main(int argc, char ** argv) {
     gpt_params params;
-    retrieval_params retrieval_params;
 
-    retrieval_params_parse(argc, argv, params, retrieval_params);
+    if (!gpt_params_parse(argc, argv, params)) {
+        print_usage(argc, argv, params);
+        return 1;
+    }
 
     // For BERT models, batch size must be equal to ubatch size
     params.n_ubatch = params.n_batch;
+    params.embedding = true;
 
-    if (retrieval_params.chunk_size <= 0) {
+    if (params.chunk_size <= 0) {
         fprintf(stderr, "chunk_size must be positive\n");
         return 1;
     }
-    if (retrieval_params.context_files.empty()) {
+    if (params.context_files.empty()) {
         fprintf(stderr, "context_files must be specified\n");
         return 1;
     }
-    params.embedding = true;
 
     print_build_info();
 
     printf("processing files:\n");
-    for (auto & context_file : retrieval_params.context_files) {
+    for (auto & context_file : params.context_files) {
         printf("%s\n", context_file.c_str());
     }
 
     std::vector<chunk> chunks;
-    for (auto & context_file : retrieval_params.context_files) {
-        std::vector<chunk> file_chunk = chunk_file(context_file, retrieval_params.chunk_size, retrieval_params.chunk_separator);
+    for (auto & context_file : params.context_files) {
+        std::vector<chunk> file_chunk = chunk_file(context_file, params.chunk_size, params.chunk_separator);
         chunks.insert(chunks.end(), file_chunk.begin(), file_chunk.end());
     }
     printf("Number of chunks: %ld\n", chunks.size());
@@ -218,6 +161,12 @@ int main(int argc, char ** argv) {
     const int n_ctx_train = llama_n_ctx_train(model);
     const int n_ctx = llama_n_ctx(ctx);
 
+    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
+        fprintf(stderr, "%s: error: pooling type NONE not supported\n", __func__);
+        return 1;
+    }
+
     if (n_ctx > n_ctx_train) {
         fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",
                 __func__, n_ctx_train, n_ctx);
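
The example now refuses to run without a pooling mode, since it reads exactly one embedding per sequence. A sketch of the per-sequence readout that this check protects, using the llama.h accessors of this revision:

    // Sketch: with pooling enabled, each decoded sequence yields one
    // embedding of size llama_n_embd(model), fetched by sequence id.
    const int n_embd = llama_n_embd(model);
    const float * emb = llama_get_embeddings_seq(ctx, /*seq_id=*/0);
    if (emb != nullptr) {
        // emb[0 .. n_embd) is the pooled embedding for sequence 0
    }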
@@ -242,7 +191,7 @@
         return 1;
     }
     // add eos if not present
-    if (inp.empty() || inp.back() != llama_token_eos(model)) {
+    if (llama_token_eos(model) >= 0 && (inp.empty() || inp.back() != llama_token_eos(model))) {
         inp.push_back(llama_token_eos(model));
     }
     chunk.tokens = inp;
package/src/llama.cpp/examples/save-load-state/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(TARGET save-load-state)
+set(TARGET llama-save-load-state)
 add_executable(${TARGET} save-load-state.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
package/src/llama.cpp/examples/save-load-state/save-load-state.cpp
@@ -11,6 +11,7 @@ int main(int argc, char ** argv) {
     params.prompt = "The quick brown fox";
 
     if (!gpt_params_parse(argc, argv, params)) {
+        gpt_params_print_usage(argc, argv, params);
         return 1;
     }
 
@@ -46,7 +47,7 @@ int main(int argc, char ** argv) {
     // save state (rng, logits, embedding and kv_cache) to file
     {
         std::vector<uint8_t> state_mem(llama_state_get_size(ctx));
-        const size_t written = llama_state_get_data(ctx, state_mem.data());
+        const size_t written = llama_state_get_data(ctx, state_mem.data(), state_mem.size());
 
         FILE *fp_write = fopen("dump_state.bin", "wb");
         fwrite(state_mem.data(), 1, written, fp_write);
@@ -98,13 +99,16 @@ int main(int argc, char ** argv) {
 
     // load state (rng, logits, embedding and kv_cache) from file
     {
-        std::vector<uint8_t> state_mem(llama_state_get_size(ctx2));
+        std::vector<uint8_t> state_mem;
 
         FILE * fp_read = fopen("dump_state.bin", "rb");
+        fseek(fp_read, 0, SEEK_END);
+        state_mem.resize(ftell(fp_read));
+        fseek(fp_read, 0, SEEK_SET);
         const size_t read = fread(state_mem.data(), 1, state_mem.size(), fp_read);
         fclose(fp_read);
 
-        if (read != llama_state_set_data(ctx2, state_mem.data())) {
+        if (read != llama_state_set_data(ctx2, state_mem.data(), state_mem.size())) {
             fprintf(stderr, "\n%s : failed to read state\n", __func__);
             llama_free(ctx2);
             llama_free_model(model);
@@ -158,13 +162,16 @@ int main(int argc, char ** argv) {
 
     // load state (rng, logits, embedding and kv_cache) from file
    {
-        std::vector<uint8_t> state_mem(llama_state_get_size(ctx3));
+        std::vector<uint8_t> state_mem;
 
         FILE * fp_read = fopen("dump_state.bin", "rb");
+        fseek(fp_read, 0, SEEK_END);
+        state_mem.resize(ftell(fp_read));
+        fseek(fp_read, 0, SEEK_SET);
         const size_t read = fread(state_mem.data(), 1, state_mem.size(), fp_read);
         fclose(fp_read);
 
-        if (read != llama_state_set_data(ctx3, state_mem.data())) {
+        if (read != llama_state_set_data(ctx3, state_mem.data(), state_mem.size())) {
             fprintf(stderr, "\n%s : failed to read state\n", __func__);
             llama_free(ctx3);
             llama_free_model(model);
@@ -181,7 +188,7 @@ int main(int argc, char ** argv) {
     {
         // save kv of seq 0
         std::vector<uint8_t> seq_store(llama_state_seq_get_size(ctx3, 0));
-        const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), 0);
+        const size_t ncopy = llama_state_seq_get_data(ctx3, seq_store.data(), seq_store.size(), 0);
         if (ncopy != seq_store.size()) {
             fprintf(stderr, "\n%s : seq copy data length %zd does not match expected length %zd\n", __func__, ncopy, seq_store.size());
             llama_free(ctx3);
@@ -195,7 +202,7 @@ int main(int argc, char ** argv) {
         fprintf(stderr, "%s : kv cache cleared\n", __func__);
 
         // restore kv into seq 1
-        const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), 1);
+        const size_t nset = llama_state_seq_set_data(ctx3, seq_store.data(), seq_store.size(), 1);
         if (nset != seq_store.size()) {
             fprintf(stderr, "\n%s : seq set data length %zd does not match expected length %zd\n", __func__, nset, seq_store.size());
             llama_free(ctx3);
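
All four state calls (llama_state_get_data, llama_state_set_data, and their _seq variants) now take an explicit buffer size, letting the library bound its reads and writes instead of trusting the caller. A sketch of a full-context state round trip with the sized API as declared in this llama.h:

    // Sketch: save the whole context state, then restore it into another
    // context created from the same model. Sizes bound every copy.
    std::vector<uint8_t> buf(llama_state_get_size(ctx));
    const size_t written = llama_state_get_data(ctx, buf.data(), buf.size());

    const size_t read = llama_state_set_data(ctx2, buf.data(), buf.size());
    if (read != written) {
        fprintf(stderr, "state restore failed\n");
    }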
package/src/llama.cpp/examples/server/CMakeLists.txt
@@ -1,18 +1,37 @@
-set(TARGET server)
+set(TARGET llama-server)
 option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
+option(LLAMA_SERVER_SSL     "Build SSL support for the server"        OFF)
+
 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
+
+if (MINGW)
+    # fix: https://github.com/ggerganov/llama.cpp/actions/runs/9651004652/job/26617901362?pr=8006
+    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
+endif()
+
 set(TARGET_SRCS
     server.cpp
     utils.hpp
     httplib.h
 )
 set(PUBLIC_ASSETS
+    colorthemes.css
+    style.css
+    theme-beeninorder.css
+    theme-ketivah.css
+    theme-mangotango.css
+    theme-playground.css
+    theme-polarnight.css
+    theme-snowstorm.css
     index.html
+    index-new.html
     index.js
     completion.js
+    system-prompts.js
+    prompt-formats.js
     json-schema-to-grammar.mjs
 )
+
 foreach(asset ${PUBLIC_ASSETS})
     set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
     set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
@@ -23,18 +42,23 @@ foreach(asset ${PUBLIC_ASSETS})
         COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
     )
 endforeach()
+
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
 target_compile_definitions(${TARGET} PRIVATE
     SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
 )
+
 target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
+
 if (LLAMA_SERVER_SSL)
     find_package(OpenSSL REQUIRED)
     target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
     target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
 endif()
+
 if (WIN32)
     TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
 endif()
+
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
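
Each entry in PUBLIC_ASSETS is baked into a C byte array by scripts/xxd.cmake and compiled into the llama-server binary, so the web UI ships with no loose files. Roughly how server code can serve one such generated header over cpp-httplib; the header name and the index_html/index_html_len symbols are illustrative of xxd-style output, not verified against the generated .hpp:

    // Sketch: serving an asset that xxd.cmake embedded as a byte array.
    // The actual generated symbol names derive from the asset filename.
    #include "index.html.hpp" // hypothetical generated header

    svr.Get("/", [](const httplib::Request &, httplib::Response & res) {
        res.set_content(reinterpret_cast<const char *>(index_html), index_html_len, "text/html");
    });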