@fugood/llama.node 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/CMakeLists.txt +1 -8
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +4 -2
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +10 -10
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +14 -17
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +5 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +137 -29
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +46 -34
  27. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  28. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  29. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  30. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  31. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  32. package/src/llama.cpp/CMakeLists.txt +26 -11
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/common/CMakeLists.txt +10 -10
  35. package/src/llama.cpp/common/arg.cpp +2041 -0
  36. package/src/llama.cpp/common/arg.h +77 -0
  37. package/src/llama.cpp/common/common.cpp +523 -1861
  38. package/src/llama.cpp/common/common.h +234 -106
  39. package/src/llama.cpp/common/console.cpp +3 -0
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  41. package/src/llama.cpp/common/log.cpp +401 -0
  42. package/src/llama.cpp/common/log.h +66 -698
  43. package/src/llama.cpp/common/ngram-cache.cpp +39 -36
  44. package/src/llama.cpp/common/ngram-cache.h +19 -19
  45. package/src/llama.cpp/common/sampling.cpp +356 -350
  46. package/src/llama.cpp/common/sampling.h +62 -139
  47. package/src/llama.cpp/common/stb_image.h +5990 -6398
  48. package/src/llama.cpp/docs/build.md +72 -17
  49. package/src/llama.cpp/examples/CMakeLists.txt +1 -2
  50. package/src/llama.cpp/examples/batched/batched.cpp +49 -65
  51. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  53. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
  54. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
  56. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
  58. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  59. package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  61. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  62. package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
  63. package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
  64. package/src/llama.cpp/examples/infill/infill.cpp +131 -192
  65. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
  66. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  67. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
  68. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  69. package/src/llama.cpp/examples/llava/clip.cpp +686 -150
  70. package/src/llama.cpp/examples/llava/clip.h +11 -2
  71. package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
  72. package/src/llama.cpp/examples/llava/llava.cpp +146 -26
  73. package/src/llama.cpp/examples/llava/llava.h +2 -3
  74. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  75. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  76. package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
  77. package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
  78. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  79. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
  80. package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
  81. package/src/llama.cpp/examples/main/main.cpp +216 -313
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
  83. package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
  84. package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  87. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
  88. package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
  89. package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
  90. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
  91. package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
  92. package/src/llama.cpp/examples/server/server.cpp +1347 -1531
  93. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  94. package/src/llama.cpp/examples/server/utils.hpp +396 -107
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/simple/simple.cpp +132 -106
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  99. package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
  100. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  101. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  102. package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
  103. package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
  104. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  105. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  106. package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
  107. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  108. package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
  109. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  110. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  111. package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
  112. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  113. package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
  114. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  115. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  116. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  117. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  118. package/src/llama.cpp/ggml/include/ggml.h +272 -505
  119. package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
  120. package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
  121. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  122. package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
  123. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  124. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  125. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  126. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  127. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  128. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
  129. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  130. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
  131. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  132. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
  133. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  134. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  135. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  136. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  137. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  138. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
  139. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  140. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  141. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  142. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  143. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  151. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
  152. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  153. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  155. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  156. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  157. package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
  158. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  159. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
  160. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  161. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  162. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  163. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  164. package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
  165. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  167. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  169. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
  172. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  173. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  174. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  175. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  176. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  177. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  178. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  179. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
  180. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  181. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  182. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  183. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
  184. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
  187. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
  188. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  192. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  195. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  197. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  198. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  199. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  200. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
  201. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
  202. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
  203. package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
  204. package/src/llama.cpp/include/llama.h +296 -285
  205. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  206. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  207. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  208. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  209. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  210. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  211. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  212. package/src/llama.cpp/src/llama-grammar.h +120 -15
  213. package/src/llama.cpp/src/llama-impl.h +156 -1
  214. package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
  215. package/src/llama.cpp/src/llama-sampling.h +39 -47
  216. package/src/llama.cpp/src/llama-vocab.cpp +390 -127
  217. package/src/llama.cpp/src/llama-vocab.h +60 -20
  218. package/src/llama.cpp/src/llama.cpp +6215 -3263
  219. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  220. package/src/llama.cpp/src/unicode-data.h +4 -4
  221. package/src/llama.cpp/src/unicode.cpp +15 -7
  222. package/src/llama.cpp/tests/CMakeLists.txt +4 -2
  223. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  224. package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
  225. package/src/llama.cpp/tests/test-barrier.cpp +94 -0
  226. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  227. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  228. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  229. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
  230. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  231. package/src/llama.cpp/tests/test-log.cpp +39 -0
  232. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  233. package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
  234. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  235. package/src/llama.cpp/tests/test-rope.cpp +2 -1
  236. package/src/llama.cpp/tests/test-sampling.cpp +226 -142
  237. package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
  238. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  239. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  240. package/patches/llama.patch +0 -22
  241. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  242. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  243. package/src/llama.cpp/common/grammar-parser.h +0 -29
  244. package/src/llama.cpp/common/train.cpp +0 -1513
  245. package/src/llama.cpp/common/train.h +0 -233
  246. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
  247. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  248. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
  249. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
  250. package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
  251. package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  252. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
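
Diff hunks for two of the changed files follow: examples/save-load-state/save-load-state.cpp and examples/server/CMakeLists.txt. The save-load-state changes track llama.cpp's rename of the gpt_* helpers to common_* and the new argument parser in common/arg.cpp (listed above). As a hedged illustration only — a minimal initialization sketch built from the identifiers that appear in the diff below, not a verbatim excerpt:

#include "arg.h"
#include "common.h"
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    common_params params;                  // was: gpt_params
    params.prompt = "The quick brown fox";

    // parsing now goes through common/arg.h and takes an example id
    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
        return 1;
    }

    // model and context are now created together by one call
    common_init_result llama_init = common_init_from_params(params);
    llama_model   * model = llama_init.model;
    llama_context * ctx   = llama_init.context;
    if (model == nullptr || ctx == nullptr) {
        fprintf(stderr, "failed to init\n");
        return 1;
    }

    // ... tokenize, decode, sample as in the diff below ...

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}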
package/src/llama.cpp/examples/save-load-state/save-load-state.cpp
@@ -1,17 +1,17 @@
+#include "arg.h"
 #include "common.h"
 #include "llama.h"
 
 #include <vector>
 #include <cstdio>
-#include <chrono>
 
 int main(int argc, char ** argv) {
-    gpt_params params;
+    common_params params;
 
     params.prompt = "The quick brown fox";
+    params.sparams.seed = 1234;
 
-    if (!gpt_params_parse(argc, argv, params)) {
-        gpt_params_print_usage(argc, argv, params);
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) {
         return 1;
     }
 
@@ -28,21 +28,35 @@ int main(int argc, char ** argv) {
     std::string result2;
 
     // init
-    llama_model * model;
-    llama_context * ctx;
+    common_init_result llama_init = common_init_from_params(params);
+
+    llama_model * model = llama_init.model;
+    llama_context * ctx = llama_init.context;
 
-    std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == nullptr || ctx == nullptr) {
         fprintf(stderr, "%s : failed to init\n", __func__);
         return 1;
     }
 
+    auto sparams = llama_sampler_chain_default_params();
+
+    llama_sampler * smpl = llama_sampler_chain_init(sparams);
+
+    llama_sampler_chain_add(smpl, llama_sampler_init_dist(params.sparams.seed));
+
     // tokenize prompt
-    auto tokens = llama_tokenize(ctx, params.prompt, true);
+    auto tokens = common_tokenize(ctx, params.prompt, true);
+
+    // prepare the batch
+    llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
+    for (size_t i = 0; i < tokens.size(); i++) {
+        common_batch_add(batch, tokens[i], i, {0}, false);
+    }
+    batch.logits[batch.n_tokens - 1] = true; // generate next token
 
     // evaluate prompt
-    llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size(), n_past, 0));
-    n_past += tokens.size();
+    llama_decode(ctx, batch);
+    n_past += batch.n_tokens;
 
     // save state (rng, logits, embedding and kv_cache) to file
     {
@@ -63,23 +77,18 @@ int main(int argc, char ** argv) {
     printf("\nfirst run: %s", params.prompt.c_str());
 
     for (auto i = 0; i < params.n_predict; i++) {
-        auto * logits = llama_get_logits(ctx);
-        auto n_vocab = llama_n_vocab(model);
-
-        std::vector<llama_token_data> candidates;
-        candidates.reserve(n_vocab);
-        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
-        }
-        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
-        auto next_token = llama_sample_token(ctx, &candidates_p);
-        auto next_token_str = llama_token_to_piece(ctx, next_token);
+        auto next_token = llama_sampler_sample(smpl, ctx, -1);
+        auto next_token_str = common_token_to_piece(ctx, next_token);
 
         printf("%s", next_token_str.c_str());
         result0 += next_token_str;
 
-        if (llama_decode(ctx, llama_batch_get_one(&next_token, 1, n_past, 0))) {
+        common_batch_clear(batch);
+        common_batch_add(batch, next_token, n_past, {0}, true);
+
+        if (llama_decode(ctx, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
+            llama_batch_free(batch);
             llama_free(ctx);
             llama_free_model(model);
             return 1;
@@ -93,7 +102,11 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
 
     // make new context
-    auto * ctx2 = llama_new_context_with_model(model, llama_context_params_from_gpt_params(params));
+    auto * ctx2 = llama_new_context_with_model(model, common_context_params_to_llama(params));
+
+    llama_sampler * smpl2 = llama_sampler_chain_init(sparams);
+
+    llama_sampler_chain_add(smpl2, llama_sampler_init_dist(params.sparams.seed));
 
     printf("\nsecond run: %s", params.prompt.c_str());
 
@@ -123,22 +136,18 @@ int main(int argc, char ** argv) {
 
     // second run
     for (auto i = 0; i < params.n_predict; i++) {
-        auto * logits = llama_get_logits(ctx2);
-        auto n_vocab = llama_n_vocab(model);
-        std::vector<llama_token_data> candidates;
-        candidates.reserve(n_vocab);
-        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
-        }
-        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
-        auto next_token = llama_sample_token(ctx2, &candidates_p);
-        auto next_token_str = llama_token_to_piece(ctx2, next_token);
+        auto next_token = llama_sampler_sample(smpl2, ctx2, -1);
+        auto next_token_str = common_token_to_piece(ctx2, next_token);
 
         printf("%s", next_token_str.c_str());
         result1 += next_token_str;
 
-        if (llama_decode(ctx2, llama_batch_get_one(&next_token, 1, n_past, 0))) {
+        common_batch_clear(batch);
+        common_batch_add(batch, next_token, n_past, {0}, true);
+
+        if (llama_decode(ctx2, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
+            llama_batch_free(batch);
             llama_free(ctx2);
             llama_free_model(model);
             return 1;
@@ -156,7 +165,11 @@ int main(int argc, char ** argv) {
     }
 
     // make new context
-    auto* ctx3 = llama_new_context_with_model(model, llama_context_params_from_gpt_params(params));
+    auto * ctx3 = llama_new_context_with_model(model, common_context_params_to_llama(params));
+
+    llama_sampler * smpl3 = llama_sampler_chain_init(sparams);
+
+    llama_sampler_chain_add(smpl3, llama_sampler_init_dist(params.sparams.seed));
 
     printf("\nsingle seq run: %s", params.prompt.c_str());
 
@@ -214,22 +227,18 @@ int main(int argc, char ** argv) {
 
     // third run with seq 1 instead of 0
     for (auto i = 0; i < params.n_predict; i++) {
-        auto * logits = llama_get_logits(ctx3);
-        auto n_vocab = llama_n_vocab(model);
-        std::vector<llama_token_data> candidates;
-        candidates.reserve(n_vocab);
-        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
-            candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
-        }
-        llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
-        auto next_token = llama_sample_token(ctx3, &candidates_p);
-        auto next_token_str = llama_token_to_piece(ctx3, next_token);
+        auto next_token = llama_sampler_sample(smpl3, ctx3, -1);
+        auto next_token_str = common_token_to_piece(ctx3, next_token);
 
         printf("%s", next_token_str.c_str());
         result2 += next_token_str;
 
-        if (llama_decode(ctx3, llama_batch_get_one(&next_token, 1, n_past, 1))) {
+        common_batch_clear(batch);
+        common_batch_add(batch, next_token, n_past, {1}, true);
+
+        if (llama_decode(ctx3, batch)) {
             fprintf(stderr, "\n%s : failed to evaluate\n", __func__);
+            llama_batch_free(batch);
             llama_free(ctx3);
             llama_free_model(model);
             return 1;
@@ -239,6 +248,11 @@ int main(int argc, char ** argv) {
 
     printf("\n");
 
+    llama_sampler_free(smpl);
+    llama_sampler_free(smpl2);
+    llama_sampler_free(smpl3);
+
+    llama_batch_free(batch);
     llama_free(ctx3);
     llama_free_model(model);
 
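
Two API migrations run through the hunks above: the hand-rolled logits → candidates → llama_sample_token loop becomes a llama_sampler chain, and the per-call llama_batch_get_one is replaced by one explicit llama_batch that is initialized once, then cleared and refilled per token. A condensed sketch of the new decode loop, assuming ctx, tokens, and params are set up as in the diff (error paths trimmed):

// build a sampler chain with a single distribution sampler, as the diff does
auto sparams = llama_sampler_chain_default_params();
llama_sampler * smpl = llama_sampler_chain_init(sparams);
llama_sampler_chain_add(smpl, llama_sampler_init_dist(params.sparams.seed));

// one batch is allocated once and reused for the prompt and each new token
llama_batch batch = llama_batch_init(tokens.size(), 0, 1);
for (size_t i = 0; i < tokens.size(); i++) {
    common_batch_add(batch, tokens[i], i, {0}, false);
}
batch.logits[batch.n_tokens - 1] = true; // request logits for the last prompt token
llama_decode(ctx, batch);
int n_past = batch.n_tokens;

for (int i = 0; i < params.n_predict; i++) {
    // -1 samples from the most recent logits in the context
    llama_token next_token = llama_sampler_sample(smpl, ctx, -1);
    printf("%s", common_token_to_piece(ctx, next_token).c_str());

    common_batch_clear(batch);
    common_batch_add(batch, next_token, n_past++, {0}, true);
    if (llama_decode(ctx, batch)) {
        break; // failed to evaluate
    }
}

llama_sampler_free(smpl);
llama_batch_free(batch);

In the full example each restored context gets its own identically seeded chain (smpl, smpl2, smpl3), so all three runs should emit the same tokens; the batch and the samplers are released explicitly at the end.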
package/src/llama.cpp/examples/server/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(TARGET llama-server)
-option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
-option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
+
+option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
 
 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 
@@ -15,21 +15,13 @@ set(TARGET_SRCS
     httplib.h
 )
 set(PUBLIC_ASSETS
-    colorthemes.css
-    style.css
-    theme-beeninorder.css
-    theme-ketivah.css
-    theme-mangotango.css
-    theme-playground.css
-    theme-polarnight.css
-    theme-snowstorm.css
     index.html
-    index-new.html
-    index.js
     completion.js
-    system-prompts.js
-    prompt-formats.js
-    json-schema-to-grammar.mjs
+    loading.html
+    deps_daisyui.min.css
+    deps_markdown-it.js
+    deps_tailwindcss.js
+    deps_vue.esm-browser.js
 )
 
 foreach(asset ${PUBLIC_ASSETS})
@@ -45,9 +37,6 @@ endforeach()
 
 add_executable(${TARGET} ${TARGET_SRCS})
 install(TARGETS ${TARGET} RUNTIME)
-target_compile_definitions(${TARGET} PRIVATE
-    SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
-)
 
 target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
 
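
The server build drops the LLAMA_SERVER_VERBOSE option and its SERVER_VERBOSE compile definition (server logging now routes through the rewritten common/log.cpp listed above), and the themed CSS/JS web UI is replaced by prebundled deps_* assets (Vue, tailwindcss, daisyUI, markdown-it) plus a loading page. SSL support remains an opt-in flag; assuming a standard out-of-tree CMake workflow, it would be enabled like:

cmake -B build -DLLAMA_SERVER_SSL=ON
cmake --build build --target llama-server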