@fugood/llama.node 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/CMakeLists.txt +1 -8
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +4 -2
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +10 -10
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +14 -17
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +5 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +137 -29
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +46 -34
  27. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  28. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  29. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  30. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  31. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  32. package/src/llama.cpp/CMakeLists.txt +26 -11
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/common/CMakeLists.txt +10 -10
  35. package/src/llama.cpp/common/arg.cpp +2041 -0
  36. package/src/llama.cpp/common/arg.h +77 -0
  37. package/src/llama.cpp/common/common.cpp +523 -1861
  38. package/src/llama.cpp/common/common.h +234 -106
  39. package/src/llama.cpp/common/console.cpp +3 -0
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  41. package/src/llama.cpp/common/log.cpp +401 -0
  42. package/src/llama.cpp/common/log.h +66 -698
  43. package/src/llama.cpp/common/ngram-cache.cpp +39 -36
  44. package/src/llama.cpp/common/ngram-cache.h +19 -19
  45. package/src/llama.cpp/common/sampling.cpp +356 -350
  46. package/src/llama.cpp/common/sampling.h +62 -139
  47. package/src/llama.cpp/common/stb_image.h +5990 -6398
  48. package/src/llama.cpp/docs/build.md +72 -17
  49. package/src/llama.cpp/examples/CMakeLists.txt +1 -2
  50. package/src/llama.cpp/examples/batched/batched.cpp +49 -65
  51. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  53. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
  54. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
  56. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
  58. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  59. package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  61. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  62. package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
  63. package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
  64. package/src/llama.cpp/examples/infill/infill.cpp +131 -192
  65. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
  66. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  67. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
  68. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  69. package/src/llama.cpp/examples/llava/clip.cpp +686 -150
  70. package/src/llama.cpp/examples/llava/clip.h +11 -2
  71. package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
  72. package/src/llama.cpp/examples/llava/llava.cpp +146 -26
  73. package/src/llama.cpp/examples/llava/llava.h +2 -3
  74. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  75. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  76. package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
  77. package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
  78. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  79. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
  80. package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
  81. package/src/llama.cpp/examples/main/main.cpp +216 -313
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
  83. package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
  84. package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  87. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
  88. package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
  89. package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
  90. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
  91. package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
  92. package/src/llama.cpp/examples/server/server.cpp +1347 -1531
  93. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  94. package/src/llama.cpp/examples/server/utils.hpp +396 -107
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/simple/simple.cpp +132 -106
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  99. package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
  100. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  101. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  102. package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
  103. package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
  104. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  105. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  106. package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
  107. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  108. package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
  109. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  110. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  111. package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
  112. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  113. package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
  114. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  115. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  116. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  117. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  118. package/src/llama.cpp/ggml/include/ggml.h +272 -505
  119. package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
  120. package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
  121. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  122. package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
  123. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  124. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  125. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  126. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  127. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  128. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
  129. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  130. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
  131. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  132. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
  133. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  134. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  135. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  136. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  137. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  138. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
  139. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  140. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  141. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  142. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  143. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  151. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
  152. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  153. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  155. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  156. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  157. package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
  158. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  159. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
  160. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  161. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  162. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  163. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  164. package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
  165. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  167. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  169. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
  172. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  173. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  174. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  175. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  176. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  177. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  178. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  179. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
  180. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  181. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  182. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  183. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
  184. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
  187. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
  188. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  192. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  195. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  197. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  198. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  199. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  200. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
  201. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
  202. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
  203. package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
  204. package/src/llama.cpp/include/llama.h +296 -285
  205. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  206. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  207. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  208. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  209. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  210. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  211. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  212. package/src/llama.cpp/src/llama-grammar.h +120 -15
  213. package/src/llama.cpp/src/llama-impl.h +156 -1
  214. package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
  215. package/src/llama.cpp/src/llama-sampling.h +39 -47
  216. package/src/llama.cpp/src/llama-vocab.cpp +390 -127
  217. package/src/llama.cpp/src/llama-vocab.h +60 -20
  218. package/src/llama.cpp/src/llama.cpp +6215 -3263
  219. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  220. package/src/llama.cpp/src/unicode-data.h +4 -4
  221. package/src/llama.cpp/src/unicode.cpp +15 -7
  222. package/src/llama.cpp/tests/CMakeLists.txt +4 -2
  223. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  224. package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
  225. package/src/llama.cpp/tests/test-barrier.cpp +94 -0
  226. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  227. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  228. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  229. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
  230. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  231. package/src/llama.cpp/tests/test-log.cpp +39 -0
  232. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  233. package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
  234. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  235. package/src/llama.cpp/tests/test-rope.cpp +2 -1
  236. package/src/llama.cpp/tests/test-sampling.cpp +226 -142
  237. package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
  238. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  239. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  240. package/patches/llama.patch +0 -22
  241. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  242. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  243. package/src/llama.cpp/common/grammar-parser.h +0 -29
  244. package/src/llama.cpp/common/train.cpp +0 -1513
  245. package/src/llama.cpp/common/train.h +0 -233
  246. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
  247. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  248. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
  249. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
  250. package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
  251. package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  252. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/src/llama.cpp/tests/test-barrier.cpp (new file)
@@ -0,0 +1,94 @@
+#include "ggml.h"
+#include "ggml-cpu.h"
+#include "ggml-backend.h"
+
+#include <chrono>
+#include <iostream>
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+#include <vector>
+
+#define MAX_NARGS 2
+
+int main(int argc, char *argv[]) {
+
+    int n_threads = 4;
+    int n_rounds  = 100;
+
+    if (argc > 1) {
+        n_threads = std::atoi(argv[1]);
+    }
+
+    if (argc > 2) {
+        n_rounds = std::atoi(argv[2]);
+    }
+
+    struct ggml_init_params params = {
+        /* .mem_size   = */ 1024*1024*1024,
+        /* .mem_buffer = */ NULL,
+        /* .no_alloc   = */ false,
+    };
+
+    struct ggml_context * ctx = ggml_init(params);
+
+    // Create graph
+    struct ggml_cgraph * gf = ggml_new_graph(ctx);
+
+    // Lots of small, parallel ops where barriers in between will dominate
+    struct ggml_tensor * out = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
+    for (int i = 0; i < 1000; i++) {
+        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 64, 128);
+        out = ggml_mul_mat(ctx, a, out);
+
+        struct ggml_tensor * d = ggml_new_tensor_2d(ctx, GGML_TYPE_Q4_0, 128, 64);
+        out = ggml_mul_mat(ctx, d, out);
+    }
+
+    ggml_build_forward_expand(gf, out);
+    int n_nodes = ggml_graph_n_nodes(gf);
+
+    // Create threadpool
+    struct ggml_threadpool_params tpp = ggml_threadpool_params_default(n_threads);
+    struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp);
+    if (!threadpool) {
+        fprintf(stderr, "threadpool create failed : n_threads %d\n", n_threads);
+        exit(1);
+    }
+
+    // Create compute plan
+    struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads, threadpool);
+
+    std::vector<uint8_t> work_data(cplan.work_size);
+    cplan.work_data = work_data.data();
+
+    std::cerr << "graph-compute with"
+        << "\n n_threads: " << n_threads
+        << "\n   n_nodes: " << n_nodes
+        << "\n  n_rounds: " << n_rounds
+        << "\n";
+    // ggml_graph_print(gf);
+
+    // Warmup
+    ggml_graph_compute(gf, &cplan);
+
+    auto t0 = std::chrono::high_resolution_clock::now();
+
+    for (int i=0; i < n_rounds; i++) {
+        ggml_graph_compute(gf, &cplan);
+    }
+
+    auto t1 = std::chrono::high_resolution_clock::now();
+
+    auto usec = std::chrono::duration_cast<std::chrono::microseconds>(t1-t0).count();
+    auto nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(t1-t0).count();
+    std::cerr << "graph-compute took " << usec << " usec "
+        << "\n " << (float) usec / n_rounds << " usec per-iter"
+        << "\n " << (float) nsec / (n_rounds * n_nodes) << " nsec per-node"
+        << "\n";
+
+    ggml_threadpool_free(threadpool);
+    ggml_free(ctx);
+
+    return 0;
+}
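
Note: test-barrier builds a graph of 2000 small mul_mat nodes so that the per-op thread barriers, not the matrix math, dominate the runtime; it reports total microseconds, microseconds per iteration, and nanoseconds per node on stderr. Thread count and round count come from the two optional positional arguments (defaults 4 and 100), so a run might look like this (binary location assumed from the usual llama.cpp CMake layout, not stated in this diff):

    ./build/bin/test-barrier 8 200
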
package/src/llama.cpp/tests/test-chat-template.cpp
@@ -65,6 +65,8 @@ int main(void) {
         u8"{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}",
         // DeepSeek-V2
         "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
+        // ibm-granite/granite-3.0-8b-instruct
+        "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|start_of_role|>system<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'user' %}\n {{- '<|start_of_role|>user<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant' %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}",
     };
     std::vector<std::string> expected_output = {
         // teknium/OpenHermes-2.5-Mistral-7B
@@ -109,6 +111,8 @@ int main(void) {
         u8"You are a helpful assistant<用户>Hello<AI>Hi there<用户>Who are you<AI>I am an assistant<用户>Another question<AI>",
         // DeepSeek-V2
         u8"You are a helpful assistant\n\nUser: Hello\n\nAssistant: Hi there<|end▁of▁sentence|>User: Who are you\n\nAssistant: I am an assistant <|end▁of▁sentence|>User: Another question\n\nAssistant:",
+        // ibm-granite/granite-3.0-8b-instruct
+        "<|start_of_role|>system<|end_of_role|>You are a helpful assistant<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Hello<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>Hi there<|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Who are you<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> I am an assistant <|end_of_text|>\n<|start_of_role|>user<|end_of_role|>Another question<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n",
     };
     std::vector<char> formatted_chat(1024);
     int32_t res;
@@ -140,11 +144,11 @@ int main(void) {
 
     // test llama_chat_format_single for system message
     printf("\n\n=== llama_chat_format_single (system message) ===\n\n");
-    std::vector<llama_chat_msg> chat2;
-    llama_chat_msg sys_msg{"system", "You are a helpful assistant"};
+    std::vector<common_chat_msg> chat2;
+    common_chat_msg sys_msg{"system", "You are a helpful assistant"};
 
     auto fmt_sys = [&](std::string tmpl) {
-        auto output = llama_chat_format_single(nullptr, tmpl, chat2, sys_msg, false);
+        auto output = common_chat_format_single(nullptr, tmpl, chat2, sys_msg, false);
         printf("fmt_sys(%s) : %s\n", tmpl.c_str(), output.c_str());
         printf("-------------------------\n");
         return output;
@@ -160,10 +164,10 @@ int main(void) {
     chat2.push_back({"system", "You are a helpful assistant"});
     chat2.push_back({"user", "Hello"});
     chat2.push_back({"assistant", "I am assistant"});
-    llama_chat_msg new_msg{"user", "How are you"};
+    common_chat_msg new_msg{"user", "How are you"};
 
     auto fmt_single = [&](std::string tmpl) {
-        auto output = llama_chat_format_single(nullptr, tmpl, chat2, new_msg, true);
+        auto output = common_chat_format_single(nullptr, tmpl, chat2, new_msg, true);
         printf("fmt_single(%s) : %s\n", tmpl.c_str(), output.c_str());
         printf("-------------------------\n");
         return output;
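
Note: these hunks track the llama.cpp rename of the chat helpers from the llama_ prefix to the common_ prefix (llama_chat_msg → common_chat_msg, llama_chat_format_single → common_chat_format_single). A minimal sketch of the renamed call, using only the shapes visible in this diff plus two assumptions: that common.h declares these helpers, and that "chatml" is accepted as a built-in template name:

    #include "common.h"

    #include <cstdio>

    int main() {
        std::vector<common_chat_msg> history = {
            {"system", "You are a helpful assistant"},
            {"user",   "Hello"},
        };
        common_chat_msg next{"user", "How are you"};
        // nullptr model: the template string is used directly; the trailing
        // true appends the assistant generation prompt (as in fmt_single above)
        std::string out = common_chat_format_single(nullptr, "chatml", history, next, true);
        printf("%s\n", out.c_str());
        return 0;
    }
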
package/src/llama.cpp/tests/test-grammar-integration.cpp
@@ -2,33 +2,18 @@
 #undef NDEBUG
 #endif
 
-#define LLAMA_API_INTERNAL
-
-#include "ggml.h"
-#include "llama.h"
-#include "grammar-parser.h"
-#include "json-schema-to-grammar.h"
 #include "unicode.h"
+#include "llama-grammar.h"
+#include "json-schema-to-grammar.h"
+
 #include <cassert>
 #include <string>
 #include <vector>
 
 using json = nlohmann::ordered_json;
 
-static llama_grammar* build_grammar(const std::string & grammar_str) {
-    auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
-
-    // Ensure we parsed correctly
-    assert(!parsed_grammar.rules.empty());
-
-    // Ensure we have a root node
-    assert(!(parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end()));
-
-    std::vector<const llama_grammar_element*> grammar_rules(parsed_grammar.c_rules());
-    llama_grammar* grammar = llama_grammar_init(
-        grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
-
-    return grammar;
+static llama_grammar * build_grammar(const std::string & grammar_str) {
+    return llama_grammar_init_impl(nullptr, grammar_str.c_str(), "root");
 }
 
 static bool test_build_grammar_fails(const std::string & grammar_str) {
@@ -45,25 +30,23 @@ static bool test_build_grammar_fails(const std::string & grammar_str) {
 }
 
 static bool match_string(const std::string & input, llama_grammar * grammar) {
-    auto decoded = decode_utf8(input, {});
-
-    const auto & code_points = decoded.first;
+    const auto cpts = unicode_cpts_from_utf8(input);
 
     const llama_grammar_rules  & rules      = llama_grammar_get_rules (grammar);
-          llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
+          llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
 
-    for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
-        const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
+    for (const auto & cpt : cpts) {
+        const llama_grammar_stacks stacks_prev = llama_grammar_get_stacks(grammar); // copy
 
-        llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
+        llama_grammar_accept(rules, stacks_prev, cpt, stacks_cur);
 
-        if (cur_stacks.empty()) {
+        if (stacks_cur.empty()) {
             // no stacks means that the grammar failed to match at this point
             return false;
         }
     }
 
-    for (const auto & stack : cur_stacks) {
+    for (const auto & stack : stacks_cur) {
         if (stack.empty()) {
             // An empty stack means that the grammar has been completed
             return true;
@@ -77,12 +60,12 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
     fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
     fflush(stderr);
 
-    auto grammar = build_grammar(grammar_str);
+    auto * grammar = build_grammar(grammar_str);
 
     // Save the original grammar stacks so that we can reset after every new string we want to test
-    const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
+    const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar);
 
-    llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
+    llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
 
     fprintf(stderr, "  🔵 Valid strings:\n");
 
@@ -119,7 +102,7 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
         assert(matched);
 
         // Reset the grammar stacks
-        cur_stacks = original_stacks;
+        stacks_cur = stacks_org;
     }
 
     fprintf(stderr, "  🟠 Invalid strings:\n");
@@ -139,11 +122,11 @@ static void test(const std::string & test_desc, const std::string & grammar_str,
         assert(!matched);
 
         // Reset the grammar stacks
-        cur_stacks = original_stacks;
+        stacks_cur = stacks_org;
     }
 
     // Clean up allocated memory
-    llama_grammar_free(grammar);
+    llama_grammar_free_impl(grammar);
 }
 
 static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
     test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
@@ -503,7 +486,7 @@ static void test_special_chars() {
             "aaaaabcccc",
             "aaaabccc",
             "aaaabccccc",
-            "🔵🟠✅❌abc❌✅🟠🔵"
+            "🔵🟠✅❌abc❌✅🟠🔵",
             "🔵🟠abc🟠🔵"
         }
     );
@@ -683,7 +666,8 @@ static void test_failure_missing_root() {
        term ::= number
        number ::= [0-9]+)""";
 
-    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
+    llama_grammar_parser parsed_grammar;
+    parsed_grammar.parse(grammar_str.c_str());
 
     // Ensure we parsed correctly
    assert(!parsed_grammar.rules.empty());
@@ -705,7 +689,8 @@ static void test_failure_missing_reference() {
 
     fprintf(stderr, "  Expected error:  ");
 
-    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
+    llama_grammar_parser parsed_grammar;
+    parsed_grammar.parse(grammar_str.c_str());
 
     // Ensure we did NOT parsed correctly
     assert(parsed_grammar.rules.empty());
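
Note: the integration tests now drive the grammar engine through the internal llama-grammar.h API instead of the removed common/grammar-parser: llama_grammar_init_impl builds a grammar straight from a GBNF string, llama_grammar_parser replaces grammar_parser::parse_state for parse-only checks, and llama_grammar_free_impl releases the result. A condensed sketch of that flow, restricted to the calls that appear in this diff:

    #include "llama-grammar.h"

    #include <cassert>

    int main() {
        // parse-only validation, as in test_failure_missing_root above
        llama_grammar_parser parser;
        parser.parse("root ::= [0-9]+");
        assert(!parser.rules.empty());

        // build a usable grammar from a GBNF string, as in build_grammar above
        llama_grammar * grammar = llama_grammar_init_impl(nullptr, "root ::= [0-9]+", "root");
        assert(grammar != nullptr);
        llama_grammar_free_impl(grammar);
        return 0;
    }
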
package/src/llama.cpp/tests/test-grammar-parser.cpp
@@ -3,7 +3,7 @@
 #endif
 
 #include "llama.h"
-#include "grammar-parser.h"
+#include "llama-grammar.h"
 
 #include <cassert>
 
@@ -22,7 +22,8 @@ static const char * type_str(llama_gretype type) {
 
 static void verify_parsing(const char *grammar_bytes, const std::vector<std::pair<std::string, uint32_t>> expected, const std::vector<llama_grammar_element> &expected_rules) {
     uint32_t index = 0;
-    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_bytes);
+    llama_grammar_parser parsed_grammar;
+    parsed_grammar.parse(grammar_bytes);
 
     std::map<uint32_t, std::string> symbol_names;
     for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) {
@@ -129,9 +130,10 @@ static void verify_parsing(const char *grammar_bytes, const std::vector<std::pai
     }
 }
 
-static void verify_failure(const char *grammar_bytes) {
+static void verify_failure(const char * grammar_bytes) {
     fprintf(stderr, "Testing expected failure:%s\n", grammar_bytes);
-    auto result = grammar_parser::parse(grammar_bytes);
+    llama_grammar_parser result;
+    result.parse(grammar_bytes);
     assert(result.rules.empty() && "should have failed");
 }
 
package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp
@@ -2,14 +2,15 @@
 #undef NDEBUG
 #endif
 
+#include "json-schema-to-grammar.h"
+
+#include "llama-grammar.h"
+
 #include <cassert>
 #include <fstream>
 #include <sstream>
 #include <regex>
 
-#include "json-schema-to-grammar.h"
-#include "grammar-parser.h"
-
 static std::string trim(const std::string & source) {
     std::string s(source);
     s.erase(0,s.find_first_not_of(" \n\r\t"));
@@ -40,7 +41,8 @@ struct TestCase {
     }
     void verify_expectation_parseable() const {
         try {
-            auto state = grammar_parser::parse(expected_grammar.c_str());
+            llama_grammar_parser state;
+            state.parse(expected_grammar.c_str());
             if (state.symbol_ids.find("root") == state.symbol_ids.end()) {
                 throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar);
             }
@@ -694,7 +696,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            "pattern": "^abc?d*efg+(hij)?kl$"
        })""",
        R"""(
-            root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
+            root ::= "\"" ("ab" "c"? "d"* "ef" "g"+ ("hij")? "kl") "\"" space
            space ::= | " " | "\n" [ \t]{0,20}
        )"""
    });
@@ -707,7 +709,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            "pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
        })""",
        R"""(
-            root ::= "\"" "[]{}()|+*?" "\"" space
+            root ::= "\"" ("[]{}()|+*?") "\"" space
            space ::= | " " | "\n" [ \t]{0,20}
        )"""
    });
@@ -720,7 +722,20 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
            "pattern": "^\"$"
        })""",
        R"""(
-            root ::= "\"" "\"" "\"" space
+            root ::= "\"" ("\"") "\"" space
+            space ::= | " " | "\n" [ \t]{0,20}
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "regexp with top-level alternation",
+        R"""({
+            "type": "string",
+            "pattern": "^A|B|C|D$"
+        })""",
+        R"""(
+            root ::= "\"" ("A" | "B" | "C" | "D") "\"" space
            space ::= | " " | "\n" [ \t]{0,20}
        )"""
    });
@@ -734,7 +749,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
        })""",
        R"""(
            dot ::= [^\x0A\x0D]
-            root ::= "\"" ("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot "\"" space
+            root ::= "\"" (("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot) "\"" space
            root-1 ::= [0-9]
            space ::= | " " | "\n" [ \t]{0,20}
        )"""
package/src/llama.cpp/tests/test-llama-grammar.cpp
@@ -2,16 +2,15 @@
 #undef NDEBUG
 #endif
 
-#define LLAMA_API_INTERNAL
 #include "llama.h"
-#include "grammar-parser.h"
+#include "llama-grammar.h"
 
 #include <cassert>
 #include <stdexcept>
 
 int main()
 {
-    grammar_parser::parse_state parsed_grammar;
+    llama_grammar_parser parsed_grammar;
 
     std::vector<std::pair<std::string, uint32_t>> expected = {
         {"expr", 2},
@@ -117,7 +116,7 @@ int main()
     llama_grammar * grammar = NULL;
     std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules());
 
-    grammar = llama_grammar_init(grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
+    grammar = llama_grammar_init_impl(nullptr, grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
     if (grammar == nullptr)
     {
         throw std::runtime_error("Failed to initialize llama_grammar");
@@ -174,13 +173,13 @@ int main()
     }};
 
     auto index = 0;
-    for (auto stack : llama_grammar_get_stacks(grammar))
+    for (const llama_grammar_stack & stack : llama_grammar_get_stacks(grammar))
     {
         // compare stack to expected_stack
         for (uint32_t i = 0; i < stack.size(); i++)
         {
-            auto element = stack[i];
-            auto expected_element = expected_stacks[index][i];
+            const llama_grammar_element * element = stack[i];
+            const llama_grammar_element & expected_element = expected_stacks[index][i];
 
             // pretty print error message before asserting
             if (expected_element.type != element->type || expected_element.value != element->value)
@@ -403,6 +402,8 @@ int main()
         delete[] candidate.code_points;
         candidate.code_points = nullptr;
     }
-    llama_grammar_free(grammar);
+
+    llama_grammar_free_impl(grammar);
+
     return 0;
 }
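
Note: this file exercises the second llama_grammar_init_impl overload, which takes pre-parsed rule pointers plus the id of the root symbol rather than a GBNF string. A sketch of that variant, mirroring the calls above:

    #include "llama-grammar.h"

    #include <vector>

    int main() {
        llama_grammar_parser parsed;
        parsed.parse("root ::= [0-9]+");

        // c_rules() yields raw rule pointers; the root symbol id selects the start rule
        std::vector<const llama_grammar_element *> rules(parsed.c_rules());
        llama_grammar * grammar = llama_grammar_init_impl(
            nullptr, rules.data(), rules.size(), parsed.symbol_ids.at("root"));

        llama_grammar_free_impl(grammar);
        return 0;
    }
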
package/src/llama.cpp/tests/test-log.cpp (new file)
@@ -0,0 +1,39 @@
+#include "log.h"
+
+#include <cstdlib>
+#include <thread>
+
+int main() {
+    const int n_thread = 8;
+
+    std::thread threads[n_thread];
+    for (int i = 0; i < n_thread; i++) {
+        threads[i] = std::thread([i]() {
+            const int n_msg = 1000;
+
+            for (int j = 0; j < n_msg; j++) {
+                const int log_type = std::rand() % 4;
+
+                switch (log_type) {
+                    case 0: LOG_INF("Thread %d: %d\n", i, j); break;
+                    case 1: LOG_WRN("Thread %d: %d\n", i, j); break;
+                    case 2: LOG_ERR("Thread %d: %d\n", i, j); break;
+                    case 3: LOG_DBG("Thread %d: %d\n", i, j); break;
+                    default:
+                        break;
+                }
+
+                if (rand () % 10 < 5) {
+                    common_log_set_timestamps(common_log_main(), rand() % 2);
+                    common_log_set_prefix    (common_log_main(), rand() % 2);
+                }
+            }
+        });
+    }
+
+    for (int i = 0; i < n_thread; i++) {
+        threads[i].join();
+    }
+
+    return 0;
+}
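
Note: test-log.cpp hammers the rewritten common logger (see common/log.cpp, +401 lines, in the file list) from eight threads while randomly flipping its options, checking that concurrent use is safe. The surface it exercises reduces to a few calls; a sketch restricted to what the test itself uses:

    #include "log.h"

    int main() {
        // toggle options on the shared logger instance, as the test does mid-run
        common_log_set_timestamps(common_log_main(), true);
        common_log_set_prefix(common_log_main(), true);

        // printf-style macros for the four levels exercised above
        LOG_INF("info %d\n", 1);
        LOG_WRN("warning %d\n", 2);
        LOG_ERR("error %d\n", 3);
        LOG_DBG("debug %d\n", 4);
        return 0;
    }
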