@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -4,8 +4,6 @@
4
4
 
5
5
  #include "llama-grammar.h"
6
6
 
7
- #include <unordered_map>
8
-
9
7
  struct llama_vocab;
10
8
  struct llama_grammar;
11
9
 
@@ -27,3 +25,24 @@ struct llama_sampler * llama_sampler_init_grammar_impl(
27
25
  const struct llama_vocab & vocab,
28
26
  const char * grammar_str,
29
27
  const char * grammar_root);
28
+
29
+ struct llama_sampler * llama_sampler_init_infill_impl(
30
+ const struct llama_vocab & vocab);
31
+
32
+ struct llama_sampler * llama_sampler_init_dry_impl(
33
+ const struct llama_vocab & vocab,
34
+ int32_t context_size,
35
+ float dry_multiplier,
36
+ float dry_base,
37
+ int32_t dry_allowed_length,
38
+ int32_t dry_penalty_last_n,
39
+ const char ** seq_breakers,
40
+ size_t num_breakers);
41
+
42
+ struct llama_sampler * llama_sampler_init_dry_testing(
43
+ int32_t context_size,
44
+ float dry_multiplier,
45
+ float dry_base,
46
+ int32_t dry_allowed_length,
47
+ int32_t dry_penalty_last_n,
48
+ const std::vector<std::vector<llama_token>>& seq_breakers);
@@ -221,7 +221,7 @@ struct llm_tokenizer_spm_session {
221
221
  }
222
222
 
223
223
  // seed the work queue with all possible 2-character tokens.
224
- for (size_t i = 1; i < symbols.size(); ++i) {
224
+ for (int i = 1; i < (int) symbols.size(); ++i) {
225
225
  try_add_bigram(i - 1, i);
226
226
  }
227
227
 
@@ -418,6 +418,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
418
418
  case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
419
419
  case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
420
420
  case LLAMA_VOCAB_PRE_TYPE_EXAONE:
421
+ case LLAMA_VOCAB_PRE_TYPE_MINERVA:
421
422
  regex_exprs = {
422
423
  "\\p{N}",
423
424
  "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
@@ -563,7 +564,7 @@ struct llm_tokenizer_bpe_session {
563
564
  index++;
564
565
  symbols.emplace_back(sym);
565
566
  }
566
- for (size_t i = 1; i < symbols.size(); ++i) {
567
+ for (int i = 1; i < (int) symbols.size(); ++i) {
567
568
  add_new_bigram(i - 1, i);
568
569
  }
569
570
 
@@ -737,7 +738,7 @@ struct llm_tokenizer_wpm_session {
737
738
  std::vector<std::string> words(1, "");
738
739
 
739
740
  for (const uint32_t cpt : cpts_nfd) {
740
- const auto flags = unicode_cpt_flags(cpt);
741
+ const auto flags = unicode_cpt_flags_from_cpt(cpt);
741
742
 
742
743
  if (flags.is_whitespace) {
743
744
  if (words.back().size()) { // finish previous word if any
@@ -1663,6 +1664,14 @@ llama_token llama_token_eos_impl(const struct llama_vocab & vocab) {
1663
1664
  return vocab.special_eos_id;
1664
1665
  }
1665
1666
 
1667
+ llama_token llama_token_eot_impl(const struct llama_vocab & vocab) {
1668
+ return vocab.special_eot_id;
1669
+ }
1670
+
1671
+ llama_token llama_token_eom_impl(const struct llama_vocab & vocab) {
1672
+ return vocab.special_eom_id;
1673
+ }
1674
+
1666
1675
  llama_token llama_token_cls_impl(const struct llama_vocab & vocab) {
1667
1676
  return vocab.special_cls_id;
1668
1677
  }
@@ -1688,23 +1697,39 @@ bool llama_add_eos_token_impl(const struct llama_vocab & vocab) {
1688
1697
  }
1689
1698
 
1690
1699
  llama_token llama_token_prefix_impl(const struct llama_vocab & vocab) {
1691
- return vocab.special_prefix_id;
1700
+ return vocab.special_fim_pre_id;
1692
1701
  }
1693
1702
 
1694
1703
  llama_token llama_token_middle_impl(const struct llama_vocab & vocab) {
1695
- return vocab.special_middle_id;
1704
+ return vocab.special_fim_mid_id;
1696
1705
  }
1697
1706
 
1698
1707
  llama_token llama_token_suffix_impl(const struct llama_vocab & vocab) {
1699
- return vocab.special_suffix_id;
1708
+ return vocab.special_fim_suf_id;
1700
1709
  }
1701
1710
 
1702
- llama_token llama_token_eot_impl(const struct llama_vocab & vocab) {
1703
- return vocab.special_eot_id;
1711
+ llama_token llama_token_fim_pre_impl(const struct llama_vocab & vocab) {
1712
+ return vocab.special_fim_pre_id;
1704
1713
  }
1705
1714
 
1706
- llama_token llama_token_eom_impl(const struct llama_vocab & vocab) {
1707
- return vocab.special_eom_id;
1715
+ llama_token llama_token_fim_suf_impl(const struct llama_vocab & vocab) {
1716
+ return vocab.special_fim_suf_id;
1717
+ }
1718
+
1719
+ llama_token llama_token_fim_mid_impl(const struct llama_vocab & vocab) {
1720
+ return vocab.special_fim_mid_id;
1721
+ }
1722
+
1723
+ llama_token llama_token_fim_pad_impl(const struct llama_vocab & vocab) {
1724
+ return vocab.special_fim_pad_id;
1725
+ }
1726
+
1727
+ llama_token llama_token_fim_rep_impl(const struct llama_vocab & vocab) {
1728
+ return vocab.special_fim_rep_id;
1729
+ }
1730
+
1731
+ llama_token llama_token_fim_sep_impl(const struct llama_vocab & vocab) {
1732
+ return vocab.special_fim_sep_id;
1708
1733
  }
1709
1734
 
1710
1735
  int32_t llama_tokenize_impl(
@@ -1842,6 +1867,10 @@ int32_t llama_detokenize_impl(
1842
1867
  int32_t text_len_max,
1843
1868
  bool remove_special,
1844
1869
  bool unparse_special) {
1870
+ if (vocab.type == LLAMA_VOCAB_TYPE_NONE) {
1871
+ return 0;
1872
+ }
1873
+
1845
1874
  GGML_ASSERT(vocab.tokenizer && "Tokenizer not initialized. Call llama_vocab::init_tokenizer() first.");
1846
1875
 
1847
1876
  int32_t avail = text_len_max;
@@ -1942,3 +1971,19 @@ int32_t llama_detokenize_impl(
1942
1971
 
1943
1972
  return total <= text_len_max ? total : -total;
1944
1973
  }
1974
+
1975
+ std::string llama_detokenize(const struct llama_vocab & vocab, const std::vector<llama_token> & tokens, bool special) {
1976
+ std::string text;
1977
+ text.resize(std::max(text.capacity(), tokens.size()));
1978
+ int32_t n_chars = llama_detokenize_impl(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
1979
+ if (n_chars < 0) {
1980
+ text.resize(-n_chars);
1981
+ n_chars = llama_detokenize_impl(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
1982
+ GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
1983
+ }
1984
+
1985
+ text.resize(n_chars);
1986
+
1987
+ // NOTE: the original tokenizer decodes bytes after collecting the pieces.
1988
+ return text;
1989
+ }
@@ -37,20 +37,26 @@ struct llama_vocab {
37
37
  std::map<std::pair<std::string, std::string>, int> bpe_ranks;
38
38
 
39
39
  // default LLaMA special tokens
40
+ // TODO: should we set all of these to LLAMA_TOKEN_NULL?
40
41
  id special_bos_id = 1;
41
42
  id special_eos_id = 2;
43
+ id special_eot_id = LLAMA_TOKEN_NULL;
44
+ id special_eom_id = LLAMA_TOKEN_NULL;
42
45
  id special_unk_id = 0;
43
46
  id special_sep_id = LLAMA_TOKEN_NULL;
44
47
  id special_pad_id = LLAMA_TOKEN_NULL;
45
48
  id special_cls_id = LLAMA_TOKEN_NULL;
46
49
  id special_mask_id = LLAMA_TOKEN_NULL;
47
50
 
48
- id linefeed_id = 13;
49
- id special_prefix_id = LLAMA_TOKEN_NULL;
50
- id special_suffix_id = LLAMA_TOKEN_NULL;
51
- id special_middle_id = LLAMA_TOKEN_NULL;
52
- id special_eot_id = LLAMA_TOKEN_NULL; // TODO: move above after "eos_id", and here add "file separator" token
53
- id special_eom_id = LLAMA_TOKEN_NULL;
51
+ id linefeed_id = 13;
52
+
53
+ // fim tokens
54
+ id special_fim_pre_id = LLAMA_TOKEN_NULL;
55
+ id special_fim_suf_id = LLAMA_TOKEN_NULL;
56
+ id special_fim_mid_id = LLAMA_TOKEN_NULL;
57
+ id special_fim_pad_id = LLAMA_TOKEN_NULL;
58
+ id special_fim_rep_id = LLAMA_TOKEN_NULL; // repo
59
+ id special_fim_sep_id = LLAMA_TOKEN_NULL; // file separator
54
60
 
55
61
  // set of all tokens that cause "end of generation"
56
62
  std::set<id> special_eog_ids;
@@ -104,19 +110,26 @@ bool llama_token_is_control_impl(const struct llama_vocab & vocab, llama_token t
104
110
 
105
111
  llama_token llama_token_bos_impl(const struct llama_vocab & vocab);
106
112
  llama_token llama_token_eos_impl(const struct llama_vocab & vocab);
113
+ llama_token llama_token_eot_impl(const struct llama_vocab & vocab);
114
+ llama_token llama_token_eom_impl(const struct llama_vocab & vocab);
107
115
  llama_token llama_token_cls_impl(const struct llama_vocab & vocab);
108
116
  llama_token llama_token_sep_impl(const struct llama_vocab & vocab);
109
117
  llama_token llama_token_nl_impl (const struct llama_vocab & vocab);
110
118
  llama_token llama_token_pad_impl(const struct llama_vocab & vocab);
111
119
 
112
- bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
113
- bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
114
-
115
120
  llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
116
121
  llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
117
122
  llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
118
- llama_token llama_token_eot_impl (const struct llama_vocab & vocab);
119
- llama_token llama_token_eom_impl (const struct llama_vocab & vocab);
123
+
124
+ llama_token llama_token_fim_pre_impl(const struct llama_vocab & vocab);
125
+ llama_token llama_token_fim_suf_impl(const struct llama_vocab & vocab);
126
+ llama_token llama_token_fim_mid_impl(const struct llama_vocab & vocab);
127
+ llama_token llama_token_fim_pad_impl(const struct llama_vocab & vocab);
128
+ llama_token llama_token_fim_rep_impl(const struct llama_vocab & vocab);
129
+ llama_token llama_token_fim_sep_impl(const struct llama_vocab & vocab);
130
+
131
+ bool llama_add_bos_token_impl(const struct llama_vocab & vocab);
132
+ bool llama_add_eos_token_impl(const struct llama_vocab & vocab);
120
133
 
121
134
  int32_t llama_tokenize_impl(
122
135
  const struct llama_vocab & vocab,
@@ -136,6 +149,12 @@ int32_t llama_token_to_piece_impl(
136
149
  int32_t lstrip,
137
150
  bool special);
138
151
 
152
+ // check if token0 is contained as a prefix in token1
153
+ bool llama_token_is_prefix_impl(
154
+ const struct llama_vocab & vocab,
155
+ llama_token token0,
156
+ llama_token token1);
157
+
139
158
  int32_t llama_detokenize_impl(
140
159
  const struct llama_vocab & vocab,
141
160
  const llama_token * tokens,
@@ -144,3 +163,8 @@ int32_t llama_detokenize_impl(
144
163
  int32_t text_len_max,
145
164
  bool remove_special,
146
165
  bool unparse_special);
166
+
167
+ std::string llama_detokenize(
168
+ const struct llama_vocab & vocab,
169
+ const std::vector<llama_token> & tokens,
170
+ bool special);