@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
package/CMakeLists.txt CHANGED
@@ -6,6 +6,11 @@ project (llama-node)
 
 set(CMAKE_CXX_STANDARD 17)
 
+execute_process(COMMAND
+  git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ggml-cpu-CMakeLists.txt.patch
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
 if(NOT DEFINED napi_build_version)
   set(napi_build_version 6)
 endif()
@@ -62,6 +67,8 @@ if (VULKAN_SDK)
   find_package(Vulkan REQUIRED)
 endif()
 
+set(LLAMA_BUILD_COMMON ON CACHE BOOL "Build common")
+
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
 
package/bin/** CHANGED (prebuilt binaries; binary file contents not shown)
package/lib/binding.ts CHANGED
@@ -8,6 +8,8 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   embedding?: boolean
+  embd_normalize?: number
+  pooling_type?: number
   n_ctx?: number
   n_batch?: number
   n_threads?: number
@@ -23,7 +25,21 @@ export type LlamaCompletionOptions = {
   temperature?: number
   top_k?: number
   top_p?: number
-  repetition_penalty?: number
+  min_p?: number
+  mirostat?: number
+  mirostat_tau?: number
+  mirostat_eta?: number
+  penalty_last_n?: number
+  penalty_repeat?: number
+  penalty_freq?: number
+  penalty_present?: number
+  typ_p?: number
+  xtc_threshold?: number
+  xtc_probability?: number
+  dry_multiplier?: number
+  dry_base?: number
+  dry_allowed_length?: number
+  dry_penalty_last_n?: number
   n_predict?: number
   max_length?: number
   max_tokens?: number
@@ -54,6 +70,7 @@ export type EmbeddingResult = {
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
+  getModelInfo(): object
   getFormattedChat(messages: ChatMessage[]): string
   completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
   stopCompletion(): void
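
The new LlamaCompletionOptions fields map onto llama.cpp's common sampler parameters (min-p, typical-p, Mirostat, repetition/frequency/presence penalties, XTC, DRY), replacing the old repetition_penalty field. A minimal TypeScript usage sketch, assuming a constructed LlamaContext instance `ctx` (how lib/index.js constructs it is not part of this diff) and purely illustrative option values:

    // Sketch only: values are illustrative, not recommended defaults.
    const result = await ctx.completion({
      prompt: 'Write a haiku about autumn.',
      n_predict: 48,
      temperature: 0.8,
      min_p: 0.05,
      penalty_last_n: 64,
      penalty_repeat: 1.1,
      xtc_threshold: 0.1,
      xtc_probability: 0.5,
      dry_multiplier: 0.8, // in llama.cpp's sampler chain, 0 leaves DRY disabled
      dry_base: 1.75,
      dry_allowed_length: 2,
      dry_penalty_last_n: -1,
    })
    console.log(result.text)
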
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.2",
+  "version": "0.3.4",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
package/src/DetokenizeWorker.cpp CHANGED
@@ -8,7 +8,7 @@ DetokenizeWorker::DetokenizeWorker(const Napi::CallbackInfo &info,
       _tokens(std::move(tokens)) {}
 
 void DetokenizeWorker::Execute() {
-  const auto text = ::llama_detokenize(_sess->context(), _tokens);
+  const auto text = ::common_detokenize(_sess->context(), _tokens);
   _text = std::move(text);
 }
 
package/src/EmbeddingWorker.cpp CHANGED
@@ -2,32 +2,42 @@
 #include "LlamaContext.h"
 
 EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
-                                 LlamaSessionPtr &sess, std::string text)
-    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+                                 LlamaSessionPtr &sess, std::string text, common_params &params)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}
 
 void EmbeddingWorker::Execute() {
   llama_kv_cache_clear(_sess->context());
-  auto tokens = ::llama_tokenize(_sess->context(), _text, true);
+  auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   if (tokens.empty() || tokens.back() != llama_token_sep(_sess->model())) {
     tokens.push_back(llama_token_sep(_sess->model()));
   }
   const int n_embd = llama_n_embd(_sess->model());
   do {
+    auto ctx = _sess->context();
     int ret =
-        llama_decode(_sess->context(),
-                     llama_batch_get_one(tokens.data(), tokens.size(), 0, 0));
+        llama_decode(ctx,
+                     llama_batch_get_one(tokens.data(), tokens.size()));
     if (ret < 0) {
       SetError("Failed to inference, code: " + std::to_string(ret));
       break;
     }
-    const float *embd = llama_get_embeddings_seq(_sess->context(), 0);
+
+    float *embd;
+    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
+      embd = llama_get_embeddings(ctx);
+    } else {
+      embd = llama_get_embeddings_seq(ctx, 0);
+    }
     if (embd == nullptr) {
       SetError("Failed to get embeddings");
       break;
     }
     _result.embedding.resize(n_embd);
-    memcpy(_result.embedding.data(), embd, n_embd * sizeof(float));
+    std::vector<float> embedding(embd, embd + n_embd), out(embd, embd + n_embd);
+    common_embd_normalize(embedding.data(), out.data(), n_embd, _params.embd_normalize);
+    memcpy(_result.embedding.data(), out.data(), n_embd * sizeof(float));
   } while (false);
 }
 
package/src/EmbeddingWorker.h CHANGED
@@ -9,7 +9,7 @@ class EmbeddingWorker : public Napi::AsyncWorker,
                         public Napi::Promise::Deferred {
 public:
   EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                  std::string text);
+                  std::string text, common_params &params);
 
 protected:
   void Execute();
@@ -19,5 +19,6 @@ protected:
 private:
   LlamaSessionPtr _sess;
   std::string _text;
+  common_params _params;
   EmbeddingResult _result;
 };
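
The embd_normalize value threaded through EmbeddingWorker is handed to llama.cpp's common_embd_normalize; a value of 2 (the default used here) selects Euclidean (L2) normalization, while -1 would leave the raw vector untouched. At the JavaScript level this surfaces as the optional second argument of the embedding method (see the LlamaContext::Embedding change further below). A sketch, assuming the native method is exposed as embedding() and `ctx` was created with embedding: true:

    // Sketch: embd_normalize 2 requests an L2-normalized vector (assumed default).
    const { embedding } = await ctx.embedding('search query text', { embd_normalize: 2 })
    // `embedding` is a number[] of length n_embd for the loaded model.
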
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -34,7 +34,7 @@ size_t findStoppingStrings(const std::string &text,
 
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-    Napi::Function callback, gpt_params params,
+    Napi::Function callback, common_params params,
     std::vector<std::string> stop_words)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words) {
@@ -64,11 +64,11 @@ void LlamaCompletionWorker::Execute() {
 
   auto sparams = llama_sampler_chain_default_params();
 
-  LlamaCppSampling sampling{gpt_sampler_init(model, _params.sparams),
-                            gpt_sampler_free};
+  LlamaCppSampling sampling{common_sampler_init(model, _params.sampling),
+                            common_sampler_free};
 
   std::vector<llama_token> prompt_tokens =
-      ::llama_tokenize(ctx, _params.prompt, add_bos);
+      ::common_tokenize(ctx, _params.prompt, add_bos);
   n_input = prompt_tokens.size();
   if (_sess->tokens_ptr()->size() > 0) {
     n_cur = common_part(*(_sess->tokens_ptr()), prompt_tokens);
@@ -102,18 +102,18 @@ void LlamaCompletionWorker::Execute() {
       _result.truncated = true;
     }
     int ret = llama_decode(
-        ctx, llama_batch_get_one(embd->data() + n_cur, n_input, n_cur, 0));
+        ctx, llama_batch_get_one(embd->data() + n_cur, n_input));
     if (ret < 0) {
       SetError("Failed to decode token, code: " + std::to_string(ret));
       break;
     }
     // sample the next token
     const llama_token new_token_id =
-        gpt_sampler_sample(sampling.get(), ctx, -1);
-    gpt_sampler_accept(sampling.get(), new_token_id, true);
+        common_sampler_sample(sampling.get(), ctx, -1);
+    common_sampler_accept(sampling.get(), new_token_id, true);
     // prepare the next batch
     embd->emplace_back(new_token_id);
-    auto token = llama_token_to_piece(ctx, new_token_id);
+    auto token = common_token_to_piece(ctx, new_token_id);
     _result.text += token;
     n_cur += n_input;
     _result.tokens_evaluated += n_input;
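
The worker decodes the prompt once, then samples one token per iteration, appending each decoded piece to the result and checking the configured stop words via findStoppingStrings. At the binding level this corresponds to completion()'s optional per-token callback plus the stop array. A sketch, assuming a constructed `ctx`; the exact shape of LlamaCompletionToken is not shown in this diff:

    const res = await ctx.completion(
      { prompt: 'Q: Name three primary colors.\nA:', n_predict: 64, stop: ['\nQ:'] },
      (token) => process.stdout.write(JSON.stringify(token)), // invoked per sampled token
    )
    // res.text holds the full generation; res.truncated reports context truncation.
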
package/src/LlamaCompletionWorker.h CHANGED
@@ -12,7 +12,7 @@ class LlamaCompletionWorker : public Napi::AsyncWorker,
                               public Napi::Promise::Deferred {
 public:
   LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                        Napi::Function callback, gpt_params params,
+                        Napi::Function callback, common_params params,
                         std::vector<std::string> stop_words = {});
 
   ~LlamaCompletionWorker();
@@ -28,7 +28,7 @@ protected:
 
 private:
   LlamaSessionPtr _sess;
-  gpt_params _params;
+  common_params _params;
   std::vector<std::string> _stop_words;
   Napi::ThreadSafeFunction _tsfn;
   bool _has_callback = false;
package/src/LlamaContext.cpp CHANGED
@@ -7,8 +7,8 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
-std::vector<llama_chat_msg> get_messages(Napi::Array messages) {
-  std::vector<llama_chat_msg> chat;
+std::vector<common_chat_msg> get_messages(Napi::Array messages) {
+  std::vector<common_chat_msg> chat;
   for (size_t i = 0; i < messages.Length(); i++) {
     auto message = messages.Get(i).As<Napi::Object>();
     chat.push_back({
@@ -25,6 +25,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
       {InstanceMethod<&LlamaContext::GetSystemInfo>(
            "getSystemInfo",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetModelInfo>(
+           "getModelInfo",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::GetFormattedChat>(
            "getFormattedChat",
            static_cast<napi_property_attributes>(napi_enumerable)),
@@ -67,14 +70,23 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
   auto options = info[0].As<Napi::Object>();
 
-  gpt_params params;
+  common_params params;
   params.model = get_option<std::string>(options, "model", "");
   if (params.model.empty()) {
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
   }
-  params.embedding = get_option<bool>(options, "embedding", false);
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
+  params.embedding = get_option<bool>(options, "embedding", false);
+  if (params.embedding) {
+    // For non-causal models, batch size must be equal to ubatch size
+    params.n_ubatch = params.n_batch;
+  }
+  params.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
+  int32_t pooling_type = get_option<int32_t>(options, "pooling_type", -1);
+  params.pooling_type = (enum llama_pooling_type) pooling_type;
+
   params.cpuparams.n_threads =
       get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
@@ -86,7 +98,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-  auto result = llama_init_from_gpt_params(params);
+  auto result = common_init_from_params(params);
 
   if (result.model == nullptr || result.context == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
@@ -94,7 +106,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
 
   _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
-  _info = gpt_params_get_system_info(params);
+  _info = common_params_get_system_info(params);
 }
 
 // getSystemInfo(): string
@@ -102,6 +114,44 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), _info);
 }
 
+bool validateModelChatTemplate(const struct llama_model * model) {
+  std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
+  std::string template_key = "tokenizer.chat_template";
+  int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
+  if (res >= 0) {
+    llama_chat_message chat[] = {{"user", "test"}};
+    std::string tmpl = std::string(model_template.data(), model_template.size());
+    int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    return chat_res > 0;
+  }
+  return res > 0;
+}
+
+// getModelInfo(): object
+Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
+  char desc[1024];
+  auto model = _sess->model();
+  llama_model_desc(model, desc, sizeof(desc));
+
+  int count = llama_model_meta_count(model);
+  Napi::Object metadata = Napi::Object::New(info.Env());
+  for (int i = 0; i < count; i++) {
+    char key[256];
+    llama_model_meta_key_by_index(model, i, key, sizeof(key));
+    char val[2048];
+    llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
+
+    metadata.Set(key, val);
+  }
+  Napi::Object details = Napi::Object::New(info.Env());
+  details.Set("desc", desc);
+  details.Set("nParams", llama_model_n_params(model));
+  details.Set("size", llama_model_size(model));
+  details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
+  details.Set("metadata", metadata);
+  return details;
+}
+
 // getFormattedChat(messages: [{ role: string, content: string }]): string
 Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
@@ -109,7 +159,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
   auto messages = info[0].As<Napi::Array>();
-  auto formatted = llama_chat_apply_template(_sess->model(), "", get_messages(messages), true);
+  auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
   return Napi::String::New(env, formatted);
 }
 
@@ -133,10 +183,10 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
   auto options = info[0].As<Napi::Object>();
 
-  gpt_params params = _sess->params();
+  common_params params = _sess->params();
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
-    auto formatted = llama_chat_apply_template(_sess->model(), "", get_messages(messages), true);
+    auto formatted = common_chat_apply_template(_sess->model(), "", get_messages(messages), true);
     params.prompt = formatted;
   } else {
     params.prompt = get_option<std::string>(options, "prompt", "");
@@ -146,30 +196,34 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
   }
   params.n_predict = get_option<int32_t>(options, "n_predict", -1);
-  params.sparams.temp = get_option<float>(options, "temperature", 0.80f);
-  params.sparams.top_k = get_option<int32_t>(options, "top_k", 40);
-  params.sparams.top_p = get_option<float>(options, "top_p", 0.95f);
-  params.sparams.min_p = get_option<float>(options, "min_p", 0.05f);
-  params.sparams.tfs_z = get_option<float>(options, "tfs_z", 1.00f);
-  params.sparams.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
-  params.sparams.mirostat_tau =
+  params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
+  params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
+  params.sampling.top_p = get_option<float>(options, "top_p", 0.95f);
+  params.sampling.min_p = get_option<float>(options, "min_p", 0.05f);
+  params.sampling.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
+  params.sampling.mirostat_tau =
      get_option<float>(options, "mirostat_tau", 5.00f);
-  params.sparams.mirostat_eta =
+  params.sampling.mirostat_eta =
      get_option<float>(options, "mirostat_eta", 0.10f);
-  params.sparams.penalty_last_n =
+  params.sampling.penalty_last_n =
      get_option<int32_t>(options, "penalty_last_n", 64);
-  params.sparams.penalty_repeat =
+  params.sampling.penalty_repeat =
      get_option<float>(options, "penalty_repeat", 1.00f);
-  params.sparams.penalty_freq =
+  params.sampling.penalty_freq =
      get_option<float>(options, "penalty_freq", 0.00f);
-  params.sparams.penalty_present =
+  params.sampling.penalty_present =
      get_option<float>(options, "penalty_present", 0.00f);
-  params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
-  params.sparams.typ_p = get_option<float>(options, "typical_p", 1.00f);
-  params.sparams.ignore_eos = get_option<float>(options, "ignore_eos", false);
-  params.sparams.grammar = get_option<std::string>(options, "grammar", "");
+  params.sampling.typ_p = get_option<float>(options, "typical_p", 1.00f);
+  params.sampling.xtc_threshold = get_option<float>(options, "xtc_threshold", 0.00f);
+  params.sampling.xtc_probability = get_option<float>(options, "xtc_probability", 0.10f);
+  params.sampling.dry_multiplier = get_option<float>(options, "dry_multiplier", 1.75f);
+  params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
+  params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
+  params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
+  params.sampling.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
-  params.sparams.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+  params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
   std::vector<std::string> stop_words;
   if (options.Has("stop") && options.Get("stop").IsArray()) {
     auto stop_words_array = options.Get("stop").As<Napi::Array>();
@@ -244,8 +298,16 @@ Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Context is disposed")
        .ThrowAsJavaScriptException();
   }
+  auto options = Napi::Object::New(env);
+  if (info.Length() >= 2 && info[1].IsObject()) {
+    options = info[1].As<Napi::Object>();
+  }
+
+  common_params embdParams;
+  embdParams.embedding = true;
+  embdParams.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
   auto text = info[0].ToString().Utf8Value();
-  auto *worker = new EmbeddingWorker(info, _sess, text);
+  auto *worker = new EmbeddingWorker(info, _sess, text, embdParams);
   worker->Queue();
   return worker->Promise();
 }
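
Based on the GetModelInfo implementation above, the object returned by getModelInfo() carries the keys set via details.Set; a sketch of the shape with illustrative values (metadata values are always strings, since they come from llama_model_meta_val_str_by_index):

    const info = ctx.getModelInfo()
    // {
    //   desc: 'llama 8B Q4_K - Medium',   // llama_model_desc(), value illustrative
    //   nParams: 8030261248,              // llama_model_n_params(), value illustrative
    //   size: 4920733696,                 // llama_model_size(), bytes, value illustrative
    //   isChatTemplateSupported: true,    // validateModelChatTemplate()
    //   metadata: { 'general.architecture': 'llama', /* one entry per GGUF metadata key */ }
    // }
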
package/src/LlamaContext.h CHANGED
@@ -9,6 +9,7 @@ public:
 
 private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
+  Napi::Value GetModelInfo(const Napi::CallbackInfo &info);
   Napi::Value GetFormattedChat(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
@@ -20,6 +21,7 @@ private:
   Napi::Value Release(const Napi::CallbackInfo &info);
 
   std::string _info;
+  Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/TokenizeWorker.cpp CHANGED
@@ -6,7 +6,7 @@ TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
 
 void TokenizeWorker::Execute() {
-  const auto tokens = ::llama_tokenize(_sess->context(), _text, false);
+  const auto tokens = ::common_tokenize(_sess->context(), _text, false);
   _result.tokens = std::move(tokens);
 }
 
package/src/common.hpp CHANGED
@@ -13,7 +13,7 @@
 
 typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
 typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
-typedef std::unique_ptr<gpt_sampler, decltype(&gpt_sampler_free)>
+typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
 
@@ -47,7 +47,7 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 
 class LlamaSession {
 public:
-  LlamaSession(llama_model *model, llama_context *ctx, gpt_params params)
+  LlamaSession(llama_model *model, llama_context *ctx, common_params params)
      : model_(LlamaCppModel(model, llama_free_model)),
        ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
    tokens_.reserve(params.n_ctx);
@@ -65,7 +65,7 @@ public:
    tokens_ = std::move(tokens);
  }
 
-  inline const gpt_params &params() const { return params_; }
+  inline const common_params &params() const { return params_; }
 
  inline std::mutex &get_mutex() { return mutex; }
 
@@ -79,7 +79,7 @@ public:
 private:
  LlamaCppModel model_;
  LlamaCppContext ctx_;
-  const gpt_params params_;
+  const common_params params_;
  std::vector<llama_token> tokens_{};
  std::mutex mutex;
 };