@fugood/llama.node 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/CMakeLists.txt +5 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/EmbeddingWorker.cpp +15 -5
  19. package/src/EmbeddingWorker.h +2 -1
  20. package/src/LlamaCompletionWorker.cpp +1 -1
  21. package/src/LlamaContext.cpp +81 -18
  22. package/src/LlamaContext.h +2 -0
  23. package/src/llama.cpp/.github/workflows/build.yml +197 -159
  24. package/src/llama.cpp/.github/workflows/docker.yml +5 -8
  25. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  26. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  27. package/src/llama.cpp/CMakeLists.txt +11 -6
  28. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  29. package/src/llama.cpp/cmake/common.cmake +33 -0
  30. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  31. package/src/llama.cpp/common/CMakeLists.txt +6 -2
  32. package/src/llama.cpp/common/arg.cpp +426 -245
  33. package/src/llama.cpp/common/common.cpp +143 -80
  34. package/src/llama.cpp/common/common.h +81 -24
  35. package/src/llama.cpp/common/sampling.cpp +53 -19
  36. package/src/llama.cpp/common/sampling.h +22 -1
  37. package/src/llama.cpp/common/speculative.cpp +274 -0
  38. package/src/llama.cpp/common/speculative.h +28 -0
  39. package/src/llama.cpp/docs/build.md +101 -148
  40. package/src/llama.cpp/examples/CMakeLists.txt +32 -13
  41. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  42. package/src/llama.cpp/examples/batched/batched.cpp +5 -4
  43. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  44. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  45. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  46. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  47. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  48. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  49. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  50. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  51. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  52. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  55. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  57. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/gritlm/gritlm.cpp +1 -1
  59. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/imatrix/imatrix.cpp +11 -2
  61. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/infill/infill.cpp +1 -1
  63. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  64. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +405 -316
  65. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  66. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  67. package/src/llama.cpp/examples/llava/clip.cpp +262 -66
  68. package/src/llama.cpp/examples/llava/clip.h +8 -2
  69. package/src/llama.cpp/examples/llava/llava-cli.cpp +1 -1
  70. package/src/llama.cpp/examples/llava/llava.cpp +46 -19
  71. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +1 -1
  72. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  73. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/lookahead/lookahead.cpp +1 -1
  75. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  76. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -1
  77. package/src/llama.cpp/examples/lookup/lookup.cpp +2 -2
  78. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/main/main.cpp +9 -5
  80. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  81. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +1 -1
  83. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  84. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  87. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  88. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  89. package/src/llama.cpp/examples/retrieval/retrieval.cpp +4 -4
  90. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  91. package/src/llama.cpp/examples/run/run.cpp +911 -0
  92. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -4
  94. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -7
  95. package/src/llama.cpp/examples/server/server.cpp +1758 -886
  96. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  97. package/src/llama.cpp/examples/server/utils.hpp +94 -304
  98. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  99. package/src/llama.cpp/examples/simple/simple.cpp +4 -0
  100. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +1 -1
  101. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +3 -0
  102. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/speculative/speculative.cpp +16 -15
  104. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  106. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/tokenize/tokenize.cpp +1 -1
  108. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  109. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  110. package/src/llama.cpp/ggml/CMakeLists.txt +46 -34
  111. package/src/llama.cpp/ggml/include/ggml-backend.h +16 -0
  112. package/src/llama.cpp/ggml/include/ggml-cpu.h +7 -49
  113. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  114. package/src/llama.cpp/ggml/include/ggml.h +106 -24
  115. package/src/llama.cpp/ggml/src/CMakeLists.txt +73 -24
  116. package/src/llama.cpp/ggml/src/ggml-alloc.c +0 -1
  117. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +51 -11
  118. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +379 -22
  119. package/src/llama.cpp/ggml/src/ggml-backend.cpp +4 -4
  120. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -7
  121. package/src/llama.cpp/ggml/src/ggml-blas/ggml-blas.cpp +5 -2
  122. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +33 -3
  123. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  124. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  125. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +95 -35
  126. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  127. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  128. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  129. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  130. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  131. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  132. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  133. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  134. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  135. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +288 -213
  136. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  137. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  138. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/common.h +19 -22
  139. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.cpp +93 -92
  140. package/src/llama.cpp/ggml/src/{ggml-amx → ggml-cpu/amx}/mmq.h +2 -9
  141. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  142. package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +892 -190
  143. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +2 -24
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +15 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +38 -25
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +552 -399
  151. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +101 -136
  152. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +2 -2
  153. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +7 -10
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  155. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -6
  156. package/src/llama.cpp/ggml/src/ggml-impl.h +32 -11
  157. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +13 -9
  158. package/src/llama.cpp/ggml/src/ggml-kompute/ggml-kompute.cpp +131 -64
  159. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +3 -6
  160. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +39 -0
  161. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +14 -7
  162. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  163. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  164. package/src/llama.cpp/ggml/src/ggml-opt.cpp +67 -80
  165. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -9
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +3 -5
  167. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +5 -2
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +13 -10
  169. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +2 -11
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +1 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +2 -2
  172. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  173. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  174. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +32 -13
  175. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +80 -61
  176. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  177. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +159 -114
  178. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  179. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  180. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +6 -20
  181. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +4 -3
  182. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +8 -8
  183. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  184. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  185. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +4 -1
  187. package/src/llama.cpp/ggml/src/ggml-threading.h +4 -2
  188. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +21 -7
  189. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +1718 -399
  190. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +3 -1
  191. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +105 -31
  192. package/src/llama.cpp/ggml/src/ggml.c +367 -207
  193. package/src/llama.cpp/include/llama-cpp.h +25 -0
  194. package/src/llama.cpp/include/llama.h +26 -19
  195. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  196. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  197. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  198. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  199. package/src/llama.cpp/src/CMakeLists.txt +2 -7
  200. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  201. package/src/llama.cpp/src/llama-grammar.h +2 -5
  202. package/src/llama.cpp/src/llama-sampling.cpp +35 -90
  203. package/src/llama.cpp/src/llama-vocab.cpp +6 -1
  204. package/src/llama.cpp/src/llama.cpp +1748 -640
  205. package/src/llama.cpp/src/unicode.cpp +62 -51
  206. package/src/llama.cpp/src/unicode.h +9 -10
  207. package/src/llama.cpp/tests/CMakeLists.txt +48 -37
  208. package/src/llama.cpp/tests/test-arg-parser.cpp +2 -2
  209. package/src/llama.cpp/tests/test-backend-ops.cpp +140 -21
  210. package/src/llama.cpp/tests/test-chat-template.cpp +50 -4
  211. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  212. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  213. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  214. package/src/llama.cpp/tests/test-quantize-fns.cpp +3 -3
  215. package/src/llama.cpp/tests/test-rope.cpp +61 -20
  216. package/src/llama.cpp/tests/test-sampling.cpp +2 -2
  217. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  218. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  219. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  220. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  221. package/src/llama.cpp/ggml/include/ggml-amx.h +0 -25
  222. package/src/llama.cpp/ggml/src/ggml-aarch64.c +0 -129
  223. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -19
  224. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +0 -107
  225. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +0 -446
package/CMakeLists.txt CHANGED
@@ -6,6 +6,11 @@ project (llama-node)
 
 set(CMAKE_CXX_STANDARD 17)
 
+execute_process(COMMAND
+  git apply ${CMAKE_CURRENT_SOURCE_DIR}/scripts/ggml-cpu-CMakeLists.txt.patch
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
 if(NOT DEFINED napi_build_version)
   set(napi_build_version 6)
 endif()
package/bin/** prebuilt binaries CHANGED (binary files, contents not shown)
package/lib/binding.ts CHANGED
@@ -8,6 +8,8 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   embedding?: boolean
+  embd_normalize?: number
+  pooling_type?: number
   n_ctx?: number
   n_batch?: number
   n_threads?: number
@@ -23,7 +25,21 @@ export type LlamaCompletionOptions = {
   temperature?: number
   top_k?: number
   top_p?: number
-  repetition_penalty?: number
+  min_p?: number
+  mirostat?: number
+  mirostat_tau?: number
+  mirostat_eta?: number
+  penalty_last_n?: number
+  penalty_repeat?: number
+  penalty_freq?: number
+  penalty_present?: number
+  typ_p?: number
+  xtc_threshold?: number
+  xtc_probability?: number
+  dry_multiplier?: number
+  dry_base?: number
+  dry_allowed_length?: number
+  dry_penalty_last_n?: number
   n_predict?: number
   max_length?: number
   max_tokens?: number
@@ -54,6 +70,7 @@ export type EmbeddingResult = {
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
+  getModelInfo(): object
   getFormattedChat(messages: ChatMessage[]): string
   completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
   stopCompletion(): void
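The binding.ts changes above surface the new llama.cpp sampling and embedding controls to JavaScript. Below is a minimal sketch of how the updated option types might be filled in, assuming these types are re-exported from the package entry point (they are declared in lib/binding.ts); the field names and native defaults come from this diff, everything else is illustrative.

import type { LlamaModelOptions, LlamaCompletionOptions } from '@fugood/llama.node'

// Model-level embedding options added in 0.3.4 (defaults mirror the native code).
const modelOptions: LlamaModelOptions = {
  model: './model.gguf',
  embedding: true,
  embd_normalize: 2, // 2 = Euclidean (L2) normalization, the native default
  pooling_type: -1,  // -1 = leave the pooling mode up to the model
  n_ctx: 2048,
}

// Completion options now map field-for-field onto common_params.sampling.
const samplingOptions: Partial<LlamaCompletionOptions> = {
  temperature: 0.8,
  min_p: 0.05,
  penalty_repeat: 1.0,
  xtc_threshold: 0.0,
  xtc_probability: 0.1,
  dry_multiplier: 1.75,
}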
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.3",
+  "version": "0.3.4",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
package/src/EmbeddingWorker.cpp CHANGED
@@ -2,8 +2,8 @@
 #include "LlamaContext.h"
 
 EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
-                                 LlamaSessionPtr &sess, std::string text)
-    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+                                 LlamaSessionPtr &sess, std::string text, common_params &params)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}
 
 void EmbeddingWorker::Execute() {
   llama_kv_cache_clear(_sess->context());
@@ -14,20 +14,30 @@ void EmbeddingWorker::Execute() {
   }
   const int n_embd = llama_n_embd(_sess->model());
   do {
+    auto ctx = _sess->context();
     int ret =
-        llama_decode(_sess->context(),
+        llama_decode(ctx,
                      llama_batch_get_one(tokens.data(), tokens.size()));
     if (ret < 0) {
       SetError("Failed to inference, code: " + std::to_string(ret));
       break;
     }
-    const float *embd = llama_get_embeddings_seq(_sess->context(), 0);
+
+    float *embd;
+    const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
+    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
+      embd = llama_get_embeddings(ctx);
+    } else {
+      embd = llama_get_embeddings_seq(ctx, 0);
+    }
     if (embd == nullptr) {
       SetError("Failed to get embeddings");
       break;
     }
     _result.embedding.resize(n_embd);
-    memcpy(_result.embedding.data(), embd, n_embd * sizeof(float));
+    std::vector<float> embedding(embd, embd + n_embd), out(embd, embd + n_embd);
+    common_embd_normalize(embedding.data(), out.data(), n_embd, _params.embd_normalize);
+    memcpy(_result.embedding.data(), out.data(), n_embd * sizeof(float));
   } while (false);
 }
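Net effect of the EmbeddingWorker changes: the embedding is read with llama_get_embeddings() when the context has no pooling and with llama_get_embeddings_seq() otherwise, then normalized via common_embd_normalize() before being copied into the result. A rough way to observe the normalization from the JavaScript side; this sketch assumes the context interface exposes embedding(text) returning a Promise of { embedding: number[] }, which is not part of this excerpt.

import type { LlamaContext } from '@fugood/llama.node'

declare const ctx: LlamaContext // assumed: an embedding-enabled context created elsewhere

const { embedding } = await ctx.embedding('some text')
const norm = Math.sqrt(embedding.reduce((acc: number, x: number) => acc + x * x, 0))
console.log(norm.toFixed(3)) // with embd_normalize = 2 (Euclidean), expected ≈ 1.000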
 
package/src/EmbeddingWorker.h CHANGED
@@ -9,7 +9,7 @@ class EmbeddingWorker : public Napi::AsyncWorker,
                         public Napi::Promise::Deferred {
 public:
   EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                  std::string text);
+                  std::string text, common_params &params);
 
 protected:
   void Execute();
@@ -19,5 +19,6 @@ protected:
 private:
   LlamaSessionPtr _sess;
   std::string _text;
+  common_params _params;
   EmbeddingResult _result;
 };
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -64,7 +64,7 @@ void LlamaCompletionWorker::Execute() {
 
   auto sparams = llama_sampler_chain_default_params();
 
-  LlamaCppSampling sampling{common_sampler_init(model, _params.sparams),
+  LlamaCppSampling sampling{common_sampler_init(model, _params.sampling),
                             common_sampler_free};
 
   std::vector<llama_token> prompt_tokens =
package/src/LlamaContext.cpp CHANGED
@@ -25,6 +25,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
       {InstanceMethod<&LlamaContext::GetSystemInfo>(
            "getSystemInfo",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetModelInfo>(
+           "getModelInfo",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::GetFormattedChat>(
            "getFormattedChat",
            static_cast<napi_property_attributes>(napi_enumerable)),
@@ -72,9 +75,18 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   if (params.model.empty()) {
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
   }
-  params.embedding = get_option<bool>(options, "embedding", false);
+
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
+  params.embedding = get_option<bool>(options, "embedding", false);
+  if (params.embedding) {
+    // For non-causal models, batch size must be equal to ubatch size
+    params.n_ubatch = params.n_batch;
+  }
+  params.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
+  int32_t pooling_type = get_option<int32_t>(options, "pooling_type", -1);
+  params.pooling_type = (enum llama_pooling_type) pooling_type;
+
   params.cpuparams.n_threads =
       get_option<int32_t>(options, "n_threads", cpu_get_num_math() / 2);
   params.n_gpu_layers = get_option<int32_t>(options, "n_gpu_layers", -1);
@@ -102,6 +114,44 @@ Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
   return Napi::String::New(info.Env(), _info);
 }
 
+bool validateModelChatTemplate(const struct llama_model * model) {
+  std::vector<char> model_template(2048, 0); // longest known template is about 1200 bytes
+  std::string template_key = "tokenizer.chat_template";
+  int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
+  if (res >= 0) {
+    llama_chat_message chat[] = {{"user", "test"}};
+    std::string tmpl = std::string(model_template.data(), model_template.size());
+    int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    return chat_res > 0;
+  }
+  return res > 0;
+}
+
+// getModelInfo(): object
+Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
+  char desc[1024];
+  auto model = _sess->model();
+  llama_model_desc(model, desc, sizeof(desc));
+
+  int count = llama_model_meta_count(model);
+  Napi::Object metadata = Napi::Object::New(info.Env());
+  for (int i = 0; i < count; i++) {
+    char key[256];
+    llama_model_meta_key_by_index(model, i, key, sizeof(key));
+    char val[2048];
+    llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
+
+    metadata.Set(key, val);
+  }
+  Napi::Object details = Napi::Object::New(info.Env());
+  details.Set("desc", desc);
+  details.Set("nParams", llama_model_n_params(model));
+  details.Set("size", llama_model_size(model));
+  details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
+  details.Set("metadata", metadata);
+  return details;
+}
+
 // getFormattedChat(messages: [{ role: string, content: string }]): string
 Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
@@ -146,29 +196,34 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
   }
   params.n_predict = get_option<int32_t>(options, "n_predict", -1);
-  params.sparams.temp = get_option<float>(options, "temperature", 0.80f);
-  params.sparams.top_k = get_option<int32_t>(options, "top_k", 40);
-  params.sparams.top_p = get_option<float>(options, "top_p", 0.95f);
-  params.sparams.min_p = get_option<float>(options, "min_p", 0.05f);
-  params.sparams.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
-  params.sparams.mirostat_tau =
+  params.sampling.temp = get_option<float>(options, "temperature", 0.80f);
+  params.sampling.top_k = get_option<int32_t>(options, "top_k", 40);
+  params.sampling.top_p = get_option<float>(options, "top_p", 0.95f);
+  params.sampling.min_p = get_option<float>(options, "min_p", 0.05f);
+  params.sampling.mirostat = get_option<int32_t>(options, "mirostat", 0.00f);
+  params.sampling.mirostat_tau =
       get_option<float>(options, "mirostat_tau", 5.00f);
-  params.sparams.mirostat_eta =
+  params.sampling.mirostat_eta =
      get_option<float>(options, "mirostat_eta", 0.10f);
-  params.sparams.penalty_last_n =
+  params.sampling.penalty_last_n =
      get_option<int32_t>(options, "penalty_last_n", 64);
-  params.sparams.penalty_repeat =
+  params.sampling.penalty_repeat =
      get_option<float>(options, "penalty_repeat", 1.00f);
-  params.sparams.penalty_freq =
+  params.sampling.penalty_freq =
      get_option<float>(options, "penalty_freq", 0.00f);
-  params.sparams.penalty_present =
+  params.sampling.penalty_present =
      get_option<float>(options, "penalty_present", 0.00f);
-  params.sparams.penalize_nl = get_option<bool>(options, "penalize_nl", false);
-  params.sparams.typ_p = get_option<float>(options, "typical_p", 1.00f);
-  params.sparams.ignore_eos = get_option<float>(options, "ignore_eos", false);
-  params.sparams.grammar = get_option<std::string>(options, "grammar", "");
+  params.sampling.typ_p = get_option<float>(options, "typical_p", 1.00f);
+  params.sampling.xtc_threshold = get_option<float>(options, "xtc_threshold", 0.00f);
+  params.sampling.xtc_probability = get_option<float>(options, "xtc_probability", 0.10f);
+  params.sampling.dry_multiplier = get_option<float>(options, "dry_multiplier", 1.75f);
+  params.sampling.dry_base = get_option<float>(options, "dry_base", 2);
+  params.sampling.dry_allowed_length = get_option<float>(options, "dry_allowed_length", -1);
+  params.sampling.dry_penalty_last_n = get_option<float>(options, "dry_penalty_last_n", 0);
+  params.sampling.ignore_eos = get_option<bool>(options, "ignore_eos", false);
+  params.sampling.grammar = get_option<std::string>(options, "grammar", "");
   params.n_keep = get_option<int32_t>(options, "n_keep", 0);
-  params.sparams.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
+  params.sampling.seed = get_option<int32_t>(options, "seed", LLAMA_DEFAULT_SEED);
   std::vector<std::string> stop_words;
   if (options.Has("stop") && options.Get("stop").IsArray()) {
     auto stop_words_array = options.Get("stop").As<Napi::Array>();
@@ -243,8 +298,16 @@ Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Context is disposed")
        .ThrowAsJavaScriptException();
   }
+  auto options = Napi::Object::New(env);
+  if (info.Length() >= 2 && info[1].IsObject()) {
+    options = info[1].As<Napi::Object>();
+  }
+
+  common_params embdParams;
+  embdParams.embedding = true;
+  embdParams.embd_normalize = get_option<int32_t>(options, "embd_normalize", 2);
   auto text = info[0].ToString().Utf8Value();
-  auto *worker = new EmbeddingWorker(info, _sess, text);
+  auto *worker = new EmbeddingWorker(info, _sess, text, embdParams);
   worker->Queue();
   return worker->Promise();
 }
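Two user-visible additions fall out of the LlamaContext.cpp changes: getModelInfo() builds a plain object from the model metadata, and the native Embedding entry point now reads an optional second options object. A hedged usage sketch follows; the return shape is inferred from the Set() calls above, 'general.name' is only a commonly present GGUF key, and the typed signature of embedding()'s options argument is not shown in this diff.

import type { LlamaContext } from '@fugood/llama.node'

declare const ctx: LlamaContext // assumed: an existing context instance

const modelInfo = ctx.getModelInfo() as {
  desc: string
  nParams: number
  size: number
  isChatTemplateSupported: boolean
  metadata: Record<string, string>
}
console.log(modelInfo.desc, modelInfo.metadata['general.name'])

// Per-call embedding parameters, mirroring get_option(options, "embd_normalize", 2) above.
// Cast to any because the optional options parameter may not be declared in binding.ts.
const result = await (ctx as any).embedding('hello world', { embd_normalize: 2 })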
package/src/LlamaContext.h CHANGED
@@ -9,6 +9,7 @@ public:
 
 private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
+  Napi::Value GetModelInfo(const Napi::CallbackInfo &info);
   Napi::Value GetFormattedChat(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
@@ -20,6 +21,7 @@ private:
   Napi::Value Release(const Napi::CallbackInfo &info);
 
   std::string _info;
+  Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
   LlamaCompletionWorker *_wip = nullptr;
 };