@fugood/llama.node 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/README.md +17 -2
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +8 -0
  19. package/lib/index.js +16 -1
  20. package/lib/index.ts +16 -0
  21. package/package.json +1 -1
  22. package/src/EmbeddingWorker.cpp +4 -3
  23. package/src/LlamaCompletionWorker.cpp +4 -2
  24. package/src/LlamaContext.cpp +156 -6
  25. package/src/LlamaContext.h +5 -0
  26. package/src/common.hpp +6 -11
  27. package/src/llama.cpp/.github/workflows/build.yml +19 -17
  28. package/src/llama.cpp/.github/workflows/docker.yml +77 -30
  29. package/src/llama.cpp/.github/workflows/editorconfig.yml +3 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +22 -3
  31. package/src/llama.cpp/CMakeLists.txt +49 -24
  32. package/src/llama.cpp/common/arg.cpp +82 -26
  33. package/src/llama.cpp/common/arg.h +3 -0
  34. package/src/llama.cpp/common/common.cpp +192 -72
  35. package/src/llama.cpp/common/common.h +51 -18
  36. package/src/llama.cpp/common/ngram-cache.cpp +12 -12
  37. package/src/llama.cpp/common/ngram-cache.h +2 -2
  38. package/src/llama.cpp/common/sampling.cpp +11 -6
  39. package/src/llama.cpp/common/speculative.cpp +18 -15
  40. package/src/llama.cpp/docs/build.md +2 -0
  41. package/src/llama.cpp/examples/batched/batched.cpp +9 -7
  42. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +3 -3
  43. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +10 -8
  44. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +11 -8
  45. package/src/llama.cpp/examples/cvector-generator/mean.hpp +1 -1
  46. package/src/llama.cpp/examples/cvector-generator/pca.hpp +1 -1
  47. package/src/llama.cpp/examples/embedding/embedding.cpp +8 -7
  48. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +7 -6
  49. package/src/llama.cpp/examples/export-lora/export-lora.cpp +8 -7
  50. package/src/llama.cpp/examples/gguf/gguf.cpp +10 -6
  51. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +1 -0
  52. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +8 -7
  53. package/src/llama.cpp/examples/gritlm/gritlm.cpp +13 -10
  54. package/src/llama.cpp/examples/imatrix/imatrix.cpp +13 -12
  55. package/src/llama.cpp/examples/infill/infill.cpp +23 -24
  56. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +44 -13
  57. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -6
  58. package/src/llama.cpp/examples/llava/clip.cpp +4 -2
  59. package/src/llama.cpp/examples/llava/llava-cli.cpp +9 -6
  60. package/src/llama.cpp/examples/llava/llava.cpp +2 -2
  61. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +8 -4
  62. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +11 -8
  63. package/src/llama.cpp/examples/lookahead/lookahead.cpp +6 -7
  64. package/src/llama.cpp/examples/lookup/lookup-create.cpp +4 -9
  65. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +3 -7
  66. package/src/llama.cpp/examples/lookup/lookup.cpp +5 -6
  67. package/src/llama.cpp/examples/main/main.cpp +51 -29
  68. package/src/llama.cpp/examples/parallel/parallel.cpp +5 -6
  69. package/src/llama.cpp/examples/passkey/passkey.cpp +7 -5
  70. package/src/llama.cpp/examples/perplexity/perplexity.cpp +37 -23
  71. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -14
  72. package/src/llama.cpp/examples/retrieval/retrieval.cpp +8 -8
  73. package/src/llama.cpp/examples/rpc/rpc-server.cpp +12 -0
  74. package/src/llama.cpp/examples/run/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +1351 -0
  76. package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +114 -0
  77. package/src/llama.cpp/examples/run/run.cpp +175 -61
  78. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +4 -25
  79. package/src/llama.cpp/examples/server/CMakeLists.txt +1 -0
  80. package/src/llama.cpp/examples/server/httplib.h +1295 -409
  81. package/src/llama.cpp/examples/server/server.cpp +387 -181
  82. package/src/llama.cpp/examples/server/tests/requirements.txt +1 -0
  83. package/src/llama.cpp/examples/server/utils.hpp +170 -58
  84. package/src/llama.cpp/examples/simple/simple.cpp +9 -8
  85. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +16 -12
  86. package/src/llama.cpp/examples/speculative/speculative.cpp +22 -23
  87. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +8 -12
  88. package/src/llama.cpp/examples/tokenize/tokenize.cpp +17 -5
  89. package/src/llama.cpp/examples/tts/tts.cpp +64 -23
  90. package/src/llama.cpp/ggml/CMakeLists.txt +5 -21
  91. package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
  92. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -0
  93. package/src/llama.cpp/ggml/include/ggml.h +36 -145
  94. package/src/llama.cpp/ggml/include/gguf.h +202 -0
  95. package/src/llama.cpp/ggml/src/CMakeLists.txt +6 -3
  96. package/src/llama.cpp/ggml/src/ggml-alloc.c +5 -0
  97. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +0 -1
  98. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +79 -49
  99. package/src/llama.cpp/ggml/src/ggml-backend.cpp +5 -2
  100. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +33 -23
  101. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +57 -72
  102. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +87 -2
  103. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +335 -66
  104. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -2
  105. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1090 -378
  106. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +2 -2
  107. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +1 -0
  108. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +3 -0
  109. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +3 -0
  110. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +3 -1
  111. package/src/llama.cpp/ggml/src/ggml-impl.h +11 -16
  112. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +16 -0
  113. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +6 -6
  114. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +154 -35
  115. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +1 -0
  116. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +9 -3
  117. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +18 -0
  118. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  119. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +1 -2
  120. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +3 -2
  121. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +1 -2
  122. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +40 -95
  123. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +48 -48
  124. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +24 -24
  125. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +238 -164
  126. package/src/llama.cpp/ggml/src/ggml-sycl/gla.cpp +105 -0
  127. package/src/llama.cpp/ggml/src/ggml-sycl/gla.hpp +8 -0
  128. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +3 -3
  129. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +1 -2
  130. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +3 -2
  131. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +1 -2
  132. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +7 -5
  133. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +1 -2
  134. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +74 -4
  135. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +314 -116
  136. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +4 -2
  137. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +9 -3
  138. package/src/llama.cpp/ggml/src/ggml.c +117 -1327
  139. package/src/llama.cpp/ggml/src/gguf.cpp +1329 -0
  140. package/src/llama.cpp/include/llama-cpp.h +6 -1
  141. package/src/llama.cpp/include/llama.h +138 -75
  142. package/src/llama.cpp/src/CMakeLists.txt +13 -1
  143. package/src/llama.cpp/src/llama-adapter.cpp +347 -0
  144. package/src/llama.cpp/src/llama-adapter.h +74 -0
  145. package/src/llama.cpp/src/llama-arch.cpp +1487 -0
  146. package/src/llama.cpp/src/llama-arch.h +400 -0
  147. package/src/llama.cpp/src/llama-batch.cpp +368 -0
  148. package/src/llama.cpp/src/llama-batch.h +88 -0
  149. package/src/llama.cpp/src/llama-chat.cpp +578 -0
  150. package/src/llama.cpp/src/llama-chat.h +52 -0
  151. package/src/llama.cpp/src/llama-context.cpp +1775 -0
  152. package/src/llama.cpp/src/llama-context.h +128 -0
  153. package/src/llama.cpp/src/llama-cparams.cpp +1 -0
  154. package/src/llama.cpp/src/llama-cparams.h +37 -0
  155. package/src/llama.cpp/src/llama-grammar.cpp +5 -4
  156. package/src/llama.cpp/src/llama-grammar.h +3 -1
  157. package/src/llama.cpp/src/llama-hparams.cpp +71 -0
  158. package/src/llama.cpp/src/llama-hparams.h +139 -0
  159. package/src/llama.cpp/src/llama-impl.cpp +167 -0
  160. package/src/llama.cpp/src/llama-impl.h +16 -136
  161. package/src/llama.cpp/src/llama-kv-cache.cpp +718 -0
  162. package/src/llama.cpp/src/llama-kv-cache.h +218 -0
  163. package/src/llama.cpp/src/llama-mmap.cpp +589 -0
  164. package/src/llama.cpp/src/llama-mmap.h +67 -0
  165. package/src/llama.cpp/src/llama-model-loader.cpp +1124 -0
  166. package/src/llama.cpp/src/llama-model-loader.h +167 -0
  167. package/src/llama.cpp/src/llama-model.cpp +3953 -0
  168. package/src/llama.cpp/src/llama-model.h +370 -0
  169. package/src/llama.cpp/src/llama-quant.cpp +934 -0
  170. package/src/llama.cpp/src/llama-quant.h +1 -0
  171. package/src/llama.cpp/src/llama-sampling.cpp +147 -32
  172. package/src/llama.cpp/src/llama-sampling.h +3 -19
  173. package/src/llama.cpp/src/llama-vocab.cpp +1832 -575
  174. package/src/llama.cpp/src/llama-vocab.h +97 -142
  175. package/src/llama.cpp/src/llama.cpp +7160 -20314
  176. package/src/llama.cpp/src/unicode.cpp +8 -3
  177. package/src/llama.cpp/tests/CMakeLists.txt +2 -0
  178. package/src/llama.cpp/tests/test-autorelease.cpp +3 -3
  179. package/src/llama.cpp/tests/test-backend-ops.cpp +370 -59
  180. package/src/llama.cpp/tests/test-chat-template.cpp +162 -125
  181. package/src/llama.cpp/tests/test-gguf.cpp +222 -187
  182. package/src/llama.cpp/tests/test-model-load-cancel.cpp +1 -1
  183. package/src/llama.cpp/tests/test-sampling.cpp +0 -1
  184. package/src/llama.cpp/tests/test-tokenizer-0.cpp +4 -4
  185. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +9 -7
  186. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +8 -6
package/README.md CHANGED
@@ -4,9 +4,23 @@
 [![NPM Version](https://img.shields.io/npm/v/%40fugood%2Fllama.node)](https://www.npmjs.com/package/@fugood/llama.node)
 ![NPM Downloads](https://img.shields.io/npm/dw/%40fugood%2Fllama.node)
 
-Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
+An another Node binding of [llama.cpp](https://github.com/ggerganov/llama.cpp) to make same API with [llama.rn](https://github.com/mybigday/llama.rn) as much as possible.
 
-[llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
+- [llama.cpp](https://github.com/ggerganov/llama.cpp): Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
+- [llama.rn](https://github.com/mybigday/llama.rn): React Native binding of llama.cpp
+
+## Platform Support
+
+- macOS
+  - arm64: CPU and Metal GPU acceleration
+  - x86_64: CPU only
+- Windows (x86_64 and arm64)
+  - CPU
+  - GPU acceleration via Vulkan
+- Linux (x86_64 and arm64)
+  - CPU
+  - GPU acceleration via Vulkan
+  - GPU acceleration via CUDA
 
 ## Installation
 
@@ -49,6 +63,7 @@ console.log('Result:', text)
 
 - [x] `default`: General usage, not support GPU except macOS (Metal)
 - [x] `vulkan`: Support GPU Vulkan (Windows/Linux), but some scenario might unstable
+- [x] `cuda`: Support GPU CUDA (Linux), but only for limited capability (x86_64: 8.9, arm64: 8.7)
 
 ## License
 
(Binary files under package/bin/ changed; contents not shown.)
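These prebuilt binaries correspond to the build variants described in the README diff above. As an illustration only, a minimal sketch of selecting a variant from TypeScript, assuming the model-path option is named `model` (the README's own usage example is not reproduced here):

```ts
import { loadModel } from '@fugood/llama.node'

// `lib_variant` picks which prebuilt binary is loaded:
// 'default' (CPU, plus Metal on Apple Silicon), 'vulkan', or the new 'cuda' variant.
const context = await loadModel({
  model: './model.gguf', // assumption: option name for the model path
  lib_variant: 'vulkan',
})

// ... run completions, then free the native resources
await context.release()
```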
package/lib/binding.ts CHANGED
@@ -37,6 +37,9 @@ export type LlamaModelOptions = {
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
+  lora?: string
+  lora_scaled?: number
+  lora_list?: { path: string; scaled: number }[]
 }
 
 export type LlamaCompletionOptions = {
@@ -111,6 +114,11 @@ export interface LlamaContext {
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
+  removeLoraAdapters(adapters: { path: string }[]): void
+  getLoadedLoraAdapters(): { path: string; scaled: number }[]
+  // static
+  loadModelInfo(path: string, skip: string[]): Promise<Object>
 }
 
 export interface Module {
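The new typings above cover LoRA adapters. A minimal sketch of how they might be exercised, assuming `loadModel` from this package and hypothetical model/adapter paths:

```ts
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({
  model: './base-model.gguf', // assumption: option name for the model path; paths are hypothetical
  lora_list: [{ path: './adapter-a.gguf', scaled: 1.0 }],
})

// Swap the active adapters at runtime and inspect what is currently loaded.
context.applyLoraAdapters([{ path: './adapter-b.gguf', scaled: 0.5 }])
console.log(context.getLoadedLoraAdapters())

// Note: the native implementation further below clears all adapters regardless of the argument.
context.removeLoraAdapters([])
```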
package/lib/index.js CHANGED
@@ -23,7 +23,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
     });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.loadModel = void 0;
+exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = void 0;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
 const mods = {};
@@ -34,3 +34,18 @@ const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     return new mods[variant].LlamaContext(options);
 });
 exports.loadModel = loadModel;
+exports.initLlama = binding_1.loadModule;
+const modelInfoSkip = [
+    // Large fields
+    'tokenizer.ggml.tokens',
+    'tokenizer.ggml.token_type',
+    'tokenizer.ggml.merges',
+    'tokenizer.ggml.scores',
+];
+const loadLlamaModelInfo = (path) => __awaiter(void 0, void 0, void 0, function* () {
+    var _a;
+    const variant = 'default';
+    (_a = mods[variant]) !== null && _a !== void 0 ? _a : (mods[variant] = yield (0, binding_1.loadModule)(variant));
+    return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip);
+});
+exports.loadLlamaModelInfo = loadLlamaModelInfo;
package/lib/index.ts CHANGED
@@ -14,3 +14,19 @@ export const loadModel = async (options: LlamaModelOptionsExtended): Promise<Lla
   mods[variant] ??= await loadModule(options.lib_variant)
   return new mods[variant].LlamaContext(options)
 }
+
+export const initLlama = loadModule
+
+const modelInfoSkip = [
+  // Large fields
+  'tokenizer.ggml.tokens',
+  'tokenizer.ggml.token_type',
+  'tokenizer.ggml.merges',
+  'tokenizer.ggml.scores',
+]
+
+export const loadLlamaModelInfo = async (path: string): Promise<Object> => {
+  const variant = 'default'
+  mods[variant] ??= await loadModule(variant)
+  return mods[variant].LlamaContext.loadModelInfo(path, modelInfoSkip)
+}
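A short usage sketch of the new `loadLlamaModelInfo` export; the GGUF path is hypothetical, and the returned object carries the GGUF header fields plus KV metadata, minus the skipped tokenizer arrays:

```ts
import { loadLlamaModelInfo } from '@fugood/llama.node'

// Reads GGUF metadata through the static LlamaContext.loadModelInfo binding
// without creating a full inference context.
const info = (await loadLlamaModelInfo('./model.gguf')) as Record<string, unknown>
console.log(info.version, info['general.architecture']) // assumes the usual general.* keys are present
```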
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.3.7",
+  "version": "0.3.9",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
package/src/EmbeddingWorker.cpp CHANGED
@@ -9,10 +9,11 @@ void EmbeddingWorker::Execute() {
   llama_kv_cache_clear(_sess->context());
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
-  if (tokens.empty() || tokens.back() != llama_token_sep(_sess->model())) {
-    tokens.push_back(llama_token_sep(_sess->model()));
+  auto vocab = llama_model_get_vocab(_sess->model());
+  if (tokens.empty() || tokens.back() != llama_vocab_sep(vocab)) {
+    tokens.push_back(llama_vocab_sep(vocab));
   }
-  const int n_embd = llama_n_embd(_sess->model());
+  const int n_embd = llama_model_n_embd(_sess->model());
   do {
     auto ctx = _sess->context();
     int ret =
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -59,7 +59,9 @@ void LlamaCompletionWorker::Execute() {
   size_t n_cur = 0;
   size_t n_input = 0;
   const auto model = _sess->model();
-  const bool add_bos = llama_add_bos_token(model);
+  auto vocab = llama_model_get_vocab(model);
+
+  const bool add_bos = llama_vocab_get_add_bos(vocab);
   auto ctx = _sess->context();
 
   auto sparams = llama_sampler_chain_default_params();
@@ -130,7 +132,7 @@ void LlamaCompletionWorker::Execute() {
       });
     }
     // is it an end of generation?
-    if (llama_token_is_eog(model, new_token_id)) {
+    if (llama_vocab_is_eog(vocab, new_token_id)) {
       break;
     }
     // check for stop words
package/src/LlamaContext.cpp CHANGED
@@ -1,4 +1,6 @@
 #include "ggml.h"
+#include "gguf.h"
+#include "llama-impl.h"
 #include "LlamaContext.h"
 #include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
@@ -8,6 +10,56 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+// loadModelInfo(path: string): object
+Napi::Value LlamaContext::ModelInfo(const Napi::CallbackInfo& info) {
+  Napi::Env env = info.Env();
+  struct gguf_init_params params = {
+    /*.no_alloc = */ false,
+    /*.ctx      = */ NULL,
+  };
+  std::string path = info[0].ToString().Utf8Value();
+
+  // Convert Napi::Array to vector<string>
+  std::vector<std::string> skip;
+  if (info.Length() > 1 && info[1].IsArray()) {
+    Napi::Array skipArray = info[1].As<Napi::Array>();
+    for (uint32_t i = 0; i < skipArray.Length(); i++) {
+      skip.push_back(skipArray.Get(i).ToString().Utf8Value());
+    }
+  }
+
+  struct gguf_context * ctx = gguf_init_from_file(path.c_str(), params);
+
+  Napi::Object metadata = Napi::Object::New(env);
+  if (std::find(skip.begin(), skip.end(), "version") == skip.end()) {
+    metadata.Set("version", Napi::Number::New(env, gguf_get_version(ctx)));
+  }
+  if (std::find(skip.begin(), skip.end(), "alignment") == skip.end()) {
+    metadata.Set("alignment", Napi::Number::New(env, gguf_get_alignment(ctx)));
+  }
+  if (std::find(skip.begin(), skip.end(), "data_offset") == skip.end()) {
+    metadata.Set("data_offset", Napi::Number::New(env, gguf_get_data_offset(ctx)));
+  }
+
+  // kv
+  {
+    const int n_kv = gguf_get_n_kv(ctx);
+
+    for (int i = 0; i < n_kv; ++i) {
+      const char * key = gguf_get_key(ctx, i);
+      if (std::find(skip.begin(), skip.end(), key) != skip.end()) {
+        continue;
+      }
+      const std::string value = gguf_kv_to_str(ctx, i);
+      metadata.Set(key, Napi::String::New(env, value.c_str()));
+    }
+  }
+
+  gguf_free(ctx);
+
+  return metadata;
+}
+
 std::vector<common_chat_msg> get_messages(Napi::Array messages) {
   std::vector<common_chat_msg> chat;
   for (size_t i = 0; i < messages.Length(); i++) {
@@ -51,8 +103,20 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
        InstanceMethod<&LlamaContext::LoadSession>(
            "loadSession",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::ApplyLoraAdapters>(
+           "applyLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::RemoveLoraAdapters>(
+           "removeLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetLoadedLoraAdapters>(
+           "getLoadedLoraAdapters",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::Release>(
-           "release", static_cast<napi_property_attributes>(napi_enumerable))});
+           "release", static_cast<napi_property_attributes>(napi_enumerable)),
+       StaticMethod<&LlamaContext::ModelInfo>(
+           "loadModelInfo",
+           static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
 #if NAPI_VERSION > 5
@@ -140,14 +204,56 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
-  auto result = common_init_from_params(params);
+  auto sess = std::make_shared<LlamaSession>(params);
 
-  if (result.model == nullptr || result.context == nullptr) {
+  if (sess->model() == nullptr || sess->context() == nullptr) {
     Napi::TypeError::New(env, "Failed to load model")
         .ThrowAsJavaScriptException();
   }
 
-  _sess = std::make_shared<LlamaSession>(result.model, result.context, params);
+  auto ctx = sess->context();
+  auto model = sess->model();
+
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_path = get_option<std::string>(options, "lora", "");
+  auto lora_scaled = get_option<float>(options, "lora_scaled", 1.0f);
+  if (lora_path != "") {
+    common_adapter_lora_info la;
+    la.path = lora_path;
+    la.scale = lora_scaled;
+    la.ptr = llama_adapter_lora_init(model, lora_path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+
+  if (options.Has("lora_list") && options.Get("lora_list").IsArray()) {
+    auto lora_list = options.Get("lora_list").As<Napi::Array>();
+    if (lora_list != nullptr) {
+      int lora_list_size = lora_list.Length();
+      for (int i = 0; i < lora_list_size; i++) {
+        auto lora_adapter = lora_list.Get(i).As<Napi::Object>();
+        auto path = lora_adapter.Get("path").ToString();
+        if (path != nullptr) {
+          common_adapter_lora_info la;
+          la.path = path;
+          la.scale = lora_adapter.Get("scaled").ToNumber().FloatValue();
+          la.ptr = llama_adapter_lora_init(model, path.Utf8Value().c_str());
+          if (la.ptr == nullptr) {
+            Napi::TypeError::New(env, "Failed to load lora adapter")
+                .ThrowAsJavaScriptException();
+          }
+          lora.push_back(la);
+        }
+      }
+    }
+  }
+  common_set_adapter_lora(ctx, lora);
+  _lora = lora;
+
+  _sess = sess;
   _info = common_params_get_system_info(params);
 }
 
@@ -162,8 +268,8 @@ bool validateModelChatTemplate(const struct llama_model * model) {
   int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
   if (res >= 0) {
     llama_chat_message chat[] = {{"user", "test"}};
-    std::string tmpl = std::string(model_template.data(), model_template.size());
-    int32_t chat_res = llama_chat_apply_template(model, tmpl.c_str(), chat, 1, true, nullptr, 0);
+    const char * tmpl = llama_model_chat_template(model);
+    int32_t chat_res = llama_chat_apply_template(tmpl, chat, 1, true, nullptr, 0);
     return chat_res > 0;
   }
   return res > 0;
@@ -187,6 +293,7 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   }
   Napi::Object details = Napi::Object::New(info.Env());
   details.Set("desc", desc);
+  details.Set("nEmbd", llama_model_n_embd(model));
   details.Set("nParams", llama_model_n_params(model));
   details.Set("size", llama_model_size(model));
   details.Set("isChatTemplateSupported", validateModelChatTemplate(model));
@@ -396,6 +503,49 @@ Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
   return worker->Promise();
 }
 
+// applyLoraAdapters(lora_adapters: [{ path: string, scaled: number }]): void
+void LlamaContext::ApplyLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  std::vector<common_adapter_lora_info> lora;
+  auto lora_adapters = info[0].As<Napi::Array>();
+  for (size_t i = 0; i < lora_adapters.Length(); i++) {
+    auto lora_adapter = lora_adapters.Get(i).As<Napi::Object>();
+    auto path = lora_adapter.Get("path").ToString().Utf8Value();
+    auto scaled = lora_adapter.Get("scaled").ToNumber().FloatValue();
+    common_adapter_lora_info la;
+    la.path = path;
+    la.scale = scaled;
+    la.ptr = llama_adapter_lora_init(_sess->model(), path.c_str());
+    if (la.ptr == nullptr) {
+      Napi::TypeError::New(env, "Failed to load lora adapter")
+          .ThrowAsJavaScriptException();
+    }
+    lora.push_back(la);
+  }
+  common_set_adapter_lora(_sess->context(), lora);
+  _lora = lora;
+}
+
+// removeLoraAdapters(): void
+void LlamaContext::RemoveLoraAdapters(const Napi::CallbackInfo &info) {
+  _lora.clear();
+  common_set_adapter_lora(_sess->context(), _lora);
+}
+
+// getLoadedLoraAdapters(): Promise<{ count, lora_adapters: [{ path: string,
+// scaled: number }] }>
+Napi::Value LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  Napi::Array lora_adapters = Napi::Array::New(env, _lora.size());
+  for (size_t i = 0; i < _lora.size(); i++) {
+    Napi::Object lora_adapter = Napi::Object::New(env);
+    lora_adapter.Set("path", _lora[i].path);
+    lora_adapter.Set("scaled", _lora[i].scale);
+    lora_adapters.Set(i, lora_adapter);
+  }
+  return lora_adapters;
+}
+
 // release(): Promise<void>
 Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   auto env = info.Env();
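For completeness, the static binding registered above is what the JS-side `loadLlamaModelInfo` calls. A sketch of reaching it directly through the native module with a custom skip list, assuming `initLlama` (an alias of `loadModule`) resolves the `'default'` variant as in lib/index:

```ts
import { initLlama } from '@fugood/llama.node'

const mod = await initLlama('default')
// Skip the large tokenizer arrays, mirroring what loadLlamaModelInfo does internally.
const metadata = await mod.LlamaContext.loadModelInfo('./model.gguf', [
  'tokenizer.ggml.tokens',
  'tokenizer.ggml.token_type',
  'tokenizer.ggml.merges',
  'tokenizer.ggml.scores',
])
console.log(metadata)
```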
package/src/LlamaContext.h CHANGED
@@ -5,6 +5,7 @@ class LlamaCompletionWorker;
 class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
 public:
   LlamaContext(const Napi::CallbackInfo &info);
+  static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
   static void Init(Napi::Env env, Napi::Object &exports);
 
 private:
@@ -18,10 +19,14 @@ private:
   Napi::Value Embedding(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
+  void ApplyLoraAdapters(const Napi::CallbackInfo &info);
+  void RemoveLoraAdapters(const Napi::CallbackInfo &info);
+  Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
 
   std::string _info;
   Napi::Object _meta;
   LlamaSessionPtr _sess = nullptr;
+  std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 };
package/src/common.hpp CHANGED
@@ -11,8 +11,6 @@
 #include <tuple>
 #include <vector>
 
-typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
-typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
 typedef std::unique_ptr<common_sampler, decltype(&common_sampler_free)>
     LlamaCppSampling;
 typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
@@ -47,17 +45,17 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 
 class LlamaSession {
 public:
-  LlamaSession(llama_model *model, llama_context *ctx, common_params params)
-      : model_(LlamaCppModel(model, llama_free_model)),
-        ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
+  LlamaSession(common_params params)
+      : params_(params) {
+    llama_init_ = common_init_from_params(params);
     tokens_.reserve(params.n_ctx);
   }
 
   ~LlamaSession() { dispose(); }
 
-  inline llama_context *context() { return ctx_.get(); }
+  inline llama_context *context() { return llama_init_.context.get(); }
 
-  inline llama_model *model() { return model_.get(); }
+  inline llama_model *model() { return llama_init_.model.get(); }
 
   inline std::vector<llama_token> *tokens_ptr() { return &tokens_; }
 
@@ -72,13 +70,10 @@ public:
   void dispose() {
     std::lock_guard<std::mutex> lock(mutex);
     tokens_.clear();
-    ctx_.reset();
-    model_.reset();
   }
 
 private:
-  LlamaCppModel model_;
-  LlamaCppContext ctx_;
+  common_init_result llama_init_;
   const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;
package/src/llama.cpp/.github/workflows/build.yml CHANGED
@@ -60,8 +60,7 @@ jobs:
             -DLLAMA_CURL=ON \
             -DGGML_METAL_USE_BF16=ON \
             -DGGML_METAL_EMBED_LIBRARY=ON \
-            -DGGML_RPC=ON \
-            -DBUILD_SHARED_LIBS=OFF
+            -DGGML_RPC=ON
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
 
       - name: Test
@@ -88,6 +87,7 @@ jobs:
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
           cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
           zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
 
       - name: Upload artifacts
@@ -123,8 +123,7 @@ jobs:
             -DLLAMA_FATAL_WARNINGS=ON \
             -DLLAMA_CURL=ON \
             -DGGML_METAL=OFF \
-            -DGGML_RPC=ON \
-            -DBUILD_SHARED_LIBS=OFF
+            -DGGML_RPC=ON
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
 
       - name: Test
@@ -151,6 +150,7 @@ jobs:
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
           cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
           zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
 
       - name: Upload artifacts
@@ -181,7 +181,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
           cmake --build . --config Release -j $(nproc)
 
       - name: Test
@@ -219,6 +219,7 @@ jobs:
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
           cp LICENSE ./build/bin/
+          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
           zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
 
       - name: Upload artifacts
@@ -236,7 +237,7 @@ jobs:
     strategy:
       matrix:
         sanitizer: [ADDRESS, THREAD, UNDEFINED]
-        build_type: [Debug, Release]
+        build_type: [Debug]
 
     steps:
       - name: Clone
@@ -651,23 +652,23 @@ jobs:
       matrix:
         include:
           - build: 'noavx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
           - build: 'avx2-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
           - build: 'avx-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
           - build: 'avx512-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
           - build: 'openblas-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'kompute-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
           - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
           - build: 'llvm-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'msvc-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'llvm-arm64-opencl-adreno'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
 
@@ -798,6 +799,7 @@ jobs:
         if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
         run: |
           Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
+          Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
           7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
 
       - name: Upload artifacts
@@ -914,7 +916,7 @@ jobs:
        shell: cmd
        run: |
          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
-          cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
+          cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DGGML_RPC=ON
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
          cmake --build build --config Release
@@ -1239,7 +1241,7 @@ jobs:
 
       - name: Create release
         id: create_release
-        uses: anzz1/action-create-release@v1
+        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with: