@fugood/llama.node 0.0.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/CMakeLists.txt +85 -0
  2. package/README.md +56 -0
  3. package/bin/darwin/arm64/llama-node.node +0 -0
  4. package/bin/darwin/x64/llama-node.node +0 -0
  5. package/bin/linux/arm64/llama-node.node +0 -0
  6. package/bin/linux/x64/llama-node.node +0 -0
  7. package/bin/win32/arm64/llama-node.node +0 -0
  8. package/bin/win32/arm64/node.lib +0 -0
  9. package/bin/win32/x64/llama-node.node +0 -0
  10. package/bin/win32/x64/node.lib +0 -0
  11. package/lib/binding.js +13 -0
  12. package/lib/binding.ts +57 -0
  13. package/lib/index.js +24 -0
  14. package/lib/index.ts +13 -0
  15. package/package.json +65 -0
  16. package/src/addons.cpp +506 -0
  17. package/src/llama.cpp/CMakeLists.txt +1320 -0
  18. package/src/llama.cpp/build.zig +172 -0
  19. package/src/llama.cpp/cmake/FindSIMD.cmake +100 -0
  20. package/src/llama.cpp/common/CMakeLists.txt +87 -0
  21. package/src/llama.cpp/common/base64.hpp +392 -0
  22. package/src/llama.cpp/common/common.cpp +2949 -0
  23. package/src/llama.cpp/common/common.h +324 -0
  24. package/src/llama.cpp/common/console.cpp +501 -0
  25. package/src/llama.cpp/common/console.h +19 -0
  26. package/src/llama.cpp/common/grammar-parser.cpp +440 -0
  27. package/src/llama.cpp/common/grammar-parser.h +29 -0
  28. package/src/llama.cpp/common/json-schema-to-grammar.cpp +764 -0
  29. package/src/llama.cpp/common/json-schema-to-grammar.h +4 -0
  30. package/src/llama.cpp/common/json.hpp +24766 -0
  31. package/src/llama.cpp/common/log.h +724 -0
  32. package/src/llama.cpp/common/ngram-cache.cpp +282 -0
  33. package/src/llama.cpp/common/ngram-cache.h +94 -0
  34. package/src/llama.cpp/common/sampling.cpp +353 -0
  35. package/src/llama.cpp/common/sampling.h +147 -0
  36. package/src/llama.cpp/common/stb_image.h +8396 -0
  37. package/src/llama.cpp/common/train.cpp +1513 -0
  38. package/src/llama.cpp/common/train.h +233 -0
  39. package/src/llama.cpp/examples/CMakeLists.txt +52 -0
  40. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +5 -0
  41. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +1640 -0
  42. package/src/llama.cpp/examples/batched/CMakeLists.txt +5 -0
  43. package/src/llama.cpp/examples/batched/batched.cpp +262 -0
  44. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +5 -0
  45. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +261 -0
  46. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +5 -0
  47. package/src/llama.cpp/examples/beam-search/beam-search.cpp +188 -0
  48. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +6 -0
  49. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +275 -0
  50. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +5 -0
  51. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +936 -0
  52. package/src/llama.cpp/examples/embedding/CMakeLists.txt +5 -0
  53. package/src/llama.cpp/examples/embedding/embedding.cpp +211 -0
  54. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +9 -0
  55. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +195 -0
  56. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +5 -0
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +462 -0
  58. package/src/llama.cpp/examples/finetune/CMakeLists.txt +5 -0
  59. package/src/llama.cpp/examples/finetune/finetune.cpp +1861 -0
  60. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +5 -0
  61. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +132 -0
  62. package/src/llama.cpp/examples/gguf/CMakeLists.txt +5 -0
  63. package/src/llama.cpp/examples/gguf/gguf.cpp +256 -0
  64. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +5 -0
  65. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +553 -0
  66. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +5 -0
  67. package/src/llama.cpp/examples/gritlm/gritlm.cpp +215 -0
  68. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +5 -0
  69. package/src/llama.cpp/examples/imatrix/imatrix.cpp +655 -0
  70. package/src/llama.cpp/examples/infill/CMakeLists.txt +5 -0
  71. package/src/llama.cpp/examples/infill/infill.cpp +767 -0
  72. package/src/llama.cpp/examples/jeopardy/questions.txt +100 -0
  73. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +5 -0
  74. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +1286 -0
  75. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/CMakeLists.txt +50 -0
  76. package/src/llama.cpp/examples/llama.android/app/src/main/cpp/llama-android.cpp +443 -0
  77. package/src/llama.cpp/examples/llava/CMakeLists.txt +37 -0
  78. package/src/llama.cpp/examples/llava/clip.cpp +2027 -0
  79. package/src/llama.cpp/examples/llava/clip.h +85 -0
  80. package/src/llama.cpp/examples/llava/llava-cli.cpp +309 -0
  81. package/src/llama.cpp/examples/llava/llava.cpp +426 -0
  82. package/src/llama.cpp/examples/llava/llava.h +50 -0
  83. package/src/llama.cpp/examples/llava/requirements.txt +3 -0
  84. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +5 -0
  85. package/src/llama.cpp/examples/lookahead/lookahead.cpp +485 -0
  86. package/src/llama.cpp/examples/lookup/CMakeLists.txt +23 -0
  87. package/src/llama.cpp/examples/lookup/lookup-create.cpp +41 -0
  88. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +47 -0
  89. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +160 -0
  90. package/src/llama.cpp/examples/lookup/lookup.cpp +258 -0
  91. package/src/llama.cpp/examples/main/CMakeLists.txt +5 -0
  92. package/src/llama.cpp/examples/main/main.cpp +957 -0
  93. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +33 -0
  94. package/src/llama.cpp/examples/parallel/CMakeLists.txt +5 -0
  95. package/src/llama.cpp/examples/parallel/parallel.cpp +427 -0
  96. package/src/llama.cpp/examples/passkey/CMakeLists.txt +5 -0
  97. package/src/llama.cpp/examples/passkey/passkey.cpp +302 -0
  98. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +5 -0
  99. package/src/llama.cpp/examples/perplexity/perplexity.cpp +1943 -0
  100. package/src/llama.cpp/examples/quantize/CMakeLists.txt +6 -0
  101. package/src/llama.cpp/examples/quantize/quantize.cpp +423 -0
  102. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +6 -0
  103. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +424 -0
  104. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +5 -0
  105. package/src/llama.cpp/examples/retrieval/retrieval.cpp +350 -0
  106. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +5 -0
  107. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +246 -0
  108. package/src/llama.cpp/examples/server/CMakeLists.txt +40 -0
  109. package/src/llama.cpp/examples/server/bench/requirements.txt +2 -0
  110. package/src/llama.cpp/examples/server/httplib.h +9465 -0
  111. package/src/llama.cpp/examples/server/server.cpp +3826 -0
  112. package/src/llama.cpp/examples/server/tests/requirements.txt +6 -0
  113. package/src/llama.cpp/examples/server/utils.hpp +653 -0
  114. package/src/llama.cpp/examples/simple/CMakeLists.txt +5 -0
  115. package/src/llama.cpp/examples/simple/simple.cpp +183 -0
  116. package/src/llama.cpp/examples/speculative/CMakeLists.txt +5 -0
  117. package/src/llama.cpp/examples/speculative/speculative.cpp +614 -0
  118. package/src/llama.cpp/examples/sycl/CMakeLists.txt +9 -0
  119. package/src/llama.cpp/examples/sycl/ls-sycl-device.cpp +13 -0
  120. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +5 -0
  121. package/src/llama.cpp/examples/tokenize/tokenize.cpp +42 -0
  122. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +5 -0
  123. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +1252 -0
  124. package/src/llama.cpp/ggml-alloc.c +985 -0
  125. package/src/llama.cpp/ggml-alloc.h +76 -0
  126. package/src/llama.cpp/ggml-backend-impl.h +141 -0
  127. package/src/llama.cpp/ggml-backend.c +2099 -0
  128. package/src/llama.cpp/ggml-backend.h +233 -0
  129. package/src/llama.cpp/ggml-common.h +1853 -0
  130. package/src/llama.cpp/ggml-cuda.h +43 -0
  131. package/src/llama.cpp/ggml-impl.h +265 -0
  132. package/src/llama.cpp/ggml-kompute.cpp +2006 -0
  133. package/src/llama.cpp/ggml-kompute.h +46 -0
  134. package/src/llama.cpp/ggml-metal.h +66 -0
  135. package/src/llama.cpp/ggml-mpi.c +216 -0
  136. package/src/llama.cpp/ggml-mpi.h +39 -0
  137. package/src/llama.cpp/ggml-opencl.cpp +2301 -0
  138. package/src/llama.cpp/ggml-opencl.h +36 -0
  139. package/src/llama.cpp/ggml-quants.c +12678 -0
  140. package/src/llama.cpp/ggml-quants.h +133 -0
  141. package/src/llama.cpp/ggml-sycl.cpp +17882 -0
  142. package/src/llama.cpp/ggml-sycl.h +49 -0
  143. package/src/llama.cpp/ggml-vulkan-shaders.hpp +69849 -0
  144. package/src/llama.cpp/ggml-vulkan.cpp +6442 -0
  145. package/src/llama.cpp/ggml-vulkan.h +29 -0
  146. package/src/llama.cpp/ggml.c +21819 -0
  147. package/src/llama.cpp/ggml.h +2403 -0
  148. package/src/llama.cpp/llama.cpp +17468 -0
  149. package/src/llama.cpp/llama.h +1117 -0
  150. package/src/llama.cpp/pocs/CMakeLists.txt +12 -0
  151. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +9 -0
  152. package/src/llama.cpp/pocs/vdot/q8dot.cpp +172 -0
  153. package/src/llama.cpp/pocs/vdot/vdot.cpp +310 -0
  154. package/src/llama.cpp/prompts/LLM-questions.txt +49 -0
  155. package/src/llama.cpp/prompts/alpaca.txt +1 -0
  156. package/src/llama.cpp/prompts/assistant.txt +31 -0
  157. package/src/llama.cpp/prompts/chat-with-baichuan.txt +4 -0
  158. package/src/llama.cpp/prompts/chat-with-bob.txt +7 -0
  159. package/src/llama.cpp/prompts/chat-with-qwen.txt +1 -0
  160. package/src/llama.cpp/prompts/chat-with-vicuna-v0.txt +7 -0
  161. package/src/llama.cpp/prompts/chat-with-vicuna-v1.txt +7 -0
  162. package/src/llama.cpp/prompts/chat.txt +28 -0
  163. package/src/llama.cpp/prompts/dan-modified.txt +1 -0
  164. package/src/llama.cpp/prompts/dan.txt +1 -0
  165. package/src/llama.cpp/prompts/mnemonics.txt +93 -0
  166. package/src/llama.cpp/prompts/parallel-questions.txt +43 -0
  167. package/src/llama.cpp/prompts/reason-act.txt +18 -0
  168. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +3 -0
  169. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +1 -0
  170. package/src/llama.cpp/requirements/requirements-convert-lora-to-ggml.txt +2 -0
  171. package/src/llama.cpp/requirements/requirements-convert-persimmon-to-gguf.txt +2 -0
  172. package/src/llama.cpp/requirements/requirements-convert.txt +5 -0
  173. package/src/llama.cpp/requirements.txt +12 -0
  174. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +24 -0
  175. package/src/llama.cpp/scripts/xxd.cmake +16 -0
  176. package/src/llama.cpp/sgemm.cpp +999 -0
  177. package/src/llama.cpp/sgemm.h +12 -0
  178. package/src/llama.cpp/tests/CMakeLists.txt +78 -0
  179. package/src/llama.cpp/tests/get-model.cpp +21 -0
  180. package/src/llama.cpp/tests/get-model.h +2 -0
  181. package/src/llama.cpp/tests/test-autorelease.cpp +24 -0
  182. package/src/llama.cpp/tests/test-backend-ops.cpp +2266 -0
  183. package/src/llama.cpp/tests/test-c.c +7 -0
  184. package/src/llama.cpp/tests/test-chat-template.cpp +107 -0
  185. package/src/llama.cpp/tests/test-double-float.cpp +57 -0
  186. package/src/llama.cpp/tests/test-grad0.cpp +1606 -0
  187. package/src/llama.cpp/tests/test-grammar-integration.cpp +243 -0
  188. package/src/llama.cpp/tests/test-grammar-parser.cpp +250 -0
  189. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +899 -0
  190. package/src/llama.cpp/tests/test-llama-grammar.cpp +402 -0
  191. package/src/llama.cpp/tests/test-model-load-cancel.cpp +27 -0
  192. package/src/llama.cpp/tests/test-opt.cpp +181 -0
  193. package/src/llama.cpp/tests/test-quantize-fns.cpp +185 -0
  194. package/src/llama.cpp/tests/test-quantize-perf.cpp +363 -0
  195. package/src/llama.cpp/tests/test-rope.cpp +221 -0
  196. package/src/llama.cpp/tests/test-sampling.cpp +301 -0
  197. package/src/llama.cpp/tests/test-tokenizer-0-falcon.cpp +187 -0
  198. package/src/llama.cpp/tests/test-tokenizer-0-llama.cpp +190 -0
  199. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +123 -0
  200. package/src/llama.cpp/tests/test-tokenizer-1-llama.cpp +111 -0
  201. package/src/llama.cpp/unicode-data.cpp +1651 -0
  202. package/src/llama.cpp/unicode-data.h +16 -0
  203. package/src/llama.cpp/unicode.cpp +277 -0
  204. package/src/llama.cpp/unicode.h +28 -0
package/src/addons.cpp ADDED
@@ -0,0 +1,506 @@
#include "common/common.h"
#include "llama.h"
#include <memory>
#include <mutex>
#include <napi.h>
#include <string>
#include <thread>
#include <tuple>
#include <vector>

typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
typedef std::unique_ptr<llama_sampling_context, decltype(&llama_sampling_free)>
    LlamaCppSampling;
typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;

size_t common_part(const std::vector<llama_token> &a,
                   const std::vector<llama_token> &b) {
  size_t i = 0;
  while (i < a.size() && i < b.size() && a[i] == b[i]) {
    i++;
  }
  return i;
}

class LlamaCompletionWorker;

class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
public:
  // construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
  // use_mlock, use_mmap }): LlamaContext throws error
  LlamaContext(const Napi::CallbackInfo &info)
      : Napi::ObjectWrap<LlamaContext>(info) {
    Napi::Env env = info.Env();
    if (info.Length() < 1 || !info[0].IsObject()) {
      Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
    }
    auto options = info[0].As<Napi::Object>();

    if (options.Has("model")) {
      params.model = options.Get("model").ToString();
    }
    if (options.Has("embedding")) {
      params.embedding = options.Get("embedding").ToBoolean();
    }
    if (options.Has("n_ctx")) {
      params.n_ctx = options.Get("n_ctx").ToNumber();
    }
    if (options.Has("n_batch")) {
      params.n_batch = options.Get("n_batch").ToNumber();
    }
    if (options.Has("n_threads")) {
      params.n_threads = options.Get("n_threads").ToNumber();
    }
    if (options.Has("n_gpu_layers")) {
      params.n_gpu_layers = options.Get("n_gpu_layers").ToNumber();
    }
    if (options.Has("use_mlock")) {
      params.use_mlock = options.Get("use_mlock").ToBoolean();
    }
    if (options.Has("use_mmap")) {
      params.use_mmap = options.Get("use_mmap").ToBoolean();
    }
    if (options.Has("numa")) {
      int numa = options.Get("numa").ToNumber();
      params.numa = static_cast<ggml_numa_strategy>(numa);
    }
    if (options.Has("seed")) {
      params.seed = options.Get("seed").ToNumber();
    }

    llama_backend_init();
    llama_numa_init(params.numa);

    auto tuple = llama_init_from_gpt_params(params);
    model.reset(std::get<0>(tuple));
    ctx.reset(std::get<1>(tuple));

    if (model == nullptr || ctx == nullptr) {
      Napi::TypeError::New(env, "Failed to load model")
          .ThrowAsJavaScriptException();
    }
  }

  static void Export(Napi::Env env, Napi::Object &exports) {
    Napi::Function func = DefineClass(
        env, "LlamaContext",
        {InstanceMethod<&LlamaContext::GetSystemInfo>(
             "getSystemInfo",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::Completion>(
             "completion",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::StopCompletion>(
             "stopCompletion",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::SaveSession>(
             "saveSession",
             static_cast<napi_property_attributes>(napi_enumerable)),
         InstanceMethod<&LlamaContext::LoadSession>(
             "loadSession",
             static_cast<napi_property_attributes>(napi_enumerable))});
    Napi::FunctionReference *constructor = new Napi::FunctionReference();
    *constructor = Napi::Persistent(func);
#if NAPI_VERSION > 5
    env.SetInstanceData(constructor);
#endif
    exports.Set("LlamaContext", func);
  }

  llama_context *getContext() { return ctx.get(); }
  llama_model *getModel() { return model.get(); }

  std::vector<llama_token> *getTokens() { return tokens.get(); }

  const gpt_params &getParams() const { return params; }

  void ensureTokens() {
    if (tokens == nullptr) {
      tokens = std::make_unique<std::vector<llama_token>>();
    }
  }

  void setTokens(std::vector<llama_token> tokens) {
    this->tokens.reset(new std::vector<llama_token>(std::move(tokens)));
  }

  std::mutex &getMutex() { return mutex; }

private:
  Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
  Napi::Value Completion(const Napi::CallbackInfo &info);
  void StopCompletion(const Napi::CallbackInfo &info);
  Napi::Value SaveSession(const Napi::CallbackInfo &info);
  Napi::Value LoadSession(const Napi::CallbackInfo &info);

  gpt_params params;
  LlamaCppModel model{nullptr, llama_free_model};
  LlamaCppContext ctx{nullptr, llama_free};
  std::unique_ptr<std::vector<llama_token>> tokens;
  std::mutex mutex;
  LlamaCompletionWorker *compl_worker = nullptr;
};

class LlamaCompletionWorker : public Napi::AsyncWorker,
                              public Napi::Promise::Deferred {
  LlamaContext *_ctx;
  gpt_params _params;
  std::vector<std::string> _stop_words;
  std::string generated_text = "";
  Napi::ThreadSafeFunction _tsfn;
  bool _has_callback = false;
  bool _stop = false;
  size_t tokens_predicted = 0;
  size_t tokens_evaluated = 0;
  bool truncated = false;

public:
  LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx,
                        Napi::Function callback, gpt_params params,
                        std::vector<std::string> stop_words = {})
      : AsyncWorker(info.Env()), Deferred(info.Env()), _ctx(ctx),
        _params(params), _stop_words(stop_words) {
    _ctx->Ref();
    if (!callback.IsEmpty()) {
      _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                            "LlamaCompletionCallback", 0, 1);
      _has_callback = true;
    }
  }

  ~LlamaCompletionWorker() {
    _ctx->Unref();
    if (_has_callback) {
      _tsfn.Abort();
      _tsfn.Release();
    }
  }

  void Stop() { _stop = true; }

protected:
  size_t findStoppingStrings(const std::string &text,
                             const size_t last_token_size) {
    size_t stop_pos = std::string::npos;

    for (const std::string &word : _stop_words) {
      size_t pos;

      const size_t tmp = word.size() + last_token_size;
      const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;

      pos = text.find(word, from_pos);

      if (pos != std::string::npos &&
          (stop_pos == std::string::npos || pos < stop_pos)) {
        stop_pos = pos;
      }
    }

    return stop_pos;
  }

  void Execute() {
    _ctx->getMutex().lock();
    _ctx->ensureTokens();
    const auto t_main_start = ggml_time_us();
    const size_t n_ctx = _params.n_ctx;
    auto n_keep = _params.n_keep;
    auto n_predict = _params.n_predict;
    size_t n_cur = 0;
    size_t n_input = 0;
    const bool add_bos = llama_should_add_bos_token(_ctx->getModel());
    auto *ctx = _ctx->getContext();

    llama_set_rng_seed(ctx, _params.seed);

    LlamaCppSampling sampling{llama_sampling_init(_params.sparams),
                              llama_sampling_free};

    std::vector<llama_token> prompt_tokens =
        ::llama_tokenize(ctx, _params.prompt, add_bos);
    n_input = prompt_tokens.size();
    if (_ctx->getTokens() != nullptr) {
      n_cur = common_part(*_ctx->getTokens(), prompt_tokens);
      if (n_cur == n_input) {
        --n_cur;
      }
      n_input -= n_cur;
      llama_kv_cache_seq_rm(ctx, 0, n_cur, -1);
    }
    _ctx->setTokens(std::move(prompt_tokens));

    const int max_len = _params.n_predict < 0 ? 0 : _params.n_predict;

    for (int i = 0; i < max_len || _stop; i++) {
      auto *embd = _ctx->getTokens();
      // check if we need to remove some tokens
      if (embd->size() >= n_ctx) {
        const int n_left = n_cur - n_keep - 1;
        const int n_discard = n_left / 2;

        llama_kv_cache_seq_rm(ctx, 0, n_keep + 1, n_keep + n_discard + 1);
        llama_kv_cache_seq_add(ctx, 0, n_keep + 1 + n_discard, n_cur,
                               -n_discard);

        for (size_t i = n_keep + 1 + n_discard; i < embd->size(); i++) {
          (*embd)[i - n_discard] = (*embd)[i];
        }
        embd->resize(embd->size() - n_discard);

        n_cur -= n_discard;
        truncated = true;
      }
      int ret = llama_decode(
          ctx, llama_batch_get_one(embd->data() + n_cur, n_input, n_cur, 0));
      if (ret < 0) {
        SetError("Failed to decode token, code: " + std::to_string(ret));
        break;
      }
      // sample the next token
      const llama_token new_token_id =
          llama_sampling_sample(sampling.get(), ctx, nullptr);
      // prepare the next batch
      embd->push_back(new_token_id);
      auto token = llama_token_to_piece(ctx, new_token_id);
      generated_text += token;
      n_cur += n_input;
      tokens_evaluated += n_input;
      tokens_predicted += 1;
      n_input = 1;
      if (_has_callback) {
        // _cb.Call({ Napi::String::New(AsyncWorker::Env(), token) });
        const char *c_token = strdup(token.c_str());
        _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
                                       const char *value) {
          auto obj = Napi::Object::New(env);
          obj.Set("token", Napi::String::New(env, value));
          jsCallback.Call({obj});
        });
      }
      // is it an end of generation?
      if (llama_token_is_eog(_ctx->getModel(), new_token_id)) {
        break;
      }
      // check for stop words
      if (!_stop_words.empty()) {
        const size_t stop_pos =
            findStoppingStrings(generated_text, token.size());
        if (stop_pos != std::string::npos) {
          break;
        }
      }
    }
    const auto t_main_end = ggml_time_us();
    _ctx->getMutex().unlock();
  }

  void OnOK() {
    auto result = Napi::Object::New(Napi::AsyncWorker::Env());
    result.Set("tokens_evaluated",
               Napi::Number::New(Napi::AsyncWorker::Env(), tokens_evaluated));
    result.Set("tokens_predicted",
               Napi::Number::New(Napi::AsyncWorker::Env(), tokens_predicted));
    result.Set("truncated",
               Napi::Boolean::New(Napi::AsyncWorker::Env(), truncated));
    result.Set("text",
               Napi::String::New(Napi::AsyncWorker::Env(), generated_text));
    Napi::Promise::Deferred::Resolve(result);
  }

  void OnError(const Napi::Error &err) {
    Napi::Promise::Deferred::Reject(err.Value());
  }
};

class SaveSessionWorker : public Napi::AsyncWorker,
                          public Napi::Promise::Deferred {
  std::string _path;
  LlamaContext *_ctx;

public:
  SaveSessionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx)
      : AsyncWorker(info.Env()), Deferred(info.Env()),
        _path(info[0].ToString()), _ctx(ctx) {
    _ctx->Ref();
  }

protected:
  void Execute() {
    _ctx->getMutex().lock();
    if (_ctx->getTokens() == nullptr) {
      SetError("Failed to save session");
      return;
    }
    if (!llama_state_save_file(_ctx->getContext(), _path.c_str(),
                               _ctx->getTokens()->data(),
                               _ctx->getTokens()->size())) {
      SetError("Failed to save session");
    }
    _ctx->getMutex().unlock();
  }

  void OnOK() { Resolve(AsyncWorker::Env().Undefined()); }

  void OnError(const Napi::Error &err) { Reject(err.Value()); }
};

class LoadSessionWorker : public Napi::AsyncWorker,
                          public Napi::Promise::Deferred {
  std::string _path;
  LlamaContext *_ctx;
  size_t count = 0;

public:
  LoadSessionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx)
      : AsyncWorker(info.Env()), Deferred(info.Env()),
        _path(info[0].ToString()), _ctx(ctx) {
    _ctx->Ref();
  }

protected:
  void Execute() {
    _ctx->getMutex().lock();
    _ctx->ensureTokens();
    // reserve the maximum number of tokens for capacity
    _ctx->getTokens()->reserve(_ctx->getParams().n_ctx);
    if (!llama_state_load_file(_ctx->getContext(), _path.c_str(),
                               _ctx->getTokens()->data(),
                               _ctx->getTokens()->capacity(), &count)) {
      SetError("Failed to load session");
    }
    _ctx->getMutex().unlock();
  }

  void OnOK() { Resolve(AsyncWorker::Env().Undefined()); }

  void OnError(const Napi::Error &err) { Reject(err.Value()); }
};

// getSystemInfo(): string
Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
  return Napi::String::New(info.Env(), get_system_info(params).c_str());
}

// completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
// void): Promise<LlamaCompletionResult>
Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsObject()) {
    Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
  }
  if (info.Length() >= 2 && !info[1].IsFunction()) {
    Napi::TypeError::New(env, "Function expected").ThrowAsJavaScriptException();
  }
  auto options = info[0].As<Napi::Object>();

  gpt_params params;
  if (options.Has("prompt")) {
    params.prompt = options.Get("prompt").ToString();
  } else {
    Napi::TypeError::New(env, "Prompt is required")
        .ThrowAsJavaScriptException();
  }
  params.n_predict =
      options.Has("n_predict") ? options.Get("n_predict").ToNumber() : -1;
  params.sparams.temp = options.Has("temperature")
                            ? options.Get("temperature").ToNumber()
                            : 0.80f;
  params.sparams.top_k =
      options.Has("top_k") ? options.Get("top_k").ToNumber() : 40;
  params.sparams.top_p =
      options.Has("top_p") ? options.Get("top_p").ToNumber() : 0.95f;
  params.sparams.min_p =
      options.Has("min_p") ? options.Get("min_p").ToNumber() : 0.05f;
  params.sparams.tfs_z =
      options.Has("tfs_z") ? options.Get("tfs_z").ToNumber() : 1.00f;
  params.sparams.mirostat =
      options.Has("mirostat") ? options.Get("mirostat").ToNumber() : 0;
  params.sparams.mirostat_tau = options.Has("mirostat_tau")
                                    ? options.Get("mirostat_tau").ToNumber()
                                    : 5.00f;
  params.sparams.mirostat_eta = options.Has("mirostat_eta")
                                    ? options.Get("mirostat_eta").ToNumber()
                                    : 0.10f;
  params.sparams.penalty_last_n = options.Has("penalty_last_n")
                                      ? options.Get("penalty_last_n").ToNumber()
                                      : 64;
  params.sparams.penalty_repeat = options.Has("penalty_repeat")
                                      ? options.Get("penalty_repeat").ToNumber()
                                      : 1.00f;
  params.sparams.penalty_freq = options.Has("penalty_freq")
                                    ? options.Get("penalty_freq").ToNumber()
                                    : 0.00f;
  params.sparams.penalty_present =
      options.Has("penalty_present") ? options.Get("penalty_present").ToNumber()
                                     : 0.00f;
  params.sparams.penalize_nl = options.Has("penalize_nl")
                                   ? options.Get("penalize_nl").ToBoolean()
                                   : false;
  params.sparams.typical_p =
      options.Has("typical_p") ? options.Get("typical_p").ToNumber() : 1.00f;
  params.ignore_eos =
      options.Has("ignore_eos") ? options.Get("ignore_eos").ToBoolean() : false;
  params.sparams.grammar = options.Has("grammar")
                               ? options.Get("grammar").ToString().Utf8Value()
                               : "";
  params.n_keep = options.Has("n_keep") ? options.Get("n_keep").ToNumber() : 0;
  params.seed =
      options.Has("seed") ? options.Get("seed").ToNumber() : LLAMA_DEFAULT_SEED;
  std::vector<std::string> stop_words;
  if (options.Has("stop")) {
    auto stop_words_array = options.Get("stop").As<Napi::Array>();
    for (size_t i = 0; i < stop_words_array.Length(); i++) {
      stop_words.push_back(stop_words_array.Get(i).ToString());
    }
  }

  // options.on_sample
  Napi::Function callback;
  if (info.Length() >= 2) {
    callback = info[1].As<Napi::Function>();
  }

  auto worker =
      new LlamaCompletionWorker(info, this, callback, params, stop_words);
  worker->Queue();
  compl_worker = worker;
  return worker->Promise();
}

// stopCompletion(): void
void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
  if (compl_worker != nullptr) {
    compl_worker->Stop();
  }
}

// saveSession(path: string): Promise<void> throws error
Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsString()) {
    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
  }
  auto *worker = new SaveSessionWorker(info, this);
  worker->Queue();
  return worker->Promise();
}

// loadSession(path: string): Promise<{ count }> throws error
Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
  if (info.Length() < 1 || !info[0].IsString()) {
    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
  }
  auto *worker = new LoadSessionWorker(info, this);
  worker->Queue();
  return worker->Promise();
}

Napi::Object Init(Napi::Env env, Napi::Object exports) {
  LlamaContext::Export(env, exports);
  return exports;
}

NODE_API_MODULE(addons, Init)
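
For reference, below is a minimal usage sketch of the native binding exported above, written against the method signatures documented in the addons.cpp comments (constructor options, completion with an onToken callback, stopCompletion, saveSession, loadSession). It is illustrative only: the require path, model path, and parameter values are assumptions, since the package's actual loader (lib/binding.js and lib/index.js) is not reproduced in this diff.

// Hypothetical usage sketch (TypeScript). Paths and values are illustrative;
// method names and option keys follow the comments and option parsing in
// addons.cpp above.
const { LlamaContext } = require('@fugood/llama.node/lib/binding'); // assumed loader path

async function main(): Promise<void> {
  // construct({ model, n_ctx, n_threads, n_gpu_layers, use_mlock, use_mmap, ... })
  const ctx = new LlamaContext({
    model: '/path/to/model.gguf', // illustrative model path
    n_ctx: 2048,
    n_threads: 4,
  });

  console.log(ctx.getSystemInfo());

  // completion(options, onToken?) resolves with
  // { text, tokens_predicted, tokens_evaluated, truncated }
  const result = await ctx.completion(
    { prompt: 'Hello, my name is', n_predict: 64, stop: ['\n\n'] },
    (data: { token: string }) => process.stdout.write(data.token), // streamed tokens
  );
  console.log('\n--- full text ---\n' + result.text);

  // Persist and later restore the evaluated token state (paths illustrative).
  await ctx.saveSession('/tmp/llama-session.bin');
  await ctx.loadSession('/tmp/llama-session.bin');
}

main();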