node-llama-cpp 3.0.0-beta.37 → 3.0.0-beta.39

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (167)
  1. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -1
  2. package/bins/linux-arm64/libggml.so +0 -0
  3. package/bins/linux-arm64/libllama.so +0 -0
  4. package/bins/linux-arm64/llama-addon.node +0 -0
  5. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -1
  6. package/bins/linux-armv7l/libggml.so +0 -0
  7. package/bins/linux-armv7l/libllama.so +0 -0
  8. package/bins/linux-armv7l/llama-addon.node +0 -0
  9. package/bins/linux-x64/_nlcBuildMetadata.json +1 -1
  10. package/bins/linux-x64/libggml.so +0 -0
  11. package/bins/linux-x64/libllama.so +0 -0
  12. package/bins/linux-x64/llama-addon.node +0 -0
  13. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
  14. package/bins/linux-x64-vulkan/libggml.so +0 -0
  15. package/bins/linux-x64-vulkan/libllama.so +0 -0
  16. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  17. package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
  18. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
  19. package/bins/mac-arm64-metal/ggml-common.h +24 -0
  20. package/bins/mac-arm64-metal/ggml-metal.metal +181 -552
  21. package/bins/mac-arm64-metal/libggml.dylib +0 -0
  22. package/bins/mac-arm64-metal/libllama.dylib +0 -0
  23. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  24. package/bins/mac-x64/_nlcBuildMetadata.json +1 -1
  25. package/bins/mac-x64/libggml.dylib +0 -0
  26. package/bins/mac-x64/libllama.dylib +0 -0
  27. package/bins/mac-x64/llama-addon.node +0 -0
  28. package/bins/win-arm64/_nlcBuildMetadata.json +1 -1
  29. package/bins/win-arm64/ggml.dll +0 -0
  30. package/bins/win-arm64/llama-addon.exp +0 -0
  31. package/bins/win-arm64/llama-addon.lib +0 -0
  32. package/bins/win-arm64/llama-addon.node +0 -0
  33. package/bins/win-arm64/llama.dll +0 -0
  34. package/bins/win-x64/_nlcBuildMetadata.json +1 -1
  35. package/bins/win-x64/ggml.dll +0 -0
  36. package/bins/win-x64/llama-addon.node +0 -0
  37. package/bins/win-x64/llama.dll +0 -0
  38. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
  39. package/bins/win-x64-vulkan/ggml.dll +0 -0
  40. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  41. package/bins/win-x64-vulkan/llama.dll +0 -0
  42. package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
  43. package/dist/ChatWrapper.d.ts +2 -1
  44. package/dist/ChatWrapper.js +19 -5
  45. package/dist/ChatWrapper.js.map +1 -1
  46. package/dist/bindings/AddonTypes.d.ts +13 -2
  47. package/dist/bindings/getLlama.d.ts +3 -2
  48. package/dist/bindings/getLlama.js +1 -1
  49. package/dist/bindings/getLlama.js.map +1 -1
  50. package/dist/chatWrappers/FunctionaryChatWrapper.js +8 -5
  51. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  52. package/dist/chatWrappers/GemmaChatWrapper.js +1 -1
  53. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  54. package/dist/chatWrappers/Llama3ChatWrapper.js +5 -6
  55. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
  56. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +31 -0
  57. package/dist/chatWrappers/Llama3_1ChatWrapper.js +223 -0
  58. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  59. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +9 -0
  60. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
  61. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +17 -2
  62. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +39 -2
  63. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -1
  64. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  65. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  66. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  67. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +5 -3
  68. package/dist/chatWrappers/utils/resolveChatWrapper.js +50 -4
  69. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  70. package/dist/cli/commands/ChatCommand.d.ts +1 -1
  71. package/dist/cli/commands/ChatCommand.js +5 -5
  72. package/dist/cli/commands/ChatCommand.js.map +1 -1
  73. package/dist/cli/commands/CompleteCommand.js +5 -3
  74. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  75. package/dist/cli/commands/InfillCommand.js +5 -3
  76. package/dist/cli/commands/InfillCommand.js.map +1 -1
  77. package/dist/cli/recommendedModels.js +43 -24
  78. package/dist/cli/recommendedModels.js.map +1 -1
  79. package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
  80. package/dist/cli/utils/interactivelyAskForModel.js +19 -9
  81. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
  82. package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -1
  83. package/dist/cli/utils/resolveCommandGgufPath.js +3 -2
  84. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
  85. package/dist/consts.d.ts +1 -0
  86. package/dist/consts.js +1 -0
  87. package/dist/consts.js.map +1 -1
  88. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +22 -0
  89. package/dist/evaluator/LlamaChat/LlamaChat.js +65 -34
  90. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  91. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +28 -6
  92. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +22 -16
  93. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  94. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +4 -5
  95. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  96. package/dist/evaluator/LlamaCompletion.d.ts +13 -2
  97. package/dist/evaluator/LlamaCompletion.js +10 -5
  98. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  99. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +1 -1
  100. package/dist/evaluator/LlamaContext/LlamaContext.js +60 -0
  101. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  102. package/dist/evaluator/LlamaContext/types.d.ts +21 -0
  103. package/dist/evaluator/LlamaGrammar.d.ts +6 -3
  104. package/dist/evaluator/LlamaGrammar.js +2 -2
  105. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  106. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +16 -32
  107. package/dist/evaluator/LlamaModel/LlamaModel.js +94 -53
  108. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  109. package/dist/gguf/consts.d.ts +1 -0
  110. package/dist/gguf/consts.js +4 -0
  111. package/dist/gguf/consts.js.map +1 -1
  112. package/dist/gguf/insights/GgufInsights.js +4 -0
  113. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  114. package/dist/gguf/parser/GgufV2Parser.js +3 -1
  115. package/dist/gguf/parser/GgufV2Parser.js.map +1 -1
  116. package/dist/gguf/types/GgufMetadataTypes.d.ts +16 -0
  117. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  118. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +3 -2
  119. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +44 -8
  120. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -1
  121. package/dist/index.d.ts +4 -2
  122. package/dist/index.js +3 -1
  123. package/dist/index.js.map +1 -1
  124. package/dist/types.d.ts +15 -1
  125. package/dist/types.js.map +1 -1
  126. package/dist/utils/DeepPartialObject.d.ts +3 -0
  127. package/dist/utils/DeepPartialObject.js +2 -0
  128. package/dist/utils/DeepPartialObject.js.map +1 -0
  129. package/dist/utils/StopGenerationDetector.d.ts +6 -3
  130. package/dist/utils/StopGenerationDetector.js +22 -7
  131. package/dist/utils/StopGenerationDetector.js.map +1 -1
  132. package/dist/utils/TokenStreamRegulator.d.ts +1 -0
  133. package/dist/utils/TokenStreamRegulator.js +23 -5
  134. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  135. package/dist/utils/resolveLastTokens.d.ts +2 -0
  136. package/dist/utils/resolveLastTokens.js +12 -0
  137. package/dist/utils/resolveLastTokens.js.map +1 -0
  138. package/llama/CMakeLists.txt +1 -1
  139. package/llama/addon/AddonContext.cpp +772 -0
  140. package/llama/addon/AddonContext.h +53 -0
  141. package/llama/addon/AddonGrammar.cpp +44 -0
  142. package/llama/addon/AddonGrammar.h +18 -0
  143. package/llama/addon/AddonGrammarEvaluationState.cpp +28 -0
  144. package/llama/addon/AddonGrammarEvaluationState.h +15 -0
  145. package/llama/addon/AddonModel.cpp +681 -0
  146. package/llama/addon/AddonModel.h +61 -0
  147. package/llama/addon/AddonModelData.cpp +25 -0
  148. package/llama/addon/AddonModelData.h +15 -0
  149. package/llama/addon/AddonModelLora.cpp +107 -0
  150. package/llama/addon/AddonModelLora.h +28 -0
  151. package/llama/addon/addon.cpp +217 -0
  152. package/llama/addon/addonGlobals.cpp +22 -0
  153. package/llama/addon/addonGlobals.h +12 -0
  154. package/llama/addon/globals/addonLog.cpp +135 -0
  155. package/llama/addon/globals/addonLog.h +21 -0
  156. package/llama/addon/globals/addonProgress.cpp +15 -0
  157. package/llama/addon/globals/addonProgress.h +15 -0
  158. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  159. package/llama/addon/globals/getGpuInfo.h +6 -0
  160. package/llama/binariesGithubRelease.json +1 -1
  161. package/llama/gitRelease.bundle +0 -0
  162. package/llama/grammars/README.md +1 -1
  163. package/llama/llama.cpp.info.json +1 -1
  164. package/package.json +3 -3
  165. package/templates/packed/electron-typescript-react.json +1 -1
  166. package/templates/packed/node-typescript.json +1 -1
  167. package/llama/addon.cpp +0 -2014
package/llama/addon/AddonModel.cpp
@@ -0,0 +1,681 @@
+#include <sstream>
+#include "addonGlobals.h"
+#include "globals/addonLog.h"
+#include "common.h"
+#include "llama.h"
+#include "AddonModel.h"
+#include "AddonModelData.h"
+#include "AddonModelLora.h"
+
+static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
+    if (token < 0) {
+        return Napi::Number::From(info.Env(), -1);
+    }
+
+    auto tokenAttributes = llama_token_get_attr(model, token);
+
+    if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
+        return Napi::Number::From(info.Env(), -1);
+    }
+
+    return Napi::Number::From(info.Env(), token);
+}
+
+static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
+    if (token < 0) {
+        return Napi::Number::From(info.Env(), -1);
+    }
+
+    auto tokenAttributes = llama_token_get_attr(model, token);
+
+    if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
+        return Napi::Number::From(info.Env(), -1);
+    }
+
+    return Napi::Number::From(info.Env(), token);
+}
+
+static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
+    AddonModel* addonModel = (AddonModel *) user_data;
+    unsigned percentage = (unsigned) (100 * progress);
+
+    if (percentage > addonModel->modelLoadPercentage) {
+        addonModel->modelLoadPercentage = percentage;
+
+        // original llama.cpp logs
+        addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
+        if (percentage >= 100) {
+            addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
+        }
+    }
+
+    if (progress > addonModel->rawModelLoadPercentage) {
+        addonModel->rawModelLoadPercentage = progress;
+
+        if (addonModel->onLoadProgressEventCallbackSet) {
+            addon_progress_event* data = new addon_progress_event {
+                progress
+            };
+
+            auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
+
+            if (status != napi_ok) {
+                delete data;
+            }
+        }
+    }
+
+    return !(addonModel->abortModelLoad);
+}
+
+class AddonModelLoadModelWorker : public Napi::AsyncWorker {
+    public:
+        AddonModel* model;
+
+        AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
+            : Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
+              model(model),
+              deferred(Napi::Promise::Deferred::New(env)) {
+            model->Ref();
+        }
+        ~AddonModelLoadModelWorker() {
+            model->Unref();
+        }
+
+        Napi::Promise GetPromise() {
+            return deferred.Promise();
+        }
+
+    protected:
+        Napi::Promise::Deferred deferred;
+
+        void Execute() {
+            try {
+                model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
+
+                model->modelLoaded = model->model != nullptr;
+            } catch (const std::exception& e) {
+                SetError(e.what());
+            } catch(...) {
+                SetError("Unknown error when calling \"llama_load_model_from_file\"");
+            }
+        }
+        void OnOK() {
+            if (model->modelLoaded) {
+                uint64_t modelSize = llama_model_size(model->model);
+                adjustNapiExternalMemoryAdd(Env(), modelSize);
+                model->loadedModelSize = modelSize;
+            }
+
+            deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
+            if (model->onLoadProgressEventCallbackSet) {
+                model->addonThreadSafeOnLoadProgressEventCallback.Release();
+            }
+        }
+        void OnError(const Napi::Error& err) {
+            deferred.Reject(err.Value());
+        }
+};
+
+class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
+    public:
+        AddonModel* model;
+
+        AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
+            : Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
+              model(model),
+              deferred(Napi::Promise::Deferred::New(env)) {
+            model->Ref();
+        }
+        ~AddonModelUnloadModelWorker() {
+            model->Unref();
+        }
+
+        Napi::Promise GetPromise() {
+            return deferred.Promise();
+        }
+
+    protected:
+        Napi::Promise::Deferred deferred;
+
+        void Execute() {
+            try {
+                llama_free_model(model->model);
+                model->modelLoaded = false;
+
+                model->dispose();
+            } catch (const std::exception& e) {
+                SetError(e.what());
+            } catch(...) {
+                SetError("Unknown error when calling \"llama_free_model\"");
+            }
+        }
+        void OnOK() {
+            adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
+            model->loadedModelSize = 0;
+
+            deferred.Resolve(Env().Undefined());
+        }
+        void OnError(const Napi::Error& err) {
+            deferred.Reject(err.Value());
+        }
+};
+
+class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
+    public:
+        AddonModelLora* modelLora;
+
+        AddonModelLoadLoraWorker(
+            const Napi::Env& env,
+            AddonModelLora* modelLora
+        )
+            : Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
+              modelLora(modelLora),
+              deferred(Napi::Promise::Deferred::New(env)) {
+            modelLora->model->Ref();
+            modelLora->Ref();
+        }
+        ~AddonModelLoadLoraWorker() {
+            modelLora->model->Unref();
+            modelLora->Unref();
+        }
+
+        Napi::Promise GetPromise() {
+            return deferred.Promise();
+        }
+
+    protected:
+        Napi::Promise::Deferred deferred;
+
+        void Execute() {
+            try {
+                const auto loraAdapter = llama_lora_adapter_init(modelLora->model->model, modelLora->loraFilePath.c_str());
+
+                if (loraAdapter == nullptr) {
+                    SetError("Failed to initialize LoRA adapter \"" + modelLora->loraFilePath + "\"");
+                    return;
+                }
+
+                modelLora->lora_adapter = loraAdapter;
+                modelLora->model->Ref();
+
+                if (modelLora->model->data != nullptr) {
+                    modelLora->model->data->loraAdapters.insert(modelLora);
+                } else {
+                    modelLora->dispose(true);
+                    SetError("Model data is not initialized");
+                }
+            } catch (const std::exception& e) {
+                SetError(e.what());
+            } catch(...) {
+                SetError("Unknown error when calling \"llama_lora_adapter_init\"");
+            }
+        }
+        void OnOK() {
+            deferred.Resolve(Env().Undefined());
+        }
+        void OnError(const Napi::Error& err) {
+            deferred.Reject(err.Value());
+        }
+};
+
+AddonModel::AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
+    loadedModelSize = 0;
+    hasAddonExportsRef = false;
+    modelLoaded = false;
+    abortModelLoad = false;
+    model_load_stopped = false;
+    rawModelLoadPercentage = 0;
+    modelLoadPercentage = 0;
+    onLoadProgressEventCallbackSet = false;
+    hasLoadAbortSignal = false;
+    disposed = false;
+
+    data = new AddonModelData();
+    model_params = llama_model_default_params();
+
+    // Get the model path
+    modelPath = info[0].As<Napi::String>().Utf8Value();
+
+    if (info.Length() > 1 && info[1].IsObject()) {
+        Napi::Object options = info[1].As<Napi::Object>();
+
+        if (options.Has("addonExports")) {
+            addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
+            hasAddonExportsRef = true;
+        }
+
+        if (options.Has("gpuLayers")) {
+            model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
+        }
+
+        if (options.Has("vocabOnly")) {
+            model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
+        }
+
+        if (options.Has("useMmap")) {
+            model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
+        }
+
+        if (options.Has("useMlock")) {
+            model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
+        }
+
+        if (options.Has("checkTensors")) {
+            model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
+        }
+
+        if (options.Has("onLoadProgress")) {
+            auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
+            if (onLoadProgressJSCallback.IsFunction()) {
+                AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
+                addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
+                    info.Env(),
+                    onLoadProgressJSCallback,
+                    "onLoadProgressCallback",
+                    0,
+                    1,
+                    context,
+                    [](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
+                        addonModel->onLoadProgressEventCallbackSet = false;
+
+                        delete ctx;
+                    },
+                    this
+                );
+                onLoadProgressEventCallbackSet = true;
+            }
+        }
+
+        if (options.Has("hasLoadAbortSignal")) {
+            hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
+        }
+
+        if (options.Has("overridesList")) {
+            Napi::Array overridesList = options.Get("overridesList").As<Napi::Array>();
+            kv_overrides.reserve(overridesList.Length());
+
+            for (uint32_t i = 0; i < overridesList.Length(); i++) {
+                Napi::Array overrideItem = overridesList.Get(i).As<Napi::Array>();
+                auto key = overrideItem.Get((uint32_t)0).As<Napi::String>().Utf8Value();
+                auto value = overrideItem.Get((uint32_t)1);
+
+                if (key.length() > 127) {
+                    continue;
+                }
+
+                llama_model_kv_override kvo;
+                std::strncpy(kvo.key, key.c_str(), key.length());
+                kvo.key[key.length()] = 0;
+
+                if (value.IsString()) {
+                    auto valueString = value.As<Napi::String>().Utf8Value();
+                    if (valueString.length() > 127) {
+                        continue;
+                    }
+
+                    kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+                    std::strncpy(kvo.val_str, valueString.c_str(), valueString.length());
+                    kvo.val_str[valueString.length()] = 0;
+
+                    fputs(std::string("Override: " + key + " = " + valueString + "\n").c_str(), stdout);
+                    fflush(stdout);
+                } else if (value.IsNumber() || value.IsBigInt()) {
+                    auto numberType = overrideItem.Get((uint32_t)2).As<Napi::Number>().Int32Value();
+                    if (numberType == 0) {
+                        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+                        kvo.val_i64 = value.As<Napi::Number>().Int64Value();
+                    } else {
+                        kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
+                        kvo.val_f64 = value.As<Napi::Number>().DoubleValue();
+                    }
+                } else if (value.IsBoolean()) {
+                    kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
+                    kvo.val_bool = value.As<Napi::Boolean>().Value();
+                }
+
+                kv_overrides.emplace_back(std::move(kvo));
+            }
+
+            if (!kv_overrides.empty()) {
+                kv_overrides.emplace_back();
+                kv_overrides.back().key[0] = 0;
+            }
+
+            model_params.kv_overrides = kv_overrides.data();
+        }
+
+        if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
+            model_params.progress_callback_user_data = this;
+            model_params.progress_callback = llamaModelParamsProgressCallback;
+        }
+    }
+}
+
+AddonModel::~AddonModel() {
+    dispose();
+}
+void AddonModel::dispose() {
+    if (disposed) {
+        return;
+    }
+
+    disposed = true;
+    if (modelLoaded) {
+        modelLoaded = false;
+        llama_free_model(model);
+
+        adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
+        loadedModelSize = 0;
+    }
+
+    if (data != nullptr) {
+        auto currentData = data;
+        data = nullptr;
+        delete currentData;
+    }
+
+    if (hasAddonExportsRef) {
+        addonExportsRef.Unref();
+        hasAddonExportsRef = false;
+    }
+}
+
+Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
+    worker->Queue();
+    return worker->GetPromise();
+}
+Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
+    AddonModelLora* modelLora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
+    AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), modelLora);
+    worker->Queue();
+    return worker->GetPromise();
+}
+Napi::Value AddonModel::AbortActiveModelLoad(const Napi::CallbackInfo& info) {
+    abortModelLoad = true;
+    return info.Env().Undefined();
+}
+Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        return info.Env().Undefined();
+    }
+
+    if (modelLoaded) {
+        modelLoaded = false;
+
+        AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
+        worker->Queue();
+        return worker->GetPromise();
+    } else {
+        dispose();
+
+        Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
+        deferred.Resolve(info.Env().Undefined());
+        return deferred.Promise();
+    }
+}
+
+Napi::Value AddonModel::Tokenize(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    std::string text = info[0].As<Napi::String>().Utf8Value();
+    bool specialTokens = info[1].As<Napi::Boolean>().Value();
+
+    std::vector<llama_token> tokens = llama_tokenize(model, text, false, specialTokens);
+
+    Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
+    for (size_t i = 0; i < tokens.size(); ++i) {
+        result[i] = static_cast<uint32_t>(tokens[i]);
+    }
+
+    return result;
+}
+Napi::Value AddonModel::Detokenize(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
+    bool decodeSpecialTokens = info.Length() > 1
+        ? info[1].As<Napi::Boolean>().Value()
+        : false;
+
+    std::vector<char> result(8, 0);
+    const int n_length = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
+
+    if (n_length < 0) {
+        result.resize(-n_length);
+        int check = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
+        GGML_ASSERT(check == -n_length);
+    } else {
+        result.resize(n_length);
+    }
+
+    return Napi::String::New(info.Env(), result.data(), result.size());
+}
+
+Napi::Value AddonModel::GetTrainContextSize(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
+}
+
+Napi::Value AddonModel::GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return Napi::Number::From(info.Env(), llama_n_embd(model));
+}
+
+Napi::Value AddonModel::GetTotalSize(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return Napi::Number::From(info.Env(), llama_model_size(model));
+}
+
+Napi::Value AddonModel::GetTotalParameters(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return Napi::Number::From(info.Env(), llama_model_n_params(model));
+}
+
+Napi::Value AddonModel::GetModelDescription(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    char model_desc[128];
+    int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
+
+    return Napi::String::New(info.Env(), model_desc, actual_length);
+}
+
+Napi::Value AddonModel::TokenBos(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return getNapiControlToken(info, model, llama_token_bos(model));
+}
+Napi::Value AddonModel::TokenEos(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return getNapiControlToken(info, model, llama_token_eos(model));
+}
+Napi::Value AddonModel::TokenNl(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return getNapiToken(info, model, llama_token_nl(model));
+}
+Napi::Value AddonModel::PrefixToken(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return getNapiControlToken(info, model, llama_token_prefix(model));
+}
+Napi::Value AddonModel::MiddleToken(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return getNapiControlToken(info, model, llama_token_middle(model));
+}
+Napi::Value AddonModel::SuffixToken(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return getNapiControlToken(info, model, llama_token_suffix(model));
+}
+Napi::Value AddonModel::EotToken(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    return getNapiControlToken(info, model, llama_token_eot(model));
+}
+Napi::Value AddonModel::GetTokenString(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    int token = info[0].As<Napi::Number>().Int32Value();
+    std::stringstream ss;
+
+    const char* str = llama_token_get_text(model, token);
+    if (str == nullptr) {
+        return info.Env().Undefined();
+    }
+
+    ss << str;
+
+    return Napi::String::New(info.Env(), ss.str());
+}
+
+Napi::Value AddonModel::GetTokenAttributes(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    if (info[0].IsNumber() == false) {
+        return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
+    }
+
+    int token = info[0].As<Napi::Number>().Int32Value();
+    auto tokenAttributes = llama_token_get_attr(model, token);
+
+    return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
+}
+Napi::Value AddonModel::IsEogToken(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    if (info[0].IsNumber() == false) {
+        return Napi::Boolean::New(info.Env(), false);
+    }
+
+    int token = info[0].As<Napi::Number>().Int32Value();
+
+    return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
+}
+Napi::Value AddonModel::GetVocabularyType(const Napi::CallbackInfo& info) {
+    if (disposed) {
+        Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+        return info.Env().Undefined();
+    }
+
+    auto vocabularyType = llama_vocab_type(model);
+
+    return Napi::Number::From(info.Env(), int32_t(vocabularyType));
+}
+Napi::Value AddonModel::ShouldPrependBosToken(const Napi::CallbackInfo& info) {
+    const int addBos = llama_add_bos_token(model);
+
+    bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
+
+    return Napi::Boolean::New(info.Env(), shouldPrependBos);
+}
+
+Napi::Value AddonModel::GetModelSize(const Napi::CallbackInfo& info) {
+    return Napi::Number::From(info.Env(), llama_model_size(model));
+}
+
+void AddonModel::init(Napi::Object exports) {
+    exports.Set(
+        "AddonModel",
+        DefineClass(
+            exports.Env(),
+            "AddonModel",
+            {
+                InstanceMethod("init", &AddonModel::Init),
+                InstanceMethod("loadLora", &AddonModel::LoadLora),
+                InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
+                InstanceMethod("tokenize", &AddonModel::Tokenize),
+                InstanceMethod("detokenize", &AddonModel::Detokenize),
+                InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
+                InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
+                InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
+                InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
+                InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
+                InstanceMethod("tokenBos", &AddonModel::TokenBos),
+                InstanceMethod("tokenEos", &AddonModel::TokenEos),
+                InstanceMethod("tokenNl", &AddonModel::TokenNl),
+                InstanceMethod("prefixToken", &AddonModel::PrefixToken),
+                InstanceMethod("middleToken", &AddonModel::MiddleToken),
+                InstanceMethod("suffixToken", &AddonModel::SuffixToken),
+                InstanceMethod("eotToken", &AddonModel::EotToken),
+                InstanceMethod("getTokenString", &AddonModel::GetTokenString),
+                InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
+                InstanceMethod("isEogToken", &AddonModel::IsEogToken),
+                InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
+                InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
+                InstanceMethod("getModelSize", &AddonModel::GetModelSize),
+                InstanceMethod("dispose", &AddonModel::Dispose),
+            }
+        )
+    );
+}
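
For context, here is a minimal sketch of how the load-progress and abort machinery above (`llamaModelParamsProgressCallback`, `AbortActiveModelLoad`) surfaces in the package's JavaScript API. It assumes the `onLoadProgress` and `loadSignal` options of `llama.loadModel()` in these beta releases (the changed `LlamaModel` typings suggest this shape, but option names may differ between betas), and the model path is a hypothetical placeholder:

```typescript
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();

// Abort the load if it takes longer than 30 seconds; on the native side
// aborting flips `abortModelLoad`, which makes the progress callback
// return false and stops llama_load_model_from_file.
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), 30_000);

const model = await llama.loadModel({
    modelPath: "./models/model.gguf", // hypothetical local path
    loadSignal: controller.signal,

    // Forwarded through the thread-safe function set up in the
    // AddonModel constructor; `loadProgress` is a float in [0, 1]
    onLoadProgress(loadProgress: number) {
        console.log(`Load progress: ${Math.floor(loadProgress * 100)}%`);
    }
});
clearTimeout(timeout);

// These map to AddonModel::Tokenize / AddonModel::Detokenize above
const tokens = model.tokenize("Hello world");
console.log(model.detokenize(tokens));
```

Note that progress events are delivered with `NonBlockingCall`, so JavaScript-side callbacks may be coalesced or dropped under load rather than fired once per native progress tick.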