node-llama-cpp 3.0.0-beta.37 → 3.0.0-beta.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -1
  2. package/bins/linux-arm64/libggml.so +0 -0
  3. package/bins/linux-arm64/libllama.so +0 -0
  4. package/bins/linux-arm64/llama-addon.node +0 -0
  5. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -1
  6. package/bins/linux-armv7l/libggml.so +0 -0
  7. package/bins/linux-armv7l/libllama.so +0 -0
  8. package/bins/linux-armv7l/llama-addon.node +0 -0
  9. package/bins/linux-x64/_nlcBuildMetadata.json +1 -1
  10. package/bins/linux-x64/libggml.so +0 -0
  11. package/bins/linux-x64/libllama.so +0 -0
  12. package/bins/linux-x64/llama-addon.node +0 -0
  13. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
  14. package/bins/linux-x64-vulkan/libggml.so +0 -0
  15. package/bins/linux-x64-vulkan/libllama.so +0 -0
  16. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  17. package/bins/linux-x64-vulkan/vulkan-shaders-gen +0 -0
  18. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
  19. package/bins/mac-arm64-metal/ggml-common.h +24 -0
  20. package/bins/mac-arm64-metal/ggml-metal.metal +181 -552
  21. package/bins/mac-arm64-metal/libggml.dylib +0 -0
  22. package/bins/mac-arm64-metal/libllama.dylib +0 -0
  23. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  24. package/bins/mac-x64/_nlcBuildMetadata.json +1 -1
  25. package/bins/mac-x64/libggml.dylib +0 -0
  26. package/bins/mac-x64/libllama.dylib +0 -0
  27. package/bins/mac-x64/llama-addon.node +0 -0
  28. package/bins/win-arm64/_nlcBuildMetadata.json +1 -1
  29. package/bins/win-arm64/ggml.dll +0 -0
  30. package/bins/win-arm64/llama-addon.exp +0 -0
  31. package/bins/win-arm64/llama-addon.lib +0 -0
  32. package/bins/win-arm64/llama-addon.node +0 -0
  33. package/bins/win-arm64/llama.dll +0 -0
  34. package/bins/win-x64/_nlcBuildMetadata.json +1 -1
  35. package/bins/win-x64/ggml.dll +0 -0
  36. package/bins/win-x64/llama-addon.node +0 -0
  37. package/bins/win-x64/llama.dll +0 -0
  38. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
  39. package/bins/win-x64-vulkan/ggml.dll +0 -0
  40. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  41. package/bins/win-x64-vulkan/llama.dll +0 -0
  42. package/bins/win-x64-vulkan/vulkan-shaders-gen.exe +0 -0
  43. package/dist/ChatWrapper.d.ts +2 -1
  44. package/dist/ChatWrapper.js +19 -5
  45. package/dist/ChatWrapper.js.map +1 -1
  46. package/dist/bindings/AddonTypes.d.ts +13 -2
  47. package/dist/bindings/getLlama.d.ts +3 -2
  48. package/dist/bindings/getLlama.js +1 -1
  49. package/dist/bindings/getLlama.js.map +1 -1
  50. package/dist/chatWrappers/FunctionaryChatWrapper.js +8 -5
  51. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  52. package/dist/chatWrappers/GemmaChatWrapper.js +1 -1
  53. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -1
  54. package/dist/chatWrappers/Llama3ChatWrapper.js +5 -6
  55. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
  56. package/dist/chatWrappers/Llama3_1ChatWrapper.d.ts +31 -0
  57. package/dist/chatWrappers/Llama3_1ChatWrapper.js +223 -0
  58. package/dist/chatWrappers/Llama3_1ChatWrapper.js.map +1 -0
  59. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +9 -0
  60. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
  61. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +17 -2
  62. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +39 -2
  63. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -1
  64. package/dist/chatWrappers/utils/jsonDumps.d.ts +7 -0
  65. package/dist/chatWrappers/utils/jsonDumps.js +18 -0
  66. package/dist/chatWrappers/utils/jsonDumps.js.map +1 -0
  67. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +5 -3
  68. package/dist/chatWrappers/utils/resolveChatWrapper.js +50 -4
  69. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -1
  70. package/dist/cli/commands/ChatCommand.d.ts +1 -1
  71. package/dist/cli/commands/ChatCommand.js +5 -5
  72. package/dist/cli/commands/ChatCommand.js.map +1 -1
  73. package/dist/cli/commands/CompleteCommand.js +5 -3
  74. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  75. package/dist/cli/commands/InfillCommand.js +5 -3
  76. package/dist/cli/commands/InfillCommand.js.map +1 -1
  77. package/dist/cli/recommendedModels.js +43 -24
  78. package/dist/cli/recommendedModels.js.map +1 -1
  79. package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
  80. package/dist/cli/utils/interactivelyAskForModel.js +19 -9
  81. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
  82. package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -1
  83. package/dist/cli/utils/resolveCommandGgufPath.js +3 -2
  84. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
  85. package/dist/consts.d.ts +1 -0
  86. package/dist/consts.js +1 -0
  87. package/dist/consts.js.map +1 -1
  88. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +22 -0
  89. package/dist/evaluator/LlamaChat/LlamaChat.js +65 -34
  90. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  91. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +28 -6
  92. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +22 -16
  93. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  94. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js +4 -5
  95. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  96. package/dist/evaluator/LlamaCompletion.d.ts +13 -2
  97. package/dist/evaluator/LlamaCompletion.js +10 -5
  98. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  99. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +1 -1
  100. package/dist/evaluator/LlamaContext/LlamaContext.js +60 -0
  101. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  102. package/dist/evaluator/LlamaContext/types.d.ts +21 -0
  103. package/dist/evaluator/LlamaGrammar.d.ts +6 -3
  104. package/dist/evaluator/LlamaGrammar.js +2 -2
  105. package/dist/evaluator/LlamaGrammar.js.map +1 -1
  106. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +16 -32
  107. package/dist/evaluator/LlamaModel/LlamaModel.js +94 -53
  108. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  109. package/dist/gguf/consts.d.ts +1 -0
  110. package/dist/gguf/consts.js +4 -0
  111. package/dist/gguf/consts.js.map +1 -1
  112. package/dist/gguf/insights/GgufInsights.js +4 -0
  113. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  114. package/dist/gguf/parser/GgufV2Parser.js +3 -1
  115. package/dist/gguf/parser/GgufV2Parser.js.map +1 -1
  116. package/dist/gguf/types/GgufMetadataTypes.d.ts +16 -0
  117. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  118. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +3 -2
  119. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +44 -8
  120. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -1
  121. package/dist/index.d.ts +4 -2
  122. package/dist/index.js +3 -1
  123. package/dist/index.js.map +1 -1
  124. package/dist/types.d.ts +15 -1
  125. package/dist/types.js.map +1 -1
  126. package/dist/utils/DeepPartialObject.d.ts +3 -0
  127. package/dist/utils/DeepPartialObject.js +2 -0
  128. package/dist/utils/DeepPartialObject.js.map +1 -0
  129. package/dist/utils/StopGenerationDetector.d.ts +6 -3
  130. package/dist/utils/StopGenerationDetector.js +22 -7
  131. package/dist/utils/StopGenerationDetector.js.map +1 -1
  132. package/dist/utils/TokenStreamRegulator.d.ts +1 -0
  133. package/dist/utils/TokenStreamRegulator.js +23 -5
  134. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  135. package/dist/utils/resolveLastTokens.d.ts +2 -0
  136. package/dist/utils/resolveLastTokens.js +12 -0
  137. package/dist/utils/resolveLastTokens.js.map +1 -0
  138. package/llama/CMakeLists.txt +1 -1
  139. package/llama/addon/AddonContext.cpp +772 -0
  140. package/llama/addon/AddonContext.h +53 -0
  141. package/llama/addon/AddonGrammar.cpp +44 -0
  142. package/llama/addon/AddonGrammar.h +18 -0
  143. package/llama/addon/AddonGrammarEvaluationState.cpp +28 -0
  144. package/llama/addon/AddonGrammarEvaluationState.h +15 -0
  145. package/llama/addon/AddonModel.cpp +681 -0
  146. package/llama/addon/AddonModel.h +61 -0
  147. package/llama/addon/AddonModelData.cpp +25 -0
  148. package/llama/addon/AddonModelData.h +15 -0
  149. package/llama/addon/AddonModelLora.cpp +107 -0
  150. package/llama/addon/AddonModelLora.h +28 -0
  151. package/llama/addon/addon.cpp +217 -0
  152. package/llama/addon/addonGlobals.cpp +22 -0
  153. package/llama/addon/addonGlobals.h +12 -0
  154. package/llama/addon/globals/addonLog.cpp +135 -0
  155. package/llama/addon/globals/addonLog.h +21 -0
  156. package/llama/addon/globals/addonProgress.cpp +15 -0
  157. package/llama/addon/globals/addonProgress.h +15 -0
  158. package/llama/addon/globals/getGpuInfo.cpp +108 -0
  159. package/llama/addon/globals/getGpuInfo.h +6 -0
  160. package/llama/binariesGithubRelease.json +1 -1
  161. package/llama/gitRelease.bundle +0 -0
  162. package/llama/grammars/README.md +1 -1
  163. package/llama/llama.cpp.info.json +1 -1
  164. package/package.json +3 -3
  165. package/templates/packed/electron-typescript-react.json +1 -1
  166. package/templates/packed/node-typescript.json +1 -1
  167. package/llama/addon.cpp +0 -2014
package/llama/addon.cpp DELETED
@@ -1,2014 +0,0 @@
1
- #include <stddef.h>
2
-
3
- #include <algorithm>
4
- #include <sstream>
5
- #include <vector>
6
- #include <unordered_map>
7
-
8
- #include "common.h"
9
- #include "common/grammar-parser.h"
10
- #include "llama.h"
11
- #include "napi.h"
12
-
13
- #ifdef GPU_INFO_USE_CUDA
14
- # include "gpuInfo/cuda-gpu-info.h"
15
- #endif
16
- #ifdef GPU_INFO_USE_VULKAN
17
- # include "gpuInfo/vulkan-gpu-info.h"
18
- #endif
19
- #ifdef GPU_INFO_USE_METAL
20
- # include "gpuInfo/metal-gpu-info.h"
21
- #endif
22
-
23
-
24
/// Payload type carried by the thread-safe log callback (see AddonThreadSafeLogCallbackFunction).
struct addon_logger_log {
    /// Numeric log level of the entry.
    const int logLevelNumber;
    /// Stream associated with the entry (presumably the message text — confirm at the producer).
    const std::stringstream* stringStream;
};
29
-
30
- static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
31
-
32
- using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
33
- void addonCallJsLogCallback(
34
- Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
35
- );
36
- using AddonThreadSafeLogCallbackFunction =
37
- Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
38
-
39
-
40
/// Payload type carried by the thread-safe progress callback.
struct addon_progress_event {
    /// Progress value forwarded to JavaScript as a number.
    const float progress;
};
44
-
45
- using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
46
- void addonCallJsProgressCallback(
47
- Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
48
- );
49
- using AddonThreadSafeProgressEventCallbackFunction =
50
- Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
51
-
52
-
53
- AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
54
- bool addonJsLoggerCallbackSet = false;
55
- int addonLoggerLogLevel = 5;
56
- bool backendInitialized = false;
57
- bool backendDisposed = false;
58
-
59
- void addonCallJsProgressCallback(
60
- Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
61
- ) {
62
- if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
63
- try {
64
- callback.Call({Napi::Number::New(env, data->progress)});
65
- } catch (const Napi::Error& e) {}
66
- }
67
-
68
- if (data != nullptr) {
69
- delete data;
70
- }
71
- }
72
-
73
- static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
74
- uint64_t totalSize = 0;
75
-
76
- if (embd) {
77
- totalSize += sizeof(float) * n_tokens_alloc * embd;
78
- } else {
79
- totalSize += sizeof(llama_token) * n_tokens_alloc;
80
- }
81
-
82
- totalSize += sizeof(llama_pos) * n_tokens_alloc;
83
- totalSize += sizeof(int32_t) * n_tokens_alloc;
84
- totalSize += sizeof(llama_seq_id *) * (n_tokens_alloc + 1);
85
-
86
- totalSize += sizeof(llama_seq_id) * n_seq_max * n_tokens_alloc;
87
-
88
- totalSize += sizeof(int8_t) * n_tokens_alloc;
89
-
90
- return totalSize;
91
- }
92
-
93
- static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
94
- const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
95
- while (size > 0) {
96
- int64_t adjustSize = std::min(size, chunkSize);
97
- Napi::MemoryManagement::AdjustExternalMemory(env, adjustSize);
98
- size -= adjustSize;
99
- }
100
- }
101
-
102
- static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
103
- const uint64_t chunkSize = std::numeric_limits<int64_t>::max();
104
- while (size > 0) {
105
- int64_t adjustSize = std::min(size, chunkSize);
106
- Napi::MemoryManagement::AdjustExternalMemory(env, -adjustSize);
107
- size -= adjustSize;
108
- }
109
- }
110
-
111
- std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
112
- std::vector<char> result(8, 0);
113
- const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
114
- if (n_tokens < 0) {
115
- result.resize(-n_tokens);
116
- int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
117
- GGML_ASSERT(check == -n_tokens);
118
- } else {
119
- result.resize(n_tokens);
120
- }
121
-
122
- return std::string(result.data(), result.size());
123
- }
124
-
125
#ifdef GPU_INFO_USE_CUDA
// Error sink passed to the CUDA GPU-info helpers: prefixes the message and
// sends it through the addon log callback at error level.
void logCudaError(const char* message) {
    const std::string line = std::string("CUDA error: ") + std::string(message);
    addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, line.c_str(), nullptr);
}
#endif
130
#ifdef GPU_INFO_USE_VULKAN
// Warning sink passed to the Vulkan GPU-info helpers: prefixes the message and
// sends it through the addon log callback at warning level.
void logVulkanWarning(const char* message) {
    const std::string line = std::string("Vulkan warning: ") + std::string(message);
    addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, line.c_str(), nullptr);
}
#endif
135
-
136
- Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
137
- uint64_t total = 0;
138
- uint64_t used = 0;
139
-
140
- #ifdef GPU_INFO_USE_CUDA
141
- size_t cudaDeviceTotal = 0;
142
- size_t cudaDeviceUsed = 0;
143
- bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
144
-
145
- if (cudeGetInfoSuccess) {
146
- total += cudaDeviceTotal;
147
- used += cudaDeviceUsed;
148
- }
149
- #endif
150
-
151
- #ifdef GPU_INFO_USE_VULKAN
152
- uint64_t vulkanDeviceTotal = 0;
153
- uint64_t vulkanDeviceUsed = 0;
154
- const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
155
-
156
- if (vulkanDeviceSupportsMemoryBudgetExtension) {
157
- total += vulkanDeviceTotal;
158
- used += vulkanDeviceUsed;
159
- }
160
- #endif
161
-
162
- #ifdef GPU_INFO_USE_METAL
163
- uint64_t metalDeviceTotal = 0;
164
- uint64_t metalDeviceUsed = 0;
165
- getMetalGpuInfo(&metalDeviceTotal, &metalDeviceUsed);
166
-
167
- total += metalDeviceTotal;
168
- used += metalDeviceUsed;
169
- #endif
170
-
171
- Napi::Object result = Napi::Object::New(info.Env());
172
- result.Set("total", Napi::Number::From(info.Env(), total));
173
- result.Set("used", Napi::Number::From(info.Env(), used));
174
-
175
- return result;
176
- }
177
-
178
- Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {
179
- std::vector<std::string> deviceNames;
180
-
181
- #ifdef GPU_INFO_USE_CUDA
182
- gpuInfoGetCudaDeviceNames(&deviceNames, logCudaError);
183
- #endif
184
-
185
- #ifdef GPU_INFO_USE_VULKAN
186
- gpuInfoGetVulkanDeviceNames(&deviceNames, logVulkanWarning);
187
- #endif
188
-
189
- #ifdef GPU_INFO_USE_METAL
190
- getMetalGpuDeviceNames(&deviceNames);
191
- #endif
192
-
193
- Napi::Object result = Napi::Object::New(info.Env());
194
-
195
- Napi::Array deviceNamesNapiArray = Napi::Array::New(info.Env(), deviceNames.size());
196
- for (size_t i = 0; i < deviceNames.size(); ++i) {
197
- deviceNamesNapiArray[i] = Napi::String::New(info.Env(), deviceNames[i]);
198
- }
199
- result.Set("deviceNames", deviceNamesNapiArray);
200
-
201
- return result;
202
- }
203
-
204
- Napi::Value getGpuType(const Napi::CallbackInfo& info) {
205
- #ifdef GPU_INFO_USE_CUDA
206
- return Napi::String::New(info.Env(), "cuda");
207
- #endif
208
-
209
- #ifdef GPU_INFO_USE_VULKAN
210
- return Napi::String::New(info.Env(), "vulkan");
211
- #endif
212
-
213
- #ifdef GPU_INFO_USE_METAL
214
- return Napi::String::New(info.Env(), "metal");
215
- #endif
216
-
217
- return info.Env().Undefined();
218
- }
219
-
220
- static Napi::Value getNapiToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
221
- if (token < 0) {
222
- return Napi::Number::From(info.Env(), -1);
223
- }
224
-
225
- auto tokenAttributes = llama_token_get_attr(model, token);
226
-
227
- if (tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED || tokenAttributes & LLAMA_TOKEN_ATTR_UNKNOWN) {
228
- return Napi::Number::From(info.Env(), -1);
229
- }
230
-
231
- return Napi::Number::From(info.Env(), token);
232
- }
233
-
234
- static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_model* model, llama_token token) {
235
- if (token < 0) {
236
- return Napi::Number::From(info.Env(), -1);
237
- }
238
-
239
- auto tokenAttributes = llama_token_get_attr(model, token);
240
-
241
- if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
242
- return Napi::Number::From(info.Env(), -1);
243
- }
244
-
245
- return Napi::Number::From(info.Env(), token);
246
- }
247
-
248
- static bool llamaModelParamsProgressCallback(float progress, void * user_data);
249
-
250
- class AddonModel : public Napi::ObjectWrap<AddonModel> {
251
- public:
252
- llama_model_params model_params;
253
- llama_model* model;
254
- uint64_t loadedModelSize = 0;
255
- Napi::Reference<Napi::Object> addonExportsRef;
256
- bool hasAddonExportsRef = false;
257
-
258
- std::string modelPath;
259
- bool modelLoaded = false;
260
- bool abortModelLoad = false;
261
- bool model_load_stopped = false;
262
- float rawModelLoadPercentage = 0;
263
- unsigned modelLoadPercentage = 0;
264
- AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
265
- bool onLoadProgressEventCallbackSet = false;
266
- bool hasLoadAbortSignal = false;
267
-
268
- bool disposed = false;
269
-
270
- AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
271
- model_params = llama_model_default_params();
272
-
273
- // Get the model path
274
- modelPath = info[0].As<Napi::String>().Utf8Value();
275
-
276
- if (info.Length() > 1 && info[1].IsObject()) {
277
- Napi::Object options = info[1].As<Napi::Object>();
278
-
279
- if (options.Has("addonExports")) {
280
- addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
281
- hasAddonExportsRef = true;
282
- }
283
-
284
- if (options.Has("gpuLayers")) {
285
- model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
286
- }
287
-
288
- if (options.Has("vocabOnly")) {
289
- model_params.vocab_only = options.Get("vocabOnly").As<Napi::Boolean>().Value();
290
- }
291
-
292
- if (options.Has("useMmap")) {
293
- model_params.use_mmap = options.Get("useMmap").As<Napi::Boolean>().Value();
294
- }
295
-
296
- if (options.Has("useMlock")) {
297
- model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
298
- }
299
-
300
- if (options.Has("checkTensors")) {
301
- model_params.check_tensors = options.Get("checkTensors").As<Napi::Boolean>().Value();
302
- }
303
-
304
- if (options.Has("onLoadProgress")) {
305
- auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
306
- if (onLoadProgressJSCallback.IsFunction()) {
307
- AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
308
- addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
309
- info.Env(),
310
- onLoadProgressJSCallback,
311
- "onLoadProgressCallback",
312
- 0,
313
- 1,
314
- context,
315
- [](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
316
- addonModel->onLoadProgressEventCallbackSet = false;
317
-
318
- delete ctx;
319
- },
320
- this
321
- );
322
- onLoadProgressEventCallbackSet = true;
323
- }
324
- }
325
-
326
- if (options.Has("hasLoadAbortSignal")) {
327
- hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
328
- }
329
-
330
- if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
331
- model_params.progress_callback_user_data = &(*this);
332
- model_params.progress_callback = llamaModelParamsProgressCallback;
333
- }
334
- }
335
- }
336
-
337
- ~AddonModel() {
338
- dispose();
339
- }
340
-
341
- void dispose() {
342
- if (disposed) {
343
- return;
344
- }
345
-
346
- disposed = true;
347
- if (modelLoaded) {
348
- modelLoaded = false;
349
- llama_free_model(model);
350
-
351
- adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
352
- loadedModelSize = 0;
353
- }
354
-
355
- if (hasAddonExportsRef) {
356
- addonExportsRef.Unref();
357
- hasAddonExportsRef = false;
358
- }
359
- }
360
-
361
- Napi::Value Init(const Napi::CallbackInfo& info);
362
- Napi::Value LoadLora(const Napi::CallbackInfo& info);
363
- Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
364
- abortModelLoad = true;
365
- return info.Env().Undefined();
366
- }
367
- Napi::Value Dispose(const Napi::CallbackInfo& info);
368
-
369
- Napi::Value Tokenize(const Napi::CallbackInfo& info) {
370
- if (disposed) {
371
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
372
- return info.Env().Undefined();
373
- }
374
-
375
- std::string text = info[0].As<Napi::String>().Utf8Value();
376
- bool specialTokens = info[1].As<Napi::Boolean>().Value();
377
-
378
- std::vector<llama_token> tokens = llama_tokenize(model, text, false, specialTokens);
379
-
380
- Napi::Uint32Array result = Napi::Uint32Array::New(info.Env(), tokens.size());
381
- for (size_t i = 0; i < tokens.size(); ++i) {
382
- result[i] = static_cast<uint32_t>(tokens[i]);
383
- }
384
-
385
- return result;
386
- }
387
- Napi::Value Detokenize(const Napi::CallbackInfo& info) {
388
- if (disposed) {
389
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
390
- return info.Env().Undefined();
391
- }
392
-
393
- Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
394
- bool decodeSpecialTokens = info.Length() > 0
395
- ? info[1].As<Napi::Boolean>().Value()
396
- : false;
397
-
398
- // Create a stringstream for accumulating the decoded string.
399
- std::stringstream ss;
400
-
401
- // Decode each token and accumulate the result.
402
- for (size_t i = 0; i < tokens.ElementLength(); i++) {
403
- const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
404
-
405
- if (piece.empty()) {
406
- continue;
407
- }
408
-
409
- ss << piece;
410
- }
411
-
412
- return Napi::String::New(info.Env(), ss.str());
413
- }
414
-
415
- Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
416
- if (disposed) {
417
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
418
- return info.Env().Undefined();
419
- }
420
-
421
- return Napi::Number::From(info.Env(), llama_n_ctx_train(model));
422
- }
423
-
424
- Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
425
- if (disposed) {
426
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
427
- return info.Env().Undefined();
428
- }
429
-
430
- return Napi::Number::From(info.Env(), llama_n_embd(model));
431
- }
432
-
433
- Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
434
- if (disposed) {
435
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
436
- return info.Env().Undefined();
437
- }
438
-
439
- return Napi::Number::From(info.Env(), llama_model_size(model));
440
- }
441
-
442
- Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
443
- if (disposed) {
444
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
445
- return info.Env().Undefined();
446
- }
447
-
448
- return Napi::Number::From(info.Env(), llama_model_n_params(model));
449
- }
450
-
451
- Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
452
- if (disposed) {
453
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
454
- return info.Env().Undefined();
455
- }
456
-
457
- char model_desc[128];
458
- int actual_length = llama_model_desc(model, model_desc, sizeof(model_desc));
459
-
460
- return Napi::String::New(info.Env(), model_desc, actual_length);
461
- }
462
-
463
- Napi::Value TokenBos(const Napi::CallbackInfo& info) {
464
- if (disposed) {
465
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
466
- return info.Env().Undefined();
467
- }
468
-
469
- return getNapiControlToken(info, model, llama_token_bos(model));
470
- }
471
- Napi::Value TokenEos(const Napi::CallbackInfo& info) {
472
- if (disposed) {
473
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
474
- return info.Env().Undefined();
475
- }
476
-
477
- return getNapiControlToken(info, model, llama_token_eos(model));
478
- }
479
- Napi::Value TokenNl(const Napi::CallbackInfo& info) {
480
- if (disposed) {
481
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
482
- return info.Env().Undefined();
483
- }
484
-
485
- return getNapiToken(info, model, llama_token_nl(model));
486
- }
487
- Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
488
- if (disposed) {
489
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
490
- return info.Env().Undefined();
491
- }
492
-
493
- return getNapiControlToken(info, model, llama_token_prefix(model));
494
- }
495
- Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
496
- if (disposed) {
497
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
498
- return info.Env().Undefined();
499
- }
500
-
501
- return getNapiControlToken(info, model, llama_token_middle(model));
502
- }
503
- Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
504
- if (disposed) {
505
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
506
- return info.Env().Undefined();
507
- }
508
-
509
- return getNapiControlToken(info, model, llama_token_suffix(model));
510
- }
511
- Napi::Value EotToken(const Napi::CallbackInfo& info) {
512
- if (disposed) {
513
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
514
- return info.Env().Undefined();
515
- }
516
-
517
- return getNapiControlToken(info, model, llama_token_eot(model));
518
- }
519
- Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
520
- if (disposed) {
521
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
522
- return info.Env().Undefined();
523
- }
524
-
525
- int token = info[0].As<Napi::Number>().Int32Value();
526
- std::stringstream ss;
527
-
528
- const char* str = llama_token_get_text(model, token);
529
- if (str == nullptr) {
530
- return info.Env().Undefined();
531
- }
532
-
533
- ss << str;
534
-
535
- return Napi::String::New(info.Env(), ss.str());
536
- }
537
-
538
- Napi::Value GetTokenAttributes(const Napi::CallbackInfo& info) {
539
- if (disposed) {
540
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
541
- return info.Env().Undefined();
542
- }
543
-
544
- if (info[0].IsNumber() == false) {
545
- return Napi::Number::From(info.Env(), int32_t(LLAMA_TOKEN_ATTR_UNDEFINED));
546
- }
547
-
548
- int token = info[0].As<Napi::Number>().Int32Value();
549
- auto tokenAttributes = llama_token_get_attr(model, token);
550
-
551
- return Napi::Number::From(info.Env(), int32_t(tokenAttributes));
552
- }
553
- Napi::Value IsEogToken(const Napi::CallbackInfo& info) {
554
- if (disposed) {
555
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
556
- return info.Env().Undefined();
557
- }
558
-
559
- if (info[0].IsNumber() == false) {
560
- return Napi::Boolean::New(info.Env(), false);
561
- }
562
-
563
- int token = info[0].As<Napi::Number>().Int32Value();
564
-
565
- return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
566
- }
567
- Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
568
- if (disposed) {
569
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
570
- return info.Env().Undefined();
571
- }
572
-
573
- auto vocabularyType = llama_vocab_type(model);
574
-
575
- return Napi::Number::From(info.Env(), int32_t(vocabularyType));
576
- }
577
- Napi::Value ShouldPrependBosToken(const Napi::CallbackInfo& info) {
578
- const int addBos = llama_add_bos_token(model);
579
-
580
- bool shouldPrependBos = addBos != -1 ? bool(addBos) : (llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
581
-
582
- return Napi::Boolean::New(info.Env(), shouldPrependBos);
583
- }
584
-
585
- Napi::Value GetModelSize(const Napi::CallbackInfo& info) {
586
- return Napi::Number::From(info.Env(), llama_model_size(model));
587
- }
588
-
589
- static void init(Napi::Object exports) {
590
- exports.Set(
591
- "AddonModel",
592
- DefineClass(
593
- exports.Env(),
594
- "AddonModel",
595
- {
596
- InstanceMethod("init", &AddonModel::Init),
597
- InstanceMethod("loadLora", &AddonModel::LoadLora),
598
- InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
599
- InstanceMethod("tokenize", &AddonModel::Tokenize),
600
- InstanceMethod("detokenize", &AddonModel::Detokenize),
601
- InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
602
- InstanceMethod("getEmbeddingVectorSize", &AddonModel::GetEmbeddingVectorSize),
603
- InstanceMethod("getTotalSize", &AddonModel::GetTotalSize),
604
- InstanceMethod("getTotalParameters", &AddonModel::GetTotalParameters),
605
- InstanceMethod("getModelDescription", &AddonModel::GetModelDescription),
606
- InstanceMethod("tokenBos", &AddonModel::TokenBos),
607
- InstanceMethod("tokenEos", &AddonModel::TokenEos),
608
- InstanceMethod("tokenNl", &AddonModel::TokenNl),
609
- InstanceMethod("prefixToken", &AddonModel::PrefixToken),
610
- InstanceMethod("middleToken", &AddonModel::MiddleToken),
611
- InstanceMethod("suffixToken", &AddonModel::SuffixToken),
612
- InstanceMethod("eotToken", &AddonModel::EotToken),
613
- InstanceMethod("getTokenString", &AddonModel::GetTokenString),
614
- InstanceMethod("getTokenAttributes", &AddonModel::GetTokenAttributes),
615
- InstanceMethod("isEogToken", &AddonModel::IsEogToken),
616
- InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
617
- InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
618
- InstanceMethod("getModelSize", &AddonModel::GetModelSize),
619
- InstanceMethod("dispose", &AddonModel::Dispose),
620
- }
621
- )
622
- );
623
- }
624
- };
625
-
626
- static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
627
- AddonModel* addonModel = (AddonModel *) user_data;
628
- unsigned percentage = (unsigned) (100 * progress);
629
-
630
- if (percentage > addonModel->modelLoadPercentage) {
631
- addonModel->modelLoadPercentage = percentage;
632
-
633
- // original llama.cpp logs
634
- addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
635
- if (percentage >= 100) {
636
- addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
637
- }
638
- }
639
-
640
- if (progress > addonModel->rawModelLoadPercentage) {
641
- addonModel->rawModelLoadPercentage = progress;
642
-
643
- if (addonModel->onLoadProgressEventCallbackSet) {
644
- addon_progress_event* data = new addon_progress_event {
645
- progress
646
- };
647
-
648
- auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
649
-
650
- if (status != napi_ok) {
651
- delete data;
652
- }
653
- }
654
- }
655
-
656
- return !(addonModel->abortModelLoad);
657
- }
658
-
659
- class AddonModelLoadModelWorker : public Napi::AsyncWorker {
660
- public:
661
- AddonModel* model;
662
-
663
- AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
664
- : Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
665
- model(model),
666
- deferred(Napi::Promise::Deferred::New(env)) {
667
- model->Ref();
668
- }
669
- ~AddonModelLoadModelWorker() {
670
- model->Unref();
671
- }
672
-
673
- Napi::Promise GetPromise() {
674
- return deferred.Promise();
675
- }
676
-
677
- protected:
678
- Napi::Promise::Deferred deferred;
679
-
680
- void Execute() {
681
- try {
682
- model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
683
-
684
- model->modelLoaded = model->model != nullptr && model->model != NULL;
685
- } catch (const std::exception& e) {
686
- SetError(e.what());
687
- } catch(...) {
688
- SetError("Unknown error when calling \"llama_load_model_from_file\"");
689
- }
690
- }
691
- void OnOK() {
692
- if (model->modelLoaded) {
693
- uint64_t modelSize = llama_model_size(model->model);
694
- adjustNapiExternalMemoryAdd(Env(), modelSize);
695
- model->loadedModelSize = modelSize;
696
- }
697
-
698
- deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
699
- if (model->onLoadProgressEventCallbackSet) {
700
- model->addonThreadSafeOnLoadProgressEventCallback.Release();
701
- }
702
- }
703
- void OnError(const Napi::Error& err) {
704
- deferred.Reject(err.Value());
705
- }
706
- };
707
- class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
708
- public:
709
- AddonModel* model;
710
-
711
- AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
712
- : Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
713
- model(model),
714
- deferred(Napi::Promise::Deferred::New(env)) {
715
- model->Ref();
716
- }
717
- ~AddonModelUnloadModelWorker() {
718
- model->Unref();
719
- }
720
-
721
- Napi::Promise GetPromise() {
722
- return deferred.Promise();
723
- }
724
-
725
- protected:
726
- Napi::Promise::Deferred deferred;
727
-
728
- void Execute() {
729
- try {
730
- llama_free_model(model->model);
731
- model->modelLoaded = false;
732
-
733
- model->dispose();
734
- } catch (const std::exception& e) {
735
- SetError(e.what());
736
- } catch(...) {
737
- SetError("Unknown error when calling \"llama_free_model\"");
738
- }
739
- }
740
- void OnOK() {
741
- adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
742
- model->loadedModelSize = 0;
743
-
744
- deferred.Resolve(Env().Undefined());
745
- }
746
- void OnError(const Napi::Error& err) {
747
- deferred.Reject(err.Value());
748
- }
749
- };
750
- class AddonModelLoadLoraWorker : public Napi::AsyncWorker {
751
- public:
752
- AddonModel* model;
753
- std::string loraFilePath;
754
- float loraScale;
755
- int32_t loraThreads;
756
- std::string baseModelPath;
757
-
758
- AddonModelLoadLoraWorker(
759
- const Napi::Env& env,
760
- AddonModel* model,
761
- std::string loraFilePath,
762
- float loraScale,
763
- int32_t loraThreads,
764
- std::string baseModelPath
765
- )
766
- : Napi::AsyncWorker(env, "AddonModelLoadLoraWorker"),
767
- model(model),
768
- loraFilePath(loraFilePath),
769
- loraScale(loraScale),
770
- loraThreads(loraThreads),
771
- baseModelPath(baseModelPath),
772
- deferred(Napi::Promise::Deferred::New(env)) {
773
- model->Ref();
774
- }
775
- ~AddonModelLoadLoraWorker() {
776
- model->Unref();
777
- }
778
-
779
- Napi::Promise GetPromise() {
780
- return deferred.Promise();
781
- }
782
-
783
- protected:
784
- Napi::Promise::Deferred deferred;
785
-
786
- void Execute() {
787
- try {
788
- const auto res = llama_model_apply_lora_from_file(
789
- model->model,
790
- loraFilePath.c_str(),
791
- loraScale,
792
- baseModelPath.empty() ? NULL : baseModelPath.c_str(),
793
- loraThreads
794
- );
795
-
796
- if (res != 0) {
797
- SetError(
798
- std::string(
799
- std::string("Failed to apply LoRA \"") + loraFilePath + std::string("\"") + (
800
- baseModelPath.empty()
801
- ? std::string("")
802
- : (std::string(" with base model \"") + baseModelPath + std::string("\""))
803
- )
804
- )
805
- );
806
- }
807
- } catch (const std::exception& e) {
808
- SetError(e.what());
809
- } catch(...) {
810
- SetError("Unknown error when calling \"llama_model_apply_lora_from_file\"");
811
- }
812
- }
813
- void OnOK() {
814
- deferred.Resolve(Env().Undefined());
815
- }
816
- void OnError(const Napi::Error& err) {
817
- deferred.Reject(err.Value());
818
- }
819
- };
820
-
821
- Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
822
- if (disposed) {
823
- Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
824
- return info.Env().Undefined();
825
- }
826
-
827
- AddonModelLoadModelWorker* worker = new AddonModelLoadModelWorker(this->Env(), this);
828
- worker->Queue();
829
- return worker->GetPromise();
830
- }
831
- Napi::Value AddonModel::LoadLora(const Napi::CallbackInfo& info) {
832
- std::string loraFilePath = info[0].As<Napi::String>().Utf8Value();
833
- float scale = info[1].As<Napi::Number>().FloatValue();
834
- int32_t threads = info[2].As<Napi::Number>().Int32Value();
835
- std::string baseModelPath = (info.Length() > 3 && info[3].IsString()) ? info[3].As<Napi::String>().Utf8Value() : std::string("");
836
-
837
- int32_t resolvedThreads = threads == 0 ? std::thread::hardware_concurrency() : threads;
838
-
839
- AddonModelLoadLoraWorker* worker = new AddonModelLoadLoraWorker(this->Env(), this, loraFilePath, scale, threads, baseModelPath);
840
- worker->Queue();
841
- return worker->GetPromise();
842
- }
843
- Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
844
- if (disposed) {
845
- return info.Env().Undefined();
846
- }
847
-
848
- if (modelLoaded) {
849
- modelLoaded = false;
850
-
851
- AddonModelUnloadModelWorker* worker = new AddonModelUnloadModelWorker(this->Env(), this);
852
- worker->Queue();
853
- return worker->GetPromise();
854
- } else {
855
- dispose();
856
-
857
- Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
858
- deferred.Resolve(info.Env().Undefined());
859
- return deferred.Promise();
860
- }
861
- }
862
-
863
- class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
864
- public:
865
- grammar_parser::parse_state parsed_grammar;
866
- Napi::Reference<Napi::Object> addonExportsRef;
867
- bool hasAddonExportsRef = false;
868
-
869
- AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
870
- // Get the model path
871
- std::string grammarCode = info[0].As<Napi::String>().Utf8Value();
872
- bool should_print_grammar = false;
873
-
874
- if (info.Length() > 1 && info[1].IsObject()) {
875
- Napi::Object options = info[1].As<Napi::Object>();
876
-
877
- if (options.Has("addonExports")) {
878
- addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
879
- hasAddonExportsRef = true;
880
- }
881
-
882
- if (options.Has("printGrammar")) {
883
- should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
884
- }
885
- }
886
-
887
- parsed_grammar = grammar_parser::parse(grammarCode.c_str());
888
- // will be empty (default) if there are parse errors
889
- if (parsed_grammar.rules.empty()) {
890
- Napi::Error::New(info.Env(), "Failed to parse grammar").ThrowAsJavaScriptException();
891
- return;
892
- }
893
-
894
- if (should_print_grammar) {
895
- grammar_parser::print_grammar(stderr, parsed_grammar);
896
- }
897
- }
898
-
899
- ~AddonGrammar() {
900
- if (hasAddonExportsRef) {
901
- addonExportsRef.Unref();
902
- hasAddonExportsRef = false;
903
- }
904
- }
905
-
906
- static void init(Napi::Object exports) {
907
- exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
908
- }
909
- };
910
-
911
- class AddonGrammarEvaluationState : public Napi::ObjectWrap<AddonGrammarEvaluationState> {
912
- public:
913
- AddonGrammar* grammarDef;
914
- llama_grammar* grammar = nullptr;
915
-
916
- AddonGrammarEvaluationState(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammarEvaluationState>(info) {
917
- grammarDef = Napi::ObjectWrap<AddonGrammar>::Unwrap(info[0].As<Napi::Object>());
918
- grammarDef->Ref();
919
-
920
- std::vector<const llama_grammar_element*> grammar_rules(grammarDef->parsed_grammar.c_rules());
921
- grammar = llama_grammar_init(grammar_rules.data(), grammar_rules.size(), grammarDef->parsed_grammar.symbol_ids.at("root"));
922
- }
923
-
924
- ~AddonGrammarEvaluationState() {
925
- grammarDef->Unref();
926
-
927
- if (grammar != nullptr) {
928
- llama_grammar_free(grammar);
929
- grammar = nullptr;
930
- }
931
- }
932
-
933
- static void init(Napi::Object exports) {
934
- exports.Set("AddonGrammarEvaluationState", DefineClass(exports.Env(), "AddonGrammarEvaluationState", {}));
935
- }
936
- };
937
-
938
- class AddonContext : public Napi::ObjectWrap<AddonContext> {
939
- public:
940
- AddonModel* model;
941
- llama_context_params context_params;
942
- llama_context* ctx;
943
- llama_batch batch;
944
- uint64_t batchMemorySize = 0;
945
- bool has_batch = false;
946
- int32_t batch_n_tokens = 0;
947
- int n_cur = 0;
948
-
949
- uint64_t loadedContextMemorySize = 0;
950
- bool contextLoaded = false;
951
-
952
- bool disposed = false;
953
-
954
- AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
955
- model = Napi::ObjectWrap<AddonModel>::Unwrap(info[0].As<Napi::Object>());
956
- model->Ref();
957
-
958
- context_params = llama_context_default_params();
959
- context_params.seed = -1;
960
- context_params.n_ctx = 4096;
961
- context_params.n_threads = 6;
962
- context_params.n_threads_batch = context_params.n_threads;
963
-
964
- if (info.Length() > 1 && info[1].IsObject()) {
965
- Napi::Object options = info[1].As<Napi::Object>();
966
-
967
- if (options.Has("noSeed")) {
968
- context_params.seed = time(NULL);
969
- } else if (options.Has("seed")) {
970
- context_params.seed = options.Get("seed").As<Napi::Number>().Uint32Value();
971
- }
972
-
973
- if (options.Has("contextSize")) {
974
- context_params.n_ctx = options.Get("contextSize").As<Napi::Number>().Uint32Value();
975
- }
976
-
977
- if (options.Has("batchSize")) {
978
- context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
979
- context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
980
- }
981
-
982
- if (options.Has("sequences")) {
983
- context_params.n_seq_max = options.Get("sequences").As<Napi::Number>().Uint32Value();
984
- }
985
-
986
- if (options.Has("embeddings")) {
987
- context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
988
- }
989
-
990
- if (options.Has("flashAttention")) {
991
- context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
992
- }
993
-
994
- if (options.Has("threads")) {
995
- const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
996
- const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
997
-
998
- context_params.n_threads = resolved_n_threads;
999
- context_params.n_threads_batch = resolved_n_threads;
1000
- }
1001
- }
1002
- }
1003
- ~AddonContext() {
1004
- dispose();
1005
- }
1006
-
1007
- void dispose() {
1008
- if (disposed) {
1009
- return;
1010
- }
1011
-
1012
- disposed = true;
1013
- if (contextLoaded) {
1014
- contextLoaded = false;
1015
- llama_free(ctx);
1016
-
1017
- adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
1018
- loadedContextMemorySize = 0;
1019
- }
1020
-
1021
- model->Unref();
1022
-
1023
- disposeBatch();
1024
- }
1025
- void disposeBatch() {
1026
- if (!has_batch) {
1027
- return;
1028
- }
1029
-
1030
- llama_batch_free(batch);
1031
- has_batch = false;
1032
- batch_n_tokens = 0;
1033
-
1034
- adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
1035
- batchMemorySize = 0;
1036
- }
1037
-
1038
- Napi::Value Init(const Napi::CallbackInfo& info);
1039
- Napi::Value Dispose(const Napi::CallbackInfo& info);
1040
-
1041
- Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
1042
- if (disposed) {
1043
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1044
- return info.Env().Undefined();
1045
- }
1046
-
1047
- return Napi::Number::From(info.Env(), llama_n_ctx(ctx));
1048
- }
1049
- Napi::Value InitBatch(const Napi::CallbackInfo& info) {
1050
- if (disposed) {
1051
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1052
- return info.Env().Undefined();
1053
- }
1054
-
1055
- if (has_batch) {
1056
- llama_batch_free(batch);
1057
- }
1058
-
1059
- int32_t n_tokens = info[0].As<Napi::Number>().Int32Value();
1060
-
1061
- batch = llama_batch_init(n_tokens, 0, 1);
1062
- has_batch = true;
1063
- batch_n_tokens = n_tokens;
1064
-
1065
- uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
1066
- if (newBatchMemorySize > batchMemorySize) {
1067
- adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
1068
- batchMemorySize = newBatchMemorySize;
1069
- } else if (newBatchMemorySize < batchMemorySize) {
1070
- adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
1071
- batchMemorySize = newBatchMemorySize;
1072
- }
1073
-
1074
- return info.Env().Undefined();
1075
- }
1076
- Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
1077
- if (disposed) {
1078
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1079
- return info.Env().Undefined();
1080
- }
1081
-
1082
- disposeBatch();
1083
-
1084
- return info.Env().Undefined();
1085
- }
1086
- Napi::Value AddToBatch(const Napi::CallbackInfo& info) {
1087
- if (!has_batch) {
1088
- Napi::Error::New(info.Env(), "No batch is initialized").ThrowAsJavaScriptException();
1089
- return info.Env().Undefined();
1090
- }
1091
-
1092
- int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
1093
- int32_t firstTokenContextIndex = info[1].As<Napi::Number>().Int32Value();
1094
- Napi::Uint32Array tokens = info[2].As<Napi::Uint32Array>();
1095
- bool generateLogitAtTheEnd = info[3].As<Napi::Boolean>().Value();
1096
-
1097
- auto tokensLength = tokens.ElementLength();
1098
- GGML_ASSERT(batch.n_tokens + tokensLength <= batch_n_tokens);
1099
-
1100
- for (size_t i = 0; i < tokensLength; i++) {
1101
- llama_batch_add(batch, static_cast<llama_token>(tokens[i]), firstTokenContextIndex + i, { sequenceId }, false);
1102
- }
1103
-
1104
- if (generateLogitAtTheEnd) {
1105
- batch.logits[batch.n_tokens - 1] = true;
1106
-
1107
- auto logit_index = batch.n_tokens - 1;
1108
-
1109
- return Napi::Number::From(info.Env(), logit_index);
1110
- }
1111
-
1112
- return info.Env().Undefined();
1113
- }
1114
- Napi::Value DisposeSequence(const Napi::CallbackInfo& info) {
1115
- if (disposed) {
1116
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1117
- return info.Env().Undefined();
1118
- }
1119
-
1120
- int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
1121
-
1122
- bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
1123
-
1124
- if (!result) {
1125
- Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
1126
- return info.Env().Undefined();
1127
- }
1128
-
1129
- return info.Env().Undefined();
1130
- }
1131
- Napi::Value RemoveTokenCellsFromSequence(const Napi::CallbackInfo& info) {
1132
- if (disposed) {
1133
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1134
- return info.Env().Undefined();
1135
- }
1136
-
1137
- int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
1138
- int32_t startPos = info[1].As<Napi::Number>().Int32Value();
1139
- int32_t endPos = info[2].As<Napi::Number>().Int32Value();
1140
-
1141
- bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
1142
-
1143
- return Napi::Boolean::New(info.Env(), result);
1144
- }
1145
- Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
1146
- if (disposed) {
1147
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1148
- return info.Env().Undefined();
1149
- }
1150
-
1151
- int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
1152
- int32_t startPos = info[1].As<Napi::Number>().Int32Value();
1153
- int32_t endPos = info[2].As<Napi::Number>().Int32Value();
1154
- int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
1155
-
1156
- llama_kv_cache_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
1157
-
1158
- return info.Env().Undefined();
1159
- }
1160
- Napi::Value DecodeBatch(const Napi::CallbackInfo& info);
1161
- Napi::Value SampleToken(const Napi::CallbackInfo& info);
1162
-
1163
- Napi::Value AcceptGrammarEvaluationStateToken(const Napi::CallbackInfo& info) {
1164
- AddonGrammarEvaluationState* grammar_evaluation_state =
1165
- Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
1166
- llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
1167
-
1168
- if ((grammar_evaluation_state)->grammar != nullptr) {
1169
- llama_grammar_accept_token(ctx, (grammar_evaluation_state)->grammar, tokenId);
1170
- }
1171
-
1172
- return info.Env().Undefined();
1173
- }
1174
-
1175
- Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
1176
- AddonGrammarEvaluationState* grammar_evaluation_state =
1177
- Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
1178
- llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
1179
-
1180
- if ((grammar_evaluation_state)->grammar != nullptr) {
1181
- std::vector<llama_token_data> candidates;
1182
- candidates.reserve(1);
1183
- candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
1184
-
1185
- llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
1186
-
1187
- llama_sample_grammar(ctx, &candidates_p, (grammar_evaluation_state)->grammar);
1188
-
1189
- if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
1190
- return Napi::Boolean::New(info.Env(), false);
1191
- }
1192
-
1193
- return Napi::Boolean::New(info.Env(), true);
1194
- }
1195
-
1196
- return Napi::Boolean::New(info.Env(), false);
1197
- }
1198
-
1199
- Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
1200
- if (disposed) {
1201
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1202
- return info.Env().Undefined();
1203
- }
1204
-
1205
- int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
1206
-
1207
- if (inputTokensLength <= 0) {
1208
- Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
1209
- return info.Env().Undefined();
1210
- }
1211
-
1212
- const int n_embd = llama_n_embd(model->model);
1213
- const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
1214
- if (embeddings == NULL) {
1215
- embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
1216
-
1217
- if (embeddings == NULL) {
1218
- Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
1219
- return info.Env().Undefined();
1220
- }
1221
- }
1222
-
1223
- Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
1224
- for (size_t i = 0; i < n_embd; ++i) {
1225
- result[i] = embeddings[i];
1226
- }
1227
-
1228
- return result;
1229
- }
1230
-
1231
- Napi::Value GetStateSize(const Napi::CallbackInfo& info) {
1232
- if (disposed) {
1233
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1234
- return info.Env().Undefined();
1235
- }
1236
-
1237
- return Napi::Number::From(info.Env(), llama_state_get_size(ctx));
1238
- }
1239
-
1240
- Napi::Value PrintTimings(const Napi::CallbackInfo& info) {
1241
- llama_print_timings(ctx);
1242
- llama_reset_timings(ctx);
1243
- return info.Env().Undefined();
1244
- }
1245
-
1246
- static void init(Napi::Object exports) {
1247
- exports.Set(
1248
- "AddonContext",
1249
- DefineClass(
1250
- exports.Env(),
1251
- "AddonContext",
1252
- {
1253
- InstanceMethod("init", &AddonContext::Init),
1254
- InstanceMethod("getContextSize", &AddonContext::GetContextSize),
1255
- InstanceMethod("initBatch", &AddonContext::InitBatch),
1256
- InstanceMethod("addToBatch", &AddonContext::AddToBatch),
1257
- InstanceMethod("disposeSequence", &AddonContext::DisposeSequence),
1258
- InstanceMethod("removeTokenCellsFromSequence", &AddonContext::RemoveTokenCellsFromSequence),
1259
- InstanceMethod("shiftSequenceTokenCells", &AddonContext::ShiftSequenceTokenCells),
1260
- InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
1261
- InstanceMethod("sampleToken", &AddonContext::SampleToken),
1262
- InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
1263
- InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
1264
- InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
1265
- InstanceMethod("getStateSize", &AddonContext::GetStateSize),
1266
- InstanceMethod("printTimings", &AddonContext::PrintTimings),
1267
- InstanceMethod("dispose", &AddonContext::Dispose),
1268
- }
1269
- )
1270
- );
1271
- }
1272
- };
1273
-
1274
-
1275
- class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
1276
- public:
1277
- AddonContext* ctx;
1278
-
1279
- AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
1280
- : Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
1281
- ctx(ctx),
1282
- deferred(Napi::Promise::Deferred::New(env)) {
1283
- ctx->Ref();
1284
- }
1285
- ~AddonContextDecodeBatchWorker() {
1286
- ctx->Unref();
1287
- }
1288
-
1289
- Napi::Promise GetPromise() {
1290
- return deferred.Promise();
1291
- }
1292
-
1293
- protected:
1294
- Napi::Promise::Deferred deferred;
1295
-
1296
- void Execute() {
1297
- try {
1298
- // Perform the evaluation using llama_decode.
1299
- int r = llama_decode(ctx->ctx, ctx->batch);
1300
-
1301
- if (r != 0) {
1302
- if (r == 1) {
1303
- SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
1304
- } else {
1305
- SetError("Eval has failed");
1306
- }
1307
-
1308
- return;
1309
- }
1310
-
1311
- llama_synchronize(ctx->ctx);
1312
- } catch (const std::exception& e) {
1313
- SetError(e.what());
1314
- } catch(...) {
1315
- SetError("Unknown error when calling \"llama_decode\"");
1316
- }
1317
- }
1318
- void OnOK() {
1319
- deferred.Resolve(Env().Undefined());
1320
- }
1321
- void OnError(const Napi::Error& err) {
1322
- deferred.Reject(err.Value());
1323
- }
1324
- };
1325
-
1326
- Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
1327
- AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
1328
- worker->Queue();
1329
- return worker->GetPromise();
1330
- }
1331
-
1332
- class AddonContextLoadContextWorker : public Napi::AsyncWorker {
1333
- public:
1334
- AddonContext* context;
1335
-
1336
- AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
1337
- : Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
1338
- context(context),
1339
- deferred(Napi::Promise::Deferred::New(env)) {
1340
- context->Ref();
1341
- }
1342
- ~AddonContextLoadContextWorker() {
1343
- context->Unref();
1344
- }
1345
-
1346
- Napi::Promise GetPromise() {
1347
- return deferred.Promise();
1348
- }
1349
-
1350
- protected:
1351
- Napi::Promise::Deferred deferred;
1352
-
1353
- void Execute() {
1354
- try {
1355
- context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
1356
-
1357
- context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
1358
- } catch (const std::exception& e) {
1359
- SetError(e.what());
1360
- } catch(...) {
1361
- SetError("Unknown error when calling \"llama_new_context_with_model\"");
1362
- }
1363
- }
1364
- void OnOK() {
1365
- if (context->contextLoaded) {
1366
- uint64_t contextMemorySize = llama_state_get_size(context->ctx);
1367
- adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
1368
- context->loadedContextMemorySize = contextMemorySize;
1369
- }
1370
-
1371
- deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
1372
- }
1373
- void OnError(const Napi::Error& err) {
1374
- deferred.Reject(err.Value());
1375
- }
1376
- };
1377
- class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
1378
- public:
1379
- AddonContext* context;
1380
-
1381
- AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
1382
- : Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
1383
- context(context),
1384
- deferred(Napi::Promise::Deferred::New(env)) {
1385
- context->Ref();
1386
- }
1387
- ~AddonContextUnloadContextWorker() {
1388
- context->Unref();
1389
- }
1390
-
1391
- Napi::Promise GetPromise() {
1392
- return deferred.Promise();
1393
- }
1394
-
1395
- protected:
1396
- Napi::Promise::Deferred deferred;
1397
-
1398
- void Execute() {
1399
- try {
1400
- llama_free(context->ctx);
1401
- context->contextLoaded = false;
1402
-
1403
- try {
1404
- if (context->has_batch) {
1405
- llama_batch_free(context->batch);
1406
- context->has_batch = false;
1407
- context->batch_n_tokens = 0;
1408
- }
1409
-
1410
- context->dispose();
1411
- } catch (const std::exception& e) {
1412
- SetError(e.what());
1413
- } catch(...) {
1414
- SetError("Unknown error when calling \"llama_batch_free\"");
1415
- }
1416
- } catch (const std::exception& e) {
1417
- SetError(e.what());
1418
- } catch(...) {
1419
- SetError("Unknown error when calling \"llama_free\"");
1420
- }
1421
- }
1422
- void OnOK() {
1423
- adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
1424
- context->loadedContextMemorySize = 0;
1425
-
1426
- adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
1427
- context->batchMemorySize = 0;
1428
-
1429
- deferred.Resolve(Env().Undefined());
1430
- }
1431
- void OnError(const Napi::Error& err) {
1432
- deferred.Reject(err.Value());
1433
- }
1434
- };
1435
-
1436
- Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
1437
- if (disposed) {
1438
- Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
1439
- return info.Env().Undefined();
1440
- }
1441
-
1442
- AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
1443
- worker->Queue();
1444
- return worker->GetPromise();
1445
- }
1446
- Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
1447
- if (disposed) {
1448
- return info.Env().Undefined();
1449
- }
1450
-
1451
- if (contextLoaded) {
1452
- contextLoaded = false;
1453
-
1454
- AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
1455
- worker->Queue();
1456
- return worker->GetPromise();
1457
- } else {
1458
- dispose();
1459
-
1460
- Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1461
- deferred.Resolve(info.Env().Undefined());
1462
- return deferred.Promise();
1463
- }
1464
- }
1465
-
1466
- class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
1467
- public:
1468
- AddonContext* ctx;
1469
- AddonGrammarEvaluationState* grammar_evaluation_state;
1470
- int32_t batchLogitIndex;
1471
- bool use_grammar = false;
1472
- llama_token result;
1473
- float temperature = 0.0f;
1474
- float min_p = 0;
1475
- int32_t top_k = 40;
1476
- float top_p = 0.95f;
1477
- float repeat_penalty = 1.10f; // 1.0 = disabled
1478
- float repeat_penalty_presence_penalty = 0.00f; // 0.0 = disabled
1479
- float repeat_penalty_frequency_penalty = 0.00f; // 0.0 = disabled
1480
- std::vector<llama_token> repeat_penalty_tokens;
1481
- std::unordered_map<llama_token, float> tokenBiases;
1482
- bool useTokenBiases = false;
1483
- bool use_repeat_penalty = false;
1484
-
1485
- AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
1486
- : Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
1487
- ctx(ctx),
1488
- deferred(Napi::Promise::Deferred::New(info.Env())) {
1489
- ctx->Ref();
1490
-
1491
- batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
1492
-
1493
- if (info.Length() > 1 && info[1].IsObject()) {
1494
- Napi::Object options = info[1].As<Napi::Object>();
1495
-
1496
- if (options.Has("temperature")) {
1497
- temperature = options.Get("temperature").As<Napi::Number>().FloatValue();
1498
- }
1499
-
1500
- if (options.Has("minP")) {
1501
- min_p = options.Get("minP").As<Napi::Number>().FloatValue();
1502
- }
1503
-
1504
- if (options.Has("topK")) {
1505
- top_k = options.Get("topK").As<Napi::Number>().Int32Value();
1506
- }
1507
-
1508
- if (options.Has("topP")) {
1509
- top_p = options.Get("topP").As<Napi::Number>().FloatValue();
1510
- }
1511
-
1512
- if (options.Has("repeatPenalty")) {
1513
- repeat_penalty = options.Get("repeatPenalty").As<Napi::Number>().FloatValue();
1514
- }
1515
-
1516
- if (options.Has("repeatPenaltyTokens")) {
1517
- Napi::Uint32Array repeat_penalty_tokens_uint32_array = options.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
1518
-
1519
- repeat_penalty_tokens.reserve(repeat_penalty_tokens_uint32_array.ElementLength());
1520
- for (size_t i = 0; i < repeat_penalty_tokens_uint32_array.ElementLength(); i++) {
1521
- repeat_penalty_tokens.push_back(static_cast<llama_token>(repeat_penalty_tokens_uint32_array[i]));
1522
- }
1523
-
1524
- use_repeat_penalty = true;
1525
- }
1526
-
1527
- if (options.Has("tokenBiasKeys") && options.Has("tokenBiasValues")) {
1528
- Napi::Uint32Array tokenBiasKeys = options.Get("tokenBiasKeys").As<Napi::Uint32Array>();
1529
- Napi::Float32Array tokenBiasValues = options.Get("tokenBiasValues").As<Napi::Float32Array>();
1530
-
1531
- if (tokenBiasKeys.ElementLength() == tokenBiasValues.ElementLength()) {
1532
- for (size_t i = 0; i < tokenBiasKeys.ElementLength(); i++) {
1533
- tokenBiases[static_cast<llama_token>(tokenBiasKeys[i])] = tokenBiasValues[i];
1534
- }
1535
-
1536
- useTokenBiases = true;
1537
- }
1538
- }
1539
-
1540
- if (options.Has("repeatPenaltyPresencePenalty")) {
1541
- repeat_penalty_presence_penalty = options.Get("repeatPenaltyPresencePenalty").As<Napi::Number>().FloatValue();
1542
- }
1543
-
1544
- if (options.Has("repeatPenaltyFrequencyPenalty")) {
1545
- repeat_penalty_frequency_penalty = options.Get("repeatPenaltyFrequencyPenalty").As<Napi::Number>().FloatValue();
1546
- }
1547
-
1548
- if (options.Has("grammarEvaluationState")) {
1549
- grammar_evaluation_state =
1550
- Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(options.Get("grammarEvaluationState").As<Napi::Object>());
1551
- grammar_evaluation_state->Ref();
1552
- use_grammar = true;
1553
- }
1554
- }
1555
- }
1556
- ~AddonContextSampleTokenWorker() {
1557
- ctx->Unref();
1558
-
1559
- if (use_grammar) {
1560
- grammar_evaluation_state->Unref();
1561
- use_grammar = false;
1562
- }
1563
- }
1564
-
1565
- Napi::Promise GetPromise() {
1566
- return deferred.Promise();
1567
- }
1568
-
1569
- protected:
1570
- Napi::Promise::Deferred deferred;
1571
-
1572
- void Execute() {
1573
- try {
1574
- SampleToken();
1575
- } catch (const std::exception& e) {
1576
- SetError(e.what());
1577
- } catch(...) {
1578
- SetError("Unknown error when calling \"SampleToken\"");
1579
- }
1580
- }
1581
-
1582
- void SampleToken() {
1583
- llama_token new_token_id = 0;
1584
-
1585
- // Select the best prediction.
1586
- if (llama_get_logits(ctx->ctx) == nullptr) {
1587
- SetError("This model does not support token generation");
1588
- return;
1589
- }
1590
-
1591
- auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
1592
- auto n_vocab = llama_n_vocab(ctx->model->model);
1593
-
1594
- std::vector<llama_token_data> candidates;
1595
- candidates.reserve(n_vocab);
1596
-
1597
- for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
1598
- auto logit = logits[token_id];
1599
-
1600
- if (useTokenBiases) {
1601
- bool hasTokenBias = tokenBiases.find(token_id) != tokenBiases.end();
1602
- if (hasTokenBias) {
1603
- auto logitBias = tokenBiases.at(token_id);
1604
- if (logitBias == -INFINITY || logitBias < -INFINITY) {
1605
- if (!llama_token_is_eog(ctx->model->model, token_id)) {
1606
- logit = -INFINITY;
1607
- }
1608
- } else {
1609
- logit += logitBias;
1610
- }
1611
- }
1612
- }
1613
-
1614
- candidates.emplace_back(llama_token_data { token_id, logit, 0.0f });
1615
- }
1616
-
1617
- llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
1618
-
1619
- if (use_repeat_penalty && !repeat_penalty_tokens.empty()) {
1620
- llama_sample_repetition_penalties(
1621
- ctx->ctx,
1622
- &candidates_p,
1623
- repeat_penalty_tokens.data(),
1624
- repeat_penalty_tokens.size(),
1625
- repeat_penalty,
1626
- repeat_penalty_frequency_penalty,
1627
- repeat_penalty_presence_penalty
1628
- );
1629
- }
1630
-
1631
- if (use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
1632
- llama_sample_grammar(ctx->ctx, &candidates_p, (grammar_evaluation_state)->grammar);
1633
-
1634
- if ((candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) && useTokenBiases) {
1635
- // logit biases caused grammar sampling to fail, so sampling again without logit biases
1636
- useTokenBiases = false;
1637
- SampleToken();
1638
- return;
1639
- }
1640
- }
1641
-
1642
- if (temperature <= 0) {
1643
- new_token_id = llama_sample_token_greedy(ctx->ctx, &candidates_p);
1644
- } else {
1645
- const int32_t resolved_top_k =
1646
- top_k <= 0 ? llama_n_vocab(ctx->model->model) : std::min(top_k, llama_n_vocab(ctx->model->model));
1647
- const int32_t n_probs = 0; // Number of probabilities to keep - 0 = disabled
1648
- const float tfs_z = 1.00f; // Tail free sampling - 1.0 = disabled
1649
- const float typical_p = 1.00f; // Typical probability - 1.0 = disabled
1650
- const float resolved_top_p = top_p; // Top p sampling - 1.0 = disabled
1651
-
1652
- // Temperature sampling
1653
- size_t min_keep = std::max(1, n_probs);
1654
- llama_sample_top_k(ctx->ctx, &candidates_p, resolved_top_k, min_keep);
1655
- llama_sample_tail_free(ctx->ctx, &candidates_p, tfs_z, min_keep);
1656
- llama_sample_typical(ctx->ctx, &candidates_p, typical_p, min_keep);
1657
- llama_sample_top_p(ctx->ctx, &candidates_p, resolved_top_p, min_keep);
1658
- llama_sample_min_p(ctx->ctx, &candidates_p, min_p, min_keep);
1659
- llama_sample_temp(ctx->ctx, &candidates_p, temperature);
1660
- new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
1661
- }
1662
-
1663
- if (!llama_token_is_eog(ctx->model->model, new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
1664
- llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
1665
- }
1666
-
1667
- result = new_token_id;
1668
- }
1669
- void OnOK() {
1670
- Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
1671
- deferred.Resolve(resultValue);
1672
- }
1673
- void OnError(const Napi::Error& err) {
1674
- deferred.Reject(err.Value());
1675
- }
1676
- };
1677
-
1678
- Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
1679
- AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
1680
- worker->Queue();
1681
- return worker->GetPromise();
1682
- }
1683
-
1684
- Napi::Value systemInfo(const Napi::CallbackInfo& info) {
1685
- return Napi::String::From(info.Env(), llama_print_system_info());
1686
- }
1687
-
1688
- Napi::Value addonGetSupportsGpuOffloading(const Napi::CallbackInfo& info) {
1689
- return Napi::Boolean::New(info.Env(), llama_supports_gpu_offload());
1690
- }
1691
-
1692
- Napi::Value addonGetSupportsMmap(const Napi::CallbackInfo& info) {
1693
- return Napi::Boolean::New(info.Env(), llama_supports_mmap());
1694
- }
1695
-
1696
- Napi::Value addonGetSupportsMlock(const Napi::CallbackInfo& info) {
1697
- return Napi::Boolean::New(info.Env(), llama_supports_mlock());
1698
- }
1699
-
1700
- Napi::Value addonGetBlockSizeForGgmlType(const Napi::CallbackInfo& info) {
1701
- const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1702
-
1703
- if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1704
- return info.Env().Undefined();
1705
- }
1706
-
1707
- const auto blockSize = ggml_blck_size(static_cast<ggml_type>(ggmlType));
1708
-
1709
- return Napi::Number::New(info.Env(), blockSize);
1710
- }
1711
-
1712
- Napi::Value addonGetTypeSizeForGgmlType(const Napi::CallbackInfo& info) {
1713
- const int ggmlType = info[0].As<Napi::Number>().Int32Value();
1714
-
1715
- if (ggmlType < 0 || ggmlType > GGML_TYPE_COUNT) {
1716
- return info.Env().Undefined();
1717
- }
1718
-
1719
- const auto typeSize = ggml_type_size(static_cast<ggml_type>(ggmlType));
1720
-
1721
- return Napi::Number::New(info.Env(), typeSize);
1722
- }
1723
-
1724
- Napi::Value addonGetConsts(const Napi::CallbackInfo& info) {
1725
- Napi::Object consts = Napi::Object::New(info.Env());
1726
- consts.Set("ggmlMaxDims", Napi::Number::New(info.Env(), GGML_MAX_DIMS));
1727
- consts.Set("ggmlTypeF16Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F16)));
1728
- consts.Set("ggmlTypeF32Size", Napi::Number::New(info.Env(), ggml_type_size(GGML_TYPE_F32)));
1729
- consts.Set("ggmlTensorOverhead", Napi::Number::New(info.Env(), ggml_tensor_overhead()));
1730
- consts.Set("llamaMaxRngState", Napi::Number::New(info.Env(), LLAMA_MAX_RNG_STATE));
1731
- consts.Set("llamaPosSize", Napi::Number::New(info.Env(), sizeof(llama_pos)));
1732
- consts.Set("llamaSeqIdSize", Napi::Number::New(info.Env(), sizeof(llama_seq_id)));
1733
-
1734
- return consts;
1735
- }
1736
-
1737
- int addonGetGgmlLogLevelNumber(ggml_log_level level) {
1738
- switch (level) {
1739
- case GGML_LOG_LEVEL_ERROR: return 2;
1740
- case GGML_LOG_LEVEL_WARN: return 3;
1741
- case GGML_LOG_LEVEL_INFO: return 4;
1742
- case GGML_LOG_LEVEL_DEBUG: return 5;
1743
- }
1744
-
1745
- return 1;
1746
- }
1747
-
1748
- void addonCallJsLogCallback(
1749
- Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
1750
- ) {
1751
- bool called = false;
1752
-
1753
- if (env != nullptr && callback != nullptr && addonJsLoggerCallbackSet) {
1754
- try {
1755
- callback.Call({
1756
- Napi::Number::New(env, data->logLevelNumber),
1757
- Napi::String::New(env, data->stringStream->str()),
1758
- });
1759
- called = true;
1760
- } catch (const Napi::Error& e) {
1761
- called = false;
1762
- }
1763
- }
1764
-
1765
- if (!called && data != nullptr) {
1766
- if (data->logLevelNumber == 2) {
1767
- fputs(data->stringStream->str().c_str(), stderr);
1768
- fflush(stderr);
1769
- } else {
1770
- fputs(data->stringStream->str().c_str(), stdout);
1771
- fflush(stdout);
1772
- }
1773
- }
1774
-
1775
- if (data != nullptr) {
1776
- delete data->stringStream;
1777
- delete data;
1778
- }
1779
- }
1780
-
1781
- static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data) {
1782
- int logLevelNumber = addonGetGgmlLogLevelNumber(level);
1783
-
1784
- if (logLevelNumber > addonLoggerLogLevel) {
1785
- return;
1786
- }
1787
-
1788
- if (addonJsLoggerCallbackSet) {
1789
- std::stringstream* stringStream = new std::stringstream();
1790
- if (text != nullptr) {
1791
- *stringStream << text;
1792
- }
1793
-
1794
- addon_logger_log* data = new addon_logger_log {
1795
- logLevelNumber,
1796
- stringStream,
1797
- };
1798
-
1799
- auto status = addonThreadSafeLoggerCallback.NonBlockingCall(data);
1800
-
1801
- if (status == napi_ok) {
1802
- return;
1803
- } else {
1804
- delete stringStream;
1805
- delete data;
1806
- }
1807
- }
1808
-
1809
- if (text != nullptr) {
1810
- if (level == 2) {
1811
- fputs(text, stderr);
1812
- fflush(stderr);
1813
- } else {
1814
- fputs(text, stdout);
1815
- fflush(stdout);
1816
- }
1817
- }
1818
- }
1819
-
1820
- Napi::Value setLogger(const Napi::CallbackInfo& info) {
1821
- if (info.Length() < 1 || !info[0].IsFunction()) {
1822
- if (addonJsLoggerCallbackSet) {
1823
- addonJsLoggerCallbackSet = false;
1824
- addonThreadSafeLoggerCallback.Release();
1825
- }
1826
-
1827
- return info.Env().Undefined();
1828
- }
1829
-
1830
- auto addonLoggerJSCallback = info[0].As<Napi::Function>();
1831
- AddonThreadSafeLogCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
1832
- addonThreadSafeLoggerCallback = AddonThreadSafeLogCallbackFunction::New(
1833
- info.Env(),
1834
- addonLoggerJSCallback,
1835
- "loggerCallback",
1836
- 0,
1837
- 1,
1838
- context,
1839
- [](Napi::Env, void*, AddonThreadSafeLogCallbackFunctionContext* ctx) {
1840
- addonJsLoggerCallbackSet = false;
1841
-
1842
- delete ctx;
1843
- }
1844
- );
1845
- addonJsLoggerCallbackSet = true;
1846
-
1847
- // prevent blocking the main node process from exiting due to active resources
1848
- addonThreadSafeLoggerCallback.Unref(info.Env());
1849
-
1850
- return info.Env().Undefined();
1851
- }
1852
-
1853
- Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
1854
- if (info.Length() < 1 || !info[0].IsNumber()) {
1855
- addonLoggerLogLevel = 5;
1856
-
1857
- return info.Env().Undefined();
1858
- }
1859
-
1860
- addonLoggerLogLevel = info[0].As<Napi::Number>().Int32Value();
1861
-
1862
- return info.Env().Undefined();
1863
- }
1864
-
1865
- class AddonBackendLoadWorker : public Napi::AsyncWorker {
1866
- public:
1867
- AddonBackendLoadWorker(const Napi::Env& env)
1868
- : Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
1869
- deferred(Napi::Promise::Deferred::New(env)) {
1870
- }
1871
- ~AddonBackendLoadWorker() {
1872
- }
1873
-
1874
- Napi::Promise GetPromise() {
1875
- return deferred.Promise();
1876
- }
1877
-
1878
- protected:
1879
- Napi::Promise::Deferred deferred;
1880
-
1881
- void Execute() {
1882
- try {
1883
- llama_backend_init();
1884
-
1885
- try {
1886
- if (backendDisposed) {
1887
- llama_backend_free();
1888
- } else {
1889
- backendInitialized = true;
1890
- }
1891
- } catch (const std::exception& e) {
1892
- SetError(e.what());
1893
- } catch(...) {
1894
- SetError("Unknown error when calling \"llama_backend_free\"");
1895
- }
1896
- } catch (const std::exception& e) {
1897
- SetError(e.what());
1898
- } catch(...) {
1899
- SetError("Unknown error when calling \"llama_backend_init\"");
1900
- }
1901
- }
1902
- void OnOK() {
1903
- deferred.Resolve(Env().Undefined());
1904
- }
1905
- void OnError(const Napi::Error& err) {
1906
- deferred.Reject(err.Value());
1907
- }
1908
- };
1909
-
1910
-
1911
- class AddonBackendUnloadWorker : public Napi::AsyncWorker {
1912
- public:
1913
- AddonBackendUnloadWorker(const Napi::Env& env)
1914
- : Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
1915
- deferred(Napi::Promise::Deferred::New(env)) {
1916
- }
1917
- ~AddonBackendUnloadWorker() {
1918
- }
1919
-
1920
- Napi::Promise GetPromise() {
1921
- return deferred.Promise();
1922
- }
1923
-
1924
- protected:
1925
- Napi::Promise::Deferred deferred;
1926
-
1927
- void Execute() {
1928
- try {
1929
- if (backendInitialized) {
1930
- backendInitialized = false;
1931
- llama_backend_free();
1932
- }
1933
- } catch (const std::exception& e) {
1934
- SetError(e.what());
1935
- } catch(...) {
1936
- SetError("Unknown error when calling \"llama_backend_free\"");
1937
- }
1938
- }
1939
- void OnOK() {
1940
- deferred.Resolve(Env().Undefined());
1941
- }
1942
- void OnError(const Napi::Error& err) {
1943
- deferred.Reject(err.Value());
1944
- }
1945
- };
1946
-
1947
- Napi::Value addonInit(const Napi::CallbackInfo& info) {
1948
- if (backendInitialized) {
1949
- Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1950
- deferred.Resolve(info.Env().Undefined());
1951
- return deferred.Promise();
1952
- }
1953
-
1954
- AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
1955
- worker->Queue();
1956
- return worker->GetPromise();
1957
- }
1958
-
1959
- Napi::Value addonDispose(const Napi::CallbackInfo& info) {
1960
- if (backendDisposed) {
1961
- Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
1962
- deferred.Resolve(info.Env().Undefined());
1963
- return deferred.Promise();
1964
- }
1965
-
1966
- backendDisposed = true;
1967
-
1968
- AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
1969
- worker->Queue();
1970
- return worker->GetPromise();
1971
- }
1972
-
1973
- static void addonFreeLlamaBackend(Napi::Env env, int* data) {
1974
- if (backendDisposed) {
1975
- return;
1976
- }
1977
-
1978
- backendDisposed = true;
1979
- if (backendInitialized) {
1980
- backendInitialized = false;
1981
- llama_backend_free();
1982
- }
1983
- }
1984
-
1985
- Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
1986
- exports.DefineProperties({
1987
- Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
1988
- Napi::PropertyDescriptor::Function("getSupportsGpuOffloading", addonGetSupportsGpuOffloading),
1989
- Napi::PropertyDescriptor::Function("getSupportsMmap", addonGetSupportsMmap),
1990
- Napi::PropertyDescriptor::Function("getSupportsMlock", addonGetSupportsMlock),
1991
- Napi::PropertyDescriptor::Function("getBlockSizeForGgmlType", addonGetBlockSizeForGgmlType),
1992
- Napi::PropertyDescriptor::Function("getTypeSizeForGgmlType", addonGetTypeSizeForGgmlType),
1993
- Napi::PropertyDescriptor::Function("getConsts", addonGetConsts),
1994
- Napi::PropertyDescriptor::Function("setLogger", setLogger),
1995
- Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
1996
- Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
1997
- Napi::PropertyDescriptor::Function("getGpuDeviceInfo", getGpuDeviceInfo),
1998
- Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
1999
- Napi::PropertyDescriptor::Function("init", addonInit),
2000
- Napi::PropertyDescriptor::Function("dispose", addonDispose),
2001
- });
2002
- AddonModel::init(exports);
2003
- AddonGrammar::init(exports);
2004
- AddonGrammarEvaluationState::init(exports);
2005
- AddonContext::init(exports);
2006
-
2007
- llama_log_set(addonLlamaCppLogCallback, nullptr);
2008
-
2009
- exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
2010
-
2011
- return exports;
2012
- }
2013
-
2014
- NODE_API_MODULE(NODE_GYP_MODULE_NAME, registerCallback)