node-llama-cpp 3.0.0-beta.13 → 3.0.0-beta.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/TemplateChatWrapper.d.ts +10 -9
- package/dist/TemplateChatWrapper.js.map +1 -1
- package/dist/bindings/AddonTypes.d.ts +14 -6
- package/dist/bindings/Llama.d.ts +8 -0
- package/dist/bindings/Llama.js +59 -7
- package/dist/bindings/Llama.js.map +1 -1
- package/dist/bindings/getLlama.js +1 -1
- package/dist/bindings/getLlama.js.map +1 -1
- package/dist/bindings/types.d.ts +3 -0
- package/dist/bindings/types.js +4 -0
- package/dist/bindings/types.js.map +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js +1 -1
- package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -1
- package/dist/bindings/utils/compileLLamaCpp.d.ts +4 -1
- package/dist/bindings/utils/compileLLamaCpp.js +133 -97
- package/dist/bindings/utils/compileLLamaCpp.js.map +1 -1
- package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +3 -0
- package/dist/bindings/utils/detectAvailableComputeLayers.js +155 -13
- package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -1
- package/dist/bindings/utils/logDistroInstallInstruction.d.ts +1 -0
- package/dist/bindings/utils/logDistroInstallInstruction.js +16 -6
- package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -1
- package/dist/bindings/utils/testBindingBinary.js +2 -2
- package/dist/bindings/utils/testBindingBinary.js.map +1 -1
- package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
- package/dist/bindings/utils/testCmakeBinary.js +32 -0
- package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
- package/dist/cli/commands/ChatCommand.js +31 -14
- package/dist/cli/commands/ChatCommand.js.map +1 -1
- package/dist/cli/commands/CompleteCommand.js +30 -13
- package/dist/cli/commands/CompleteCommand.js.map +1 -1
- package/dist/cli/commands/InfillCommand.js +30 -13
- package/dist/cli/commands/InfillCommand.js.map +1 -1
- package/dist/cli/commands/InspectCommand.js +1 -1
- package/dist/cli/commands/InspectCommand.js.map +1 -1
- package/dist/cli/commands/OnPostInstallCommand.js +2 -0
- package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
- package/dist/config.d.ts +1 -1
- package/dist/config.js +1 -1
- package/dist/config.js.map +1 -1
- package/dist/evaluator/LlamaContext/LlamaContext.d.ts +5 -8
- package/dist/evaluator/LlamaContext/LlamaContext.js +111 -65
- package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
- package/dist/evaluator/LlamaContext/types.d.ts +13 -6
- package/dist/evaluator/LlamaEmbeddingContext.d.ts +6 -5
- package/dist/evaluator/LlamaEmbeddingContext.js +32 -22
- package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -1
- package/dist/evaluator/LlamaGrammar.js +1 -0
- package/dist/evaluator/LlamaGrammar.js.map +1 -1
- package/dist/evaluator/LlamaModel.d.ts +16 -16
- package/dist/evaluator/LlamaModel.js +95 -20
- package/dist/evaluator/LlamaModel.js.map +1 -1
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js +2 -3
- package/dist/gguf/errors/ModelScore/NotEnoughVRamError.js.map +1 -1
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js +1 -1
- package/dist/gguf/ggufParser/stream/GGUFReadStream.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js.map +1 -1
- package/dist/state.d.ts +2 -0
- package/dist/state.js +7 -0
- package/dist/state.js.map +1 -1
- package/dist/utils/DisposeGuard.d.ts +13 -0
- package/dist/utils/DisposeGuard.js +120 -0
- package/dist/utils/DisposeGuard.js.map +1 -0
- package/dist/utils/cmake.js +1 -1
- package/dist/utils/cmake.js.map +1 -1
- package/dist/utils/getConsoleLogPrefix.js +3 -2
- package/dist/utils/getConsoleLogPrefix.js.map +1 -1
- package/dist/utils/removeNullFields.d.ts +2 -2
- package/dist/utils/removeNullFields.js.map +1 -1
- package/dist/utils/spawnCommand.d.ts +11 -1
- package/dist/utils/spawnCommand.js +55 -7
- package/dist/utils/spawnCommand.js.map +1 -1
- package/llama/addon.cpp +676 -83
- package/llama/binariesGithubRelease.json +1 -1
- package/llama/gitRelease.bundle +0 -0
- package/llama/grammars/json.gbnf +1 -1
- package/llama/grammars/json_arr.gbnf +1 -1
- package/llama/llama.cpp.info.json +1 -1
- package/llamaBins/linux-arm64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-arm64/llama-addon.node +0 -0
- package/llamaBins/linux-armv7l/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-armv7l/llama-addon.node +0 -0
- package/llamaBins/linux-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64/llama-addon.node +0 -0
- package/llamaBins/linux-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/linux-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
- package/llamaBins/mac-arm64-metal/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/mac-arm64-metal/default.metallib +0 -0
- package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
- package/llamaBins/mac-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/mac-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64/llama-addon.exp +0 -0
- package/llamaBins/win-x64/llama-addon.lib +0 -0
- package/llamaBins/win-x64/llama-addon.node +0 -0
- package/llamaBins/win-x64-cuda/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
- package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
- package/llamaBins/win-x64-vulkan/{.buildMetadata.json → _nlcBuildMetadata.json} +1 -1
- package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
- package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
- package/package.json +10 -7
- package/llamaBins/mac-arm64-metal/ggml-metal.metal +0 -7731
package/llama/addon.cpp
CHANGED
|
@@ -35,10 +35,77 @@ void addonCallJsLogCallback(
|
|
|
35
35
|
using AddonThreadSafeLogCallbackFunction =
|
|
36
36
|
Napi::TypedThreadSafeFunction<AddonThreadSafeLogCallbackFunctionContext, addon_logger_log, addonCallJsLogCallback>;
|
|
37
37
|
|
|
38
|
+
|
|
39
|
+
// Payload for a single model-load progress notification.
// Instances are created with `new` in llamaModelParamsProgressCallback and
// deleted in addonCallJsProgressCallback (or by the producer when queuing fails).
struct addon_progress_event {
    // Progress value as reported to the llama.cpp progress callback.
    float const progress;
};
|
|
43
|
+
|
|
44
|
+
using AddonThreadSafeProgressCallbackFunctionContext = Napi::Reference<Napi::Value>;
|
|
45
|
+
void addonCallJsProgressCallback(
|
|
46
|
+
Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
|
|
47
|
+
);
|
|
48
|
+
using AddonThreadSafeProgressEventCallbackFunction =
|
|
49
|
+
Napi::TypedThreadSafeFunction<AddonThreadSafeProgressCallbackFunctionContext, addon_progress_event, addonCallJsProgressCallback>;
|
|
50
|
+
|
|
51
|
+
|
|
38
52
|
AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
|
|
39
53
|
bool addonJsLoggerCallbackSet = false;
|
|
40
54
|
int addonLoggerLogLevel = 5;
|
|
41
55
|
bool backendInitialized = false;
|
|
56
|
+
bool backendDisposed = false;
|
|
57
|
+
|
|
58
|
+
// CallJs handler of AddonThreadSafeProgressEventCallbackFunction: forwards one
// queued progress event to the JS `onLoadProgress` callback and frees the event.
//
// env / callback - may be null (checked below); in that case the event is only freed.
// context        - the persistent reference registered with the thread-safe function (unused here).
// data           - heap-allocated event; this function takes ownership and deletes it.
//
// Delivery no longer depends on `addonJsLoggerCallbackSet`: that flag belongs to
// the logger callback, and gating on it silently dropped progress events whenever
// no logger was registered.
void addonCallJsProgressCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeProgressCallbackFunctionContext* context, addon_progress_event* data
) {
    // Nothing to report and nothing to free; also protects the dereference below.
    if (data == nullptr) {
        return;
    }

    if (env != nullptr && callback != nullptr) {
        try {
            callback.Call({Napi::Number::New(env, data->progress)});
        } catch (const Napi::Error& e) {
            // Best effort: an exception thrown by the JS listener must not
            // interrupt the model load.
        }
    }

    delete data;
}
|
|
71
|
+
|
|
72
|
+
// Estimates the number of bytes held by a llama_batch so the amount can be
// reported to V8 as external memory.
// NOTE(review): the terms appear to mirror the buffers allocated by
// llama_batch_init(n_tokens_alloc, embd, n_seq_max) - confirm against llama.cpp.
//
// n_tokens_alloc - token capacity of the batch
// embd           - embedding width; when 0 the batch stores token ids instead of embeddings
// n_seq_max      - maximum number of sequence ids per token
//
// Every operand is widened to uint64_t before multiplying. The original computed
// the products in size_t, which is only 32 bits wide on 32-bit targets (the
// package ships a linux-armv7l build) and could wrap before being accumulated.
static uint64_t calculateBatchMemorySize(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) {
    const uint64_t tokenCount = static_cast<uint64_t>(n_tokens_alloc);
    const uint64_t embdWidth = static_cast<uint64_t>(embd);
    const uint64_t seqMax = static_cast<uint64_t>(n_seq_max);

    uint64_t totalSize = 0;

    // either the embedding rows or the token ids
    if (embd) {
        totalSize += static_cast<uint64_t>(sizeof(float)) * tokenCount * embdWidth;
    } else {
        totalSize += static_cast<uint64_t>(sizeof(llama_token)) * tokenCount;
    }

    totalSize += static_cast<uint64_t>(sizeof(llama_pos)) * tokenCount;
    totalSize += static_cast<uint64_t>(sizeof(int32_t)) * tokenCount;

    // one pointer per token plus one extra slot
    totalSize += static_cast<uint64_t>(sizeof(llama_seq_id *)) * (tokenCount + 1);

    // the per-token sequence id arrays
    totalSize += static_cast<uint64_t>(sizeof(llama_seq_id)) * seqMax * tokenCount;

    totalSize += static_cast<uint64_t>(sizeof(int8_t)) * tokenCount;

    return totalSize;
}
|
|
91
|
+
|
|
92
|
+
// Reports `size` additional bytes of externally allocated memory to the JS engine.
// AdjustExternalMemory takes a signed 64-bit delta, so the unsigned amount is fed
// in steps that never exceed INT64_MAX.
static void adjustNapiExternalMemoryAdd(Napi::Env env, uint64_t size) {
    const uint64_t maxStep = std::numeric_limits<int64_t>::max();

    for (uint64_t remaining = size; remaining > 0;) {
        const uint64_t step = remaining < maxStep ? remaining : maxStep;
        Napi::MemoryManagement::AdjustExternalMemory(env, static_cast<int64_t>(step));
        remaining -= step;
    }
}
|
|
100
|
+
|
|
101
|
+
// Reports that `size` bytes of externally allocated memory were released.
// The unsigned amount is split into steps of at most INT64_MAX, each passed to
// AdjustExternalMemory as a negative delta.
static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
    const uint64_t maxStep = std::numeric_limits<int64_t>::max();

    uint64_t remaining = size;
    while (remaining != 0) {
        const uint64_t step = remaining < maxStep ? remaining : maxStep;
        const int64_t delta = -static_cast<int64_t>(step);
        Napi::MemoryManagement::AdjustExternalMemory(env, delta);
        remaining -= step;
    }
}
|
|
42
109
|
|
|
43
110
|
std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
|
|
44
111
|
std::vector<char> result(8, 0);
|
|
@@ -143,21 +210,42 @@ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_mod
|
|
|
143
210
|
return Napi::Number::From(info.Env(), token);
|
|
144
211
|
}
|
|
145
212
|
|
|
213
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data);
|
|
214
|
+
|
|
146
215
|
class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
147
216
|
public:
|
|
148
217
|
llama_model_params model_params;
|
|
149
218
|
llama_model* model;
|
|
219
|
+
uint64_t loadedModelSize = 0;
|
|
220
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
221
|
+
bool hasAddonExportsRef = false;
|
|
222
|
+
|
|
223
|
+
std::string modelPath;
|
|
224
|
+
bool modelLoaded = false;
|
|
225
|
+
bool abortModelLoad = false;
|
|
226
|
+
bool model_load_stopped = false;
|
|
227
|
+
float rawModelLoadPercentage = 0;
|
|
228
|
+
unsigned modelLoadPercentage = 0;
|
|
229
|
+
AddonThreadSafeProgressEventCallbackFunction addonThreadSafeOnLoadProgressEventCallback;
|
|
230
|
+
bool onLoadProgressEventCallbackSet = false;
|
|
231
|
+
bool hasLoadAbortSignal = false;
|
|
232
|
+
|
|
150
233
|
bool disposed = false;
|
|
151
234
|
|
|
152
235
|
AddonModel(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonModel>(info) {
|
|
153
236
|
model_params = llama_model_default_params();
|
|
154
237
|
|
|
155
238
|
// Get the model path
|
|
156
|
-
|
|
239
|
+
modelPath = info[0].As<Napi::String>().Utf8Value();
|
|
157
240
|
|
|
158
241
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
159
242
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
160
243
|
|
|
244
|
+
if (options.Has("addonExports")) {
|
|
245
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
246
|
+
hasAddonExportsRef = true;
|
|
247
|
+
}
|
|
248
|
+
|
|
161
249
|
if (options.Has("gpuLayers")) {
|
|
162
250
|
model_params.n_gpu_layers = options.Get("gpuLayers").As<Napi::Number>().Int32Value();
|
|
163
251
|
}
|
|
@@ -173,13 +261,37 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
173
261
|
if (options.Has("useMlock")) {
|
|
174
262
|
model_params.use_mlock = options.Get("useMlock").As<Napi::Boolean>().Value();
|
|
175
263
|
}
|
|
176
|
-
}
|
|
177
264
|
|
|
178
|
-
|
|
265
|
+
if (options.Has("onLoadProgress")) {
|
|
266
|
+
auto onLoadProgressJSCallback = options.Get("onLoadProgress").As<Napi::Function>();
|
|
267
|
+
if (onLoadProgressJSCallback.IsFunction()) {
|
|
268
|
+
AddonThreadSafeProgressCallbackFunctionContext* context = new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));
|
|
269
|
+
addonThreadSafeOnLoadProgressEventCallback = AddonThreadSafeProgressEventCallbackFunction::New(
|
|
270
|
+
info.Env(),
|
|
271
|
+
onLoadProgressJSCallback,
|
|
272
|
+
"onLoadProgressCallback",
|
|
273
|
+
0,
|
|
274
|
+
1,
|
|
275
|
+
context,
|
|
276
|
+
[](Napi::Env, AddonModel* addonModel, AddonThreadSafeProgressCallbackFunctionContext* ctx) {
|
|
277
|
+
addonModel->onLoadProgressEventCallbackSet = false;
|
|
278
|
+
|
|
279
|
+
delete ctx;
|
|
280
|
+
},
|
|
281
|
+
this
|
|
282
|
+
);
|
|
283
|
+
onLoadProgressEventCallbackSet = true;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
179
286
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
287
|
+
if (options.Has("hasLoadAbortSignal")) {
|
|
288
|
+
hasLoadAbortSignal = options.Get("hasLoadAbortSignal").As<Napi::Boolean>().Value();
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
if (onLoadProgressEventCallbackSet || hasLoadAbortSignal) {
|
|
292
|
+
model_params.progress_callback_user_data = &(*this);
|
|
293
|
+
model_params.progress_callback = llamaModelParamsProgressCallback;
|
|
294
|
+
}
|
|
183
295
|
}
|
|
184
296
|
}
|
|
185
297
|
|
|
@@ -192,23 +304,31 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
192
304
|
return;
|
|
193
305
|
}
|
|
194
306
|
|
|
195
|
-
llama_free_model(model);
|
|
196
307
|
disposed = true;
|
|
197
|
-
|
|
308
|
+
if (modelLoaded) {
|
|
309
|
+
modelLoaded = false;
|
|
310
|
+
llama_free_model(model);
|
|
198
311
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
return info.Env().Undefined();
|
|
312
|
+
adjustNapiExternalMemorySubtract(Env(), loadedModelSize);
|
|
313
|
+
loadedModelSize = 0;
|
|
202
314
|
}
|
|
203
315
|
|
|
204
|
-
|
|
316
|
+
if (hasAddonExportsRef) {
|
|
317
|
+
addonExportsRef.Unref();
|
|
318
|
+
hasAddonExportsRef = false;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
205
321
|
|
|
322
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
323
|
+
// Asks a running model load to stop. Only the `abortModelLoad` flag is raised here;
// llamaModelParamsProgressCallback returns `!abortModelLoad`, and that callback is
// installed only when `onLoadProgress` or `hasLoadAbortSignal` was passed in the options.
// NOTE(review): the flag is a plain bool that appears to be read from the load
// worker while being written here - confirm whether it needs to be atomic.
// Always returns `undefined`.
Napi::Value AbortActiveModelLoad(const Napi::CallbackInfo& info) {
    const Napi::Env env = info.Env();

    abortModelLoad = true;

    return env.Undefined();
}
|
|
327
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
208
328
|
|
|
209
329
|
Napi::Value Tokenize(const Napi::CallbackInfo& info) {
|
|
210
330
|
if (disposed) {
|
|
211
|
-
Napi::Error::New(info.Env(), "
|
|
331
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
212
332
|
return info.Env().Undefined();
|
|
213
333
|
}
|
|
214
334
|
|
|
@@ -226,7 +346,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
226
346
|
}
|
|
227
347
|
Napi::Value Detokenize(const Napi::CallbackInfo& info) {
|
|
228
348
|
if (disposed) {
|
|
229
|
-
Napi::Error::New(info.Env(), "
|
|
349
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
230
350
|
return info.Env().Undefined();
|
|
231
351
|
}
|
|
232
352
|
|
|
@@ -251,7 +371,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
251
371
|
|
|
252
372
|
Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
|
|
253
373
|
if (disposed) {
|
|
254
|
-
Napi::Error::New(info.Env(), "
|
|
374
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
255
375
|
return info.Env().Undefined();
|
|
256
376
|
}
|
|
257
377
|
|
|
@@ -260,7 +380,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
260
380
|
|
|
261
381
|
Napi::Value GetEmbeddingVectorSize(const Napi::CallbackInfo& info) {
|
|
262
382
|
if (disposed) {
|
|
263
|
-
Napi::Error::New(info.Env(), "
|
|
383
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
264
384
|
return info.Env().Undefined();
|
|
265
385
|
}
|
|
266
386
|
|
|
@@ -269,7 +389,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
269
389
|
|
|
270
390
|
Napi::Value GetTotalSize(const Napi::CallbackInfo& info) {
|
|
271
391
|
if (disposed) {
|
|
272
|
-
Napi::Error::New(info.Env(), "
|
|
392
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
273
393
|
return info.Env().Undefined();
|
|
274
394
|
}
|
|
275
395
|
|
|
@@ -278,7 +398,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
278
398
|
|
|
279
399
|
Napi::Value GetTotalParameters(const Napi::CallbackInfo& info) {
|
|
280
400
|
if (disposed) {
|
|
281
|
-
Napi::Error::New(info.Env(), "
|
|
401
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
282
402
|
return info.Env().Undefined();
|
|
283
403
|
}
|
|
284
404
|
|
|
@@ -287,7 +407,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
287
407
|
|
|
288
408
|
Napi::Value GetModelDescription(const Napi::CallbackInfo& info) {
|
|
289
409
|
if (disposed) {
|
|
290
|
-
Napi::Error::New(info.Env(), "
|
|
410
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
291
411
|
return info.Env().Undefined();
|
|
292
412
|
}
|
|
293
413
|
|
|
@@ -299,7 +419,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
299
419
|
|
|
300
420
|
Napi::Value TokenBos(const Napi::CallbackInfo& info) {
|
|
301
421
|
if (disposed) {
|
|
302
|
-
Napi::Error::New(info.Env(), "
|
|
422
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
303
423
|
return info.Env().Undefined();
|
|
304
424
|
}
|
|
305
425
|
|
|
@@ -307,7 +427,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
307
427
|
}
|
|
308
428
|
Napi::Value TokenEos(const Napi::CallbackInfo& info) {
|
|
309
429
|
if (disposed) {
|
|
310
|
-
Napi::Error::New(info.Env(), "
|
|
430
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
311
431
|
return info.Env().Undefined();
|
|
312
432
|
}
|
|
313
433
|
|
|
@@ -315,7 +435,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
315
435
|
}
|
|
316
436
|
Napi::Value TokenNl(const Napi::CallbackInfo& info) {
|
|
317
437
|
if (disposed) {
|
|
318
|
-
Napi::Error::New(info.Env(), "
|
|
438
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
319
439
|
return info.Env().Undefined();
|
|
320
440
|
}
|
|
321
441
|
|
|
@@ -323,7 +443,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
323
443
|
}
|
|
324
444
|
Napi::Value PrefixToken(const Napi::CallbackInfo& info) {
|
|
325
445
|
if (disposed) {
|
|
326
|
-
Napi::Error::New(info.Env(), "
|
|
446
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
327
447
|
return info.Env().Undefined();
|
|
328
448
|
}
|
|
329
449
|
|
|
@@ -331,7 +451,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
331
451
|
}
|
|
332
452
|
Napi::Value MiddleToken(const Napi::CallbackInfo& info) {
|
|
333
453
|
if (disposed) {
|
|
334
|
-
Napi::Error::New(info.Env(), "
|
|
454
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
335
455
|
return info.Env().Undefined();
|
|
336
456
|
}
|
|
337
457
|
|
|
@@ -339,7 +459,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
339
459
|
}
|
|
340
460
|
Napi::Value SuffixToken(const Napi::CallbackInfo& info) {
|
|
341
461
|
if (disposed) {
|
|
342
|
-
Napi::Error::New(info.Env(), "
|
|
462
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
343
463
|
return info.Env().Undefined();
|
|
344
464
|
}
|
|
345
465
|
|
|
@@ -347,7 +467,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
347
467
|
}
|
|
348
468
|
Napi::Value EotToken(const Napi::CallbackInfo& info) {
|
|
349
469
|
if (disposed) {
|
|
350
|
-
Napi::Error::New(info.Env(), "
|
|
470
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
351
471
|
return info.Env().Undefined();
|
|
352
472
|
}
|
|
353
473
|
|
|
@@ -355,7 +475,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
355
475
|
}
|
|
356
476
|
Napi::Value GetTokenString(const Napi::CallbackInfo& info) {
|
|
357
477
|
if (disposed) {
|
|
358
|
-
Napi::Error::New(info.Env(), "
|
|
478
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
359
479
|
return info.Env().Undefined();
|
|
360
480
|
}
|
|
361
481
|
|
|
@@ -374,7 +494,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
374
494
|
|
|
375
495
|
Napi::Value GetTokenType(const Napi::CallbackInfo& info) {
|
|
376
496
|
if (disposed) {
|
|
377
|
-
Napi::Error::New(info.Env(), "
|
|
497
|
+
Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
|
|
378
498
|
return info.Env().Undefined();
|
|
379
499
|
}
|
|
380
500
|
|
|
@@ -402,6 +522,8 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
402
522
|
exports.Env(),
|
|
403
523
|
"AddonModel",
|
|
404
524
|
{
|
|
525
|
+
InstanceMethod("init", &AddonModel::Init),
|
|
526
|
+
InstanceMethod("abortActiveModelLoad", &AddonModel::AbortActiveModelLoad),
|
|
405
527
|
InstanceMethod("tokenize", &AddonModel::Tokenize),
|
|
406
528
|
InstanceMethod("detokenize", &AddonModel::Detokenize),
|
|
407
529
|
InstanceMethod("getTrainContextSize", &AddonModel::GetTrainContextSize),
|
|
@@ -426,9 +548,166 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
|
|
|
426
548
|
}
|
|
427
549
|
};
|
|
428
550
|
|
|
551
|
+
static bool llamaModelParamsProgressCallback(float progress, void * user_data) {
|
|
552
|
+
AddonModel* addonModel = (AddonModel *) user_data;
|
|
553
|
+
unsigned percentage = (unsigned) (100 * progress);
|
|
554
|
+
|
|
555
|
+
if (percentage > addonModel->modelLoadPercentage) {
|
|
556
|
+
addonModel->modelLoadPercentage = percentage;
|
|
557
|
+
|
|
558
|
+
// original llama.cpp logs
|
|
559
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, ".", nullptr);
|
|
560
|
+
if (percentage >= 100) {
|
|
561
|
+
addonLlamaCppLogCallback(GGML_LOG_LEVEL_INFO, "\n", nullptr);
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
if (progress > addonModel->rawModelLoadPercentage) {
|
|
566
|
+
addonModel->rawModelLoadPercentage = progress;
|
|
567
|
+
|
|
568
|
+
if (addonModel->onLoadProgressEventCallbackSet) {
|
|
569
|
+
addon_progress_event* data = new addon_progress_event {
|
|
570
|
+
progress
|
|
571
|
+
};
|
|
572
|
+
|
|
573
|
+
auto status = addonModel->addonThreadSafeOnLoadProgressEventCallback.NonBlockingCall(data);
|
|
574
|
+
|
|
575
|
+
if (status != napi_ok) {
|
|
576
|
+
delete data;
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
return !(addonModel->abortModelLoad);
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
class AddonModelLoadModelWorker : public Napi::AsyncWorker {
|
|
585
|
+
public:
|
|
586
|
+
AddonModel* model;
|
|
587
|
+
|
|
588
|
+
AddonModelLoadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
589
|
+
: Napi::AsyncWorker(env, "AddonModelLoadModelWorker"),
|
|
590
|
+
model(model),
|
|
591
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
592
|
+
model->Ref();
|
|
593
|
+
}
|
|
594
|
+
~AddonModelLoadModelWorker() {
|
|
595
|
+
model->Unref();
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
Napi::Promise GetPromise() {
|
|
599
|
+
return deferred.Promise();
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
protected:
|
|
603
|
+
Napi::Promise::Deferred deferred;
|
|
604
|
+
|
|
605
|
+
void Execute() {
|
|
606
|
+
try {
|
|
607
|
+
model->model = llama_load_model_from_file(model->modelPath.c_str(), model->model_params);
|
|
608
|
+
|
|
609
|
+
model->modelLoaded = model->model != nullptr && model->model != NULL;
|
|
610
|
+
} catch (const std::exception& e) {
|
|
611
|
+
SetError(e.what());
|
|
612
|
+
} catch(...) {
|
|
613
|
+
SetError("Unknown error when calling \"llama_load_model_from_file\"");
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
void OnOK() {
|
|
617
|
+
if (model->modelLoaded) {
|
|
618
|
+
uint64_t modelSize = llama_model_size(model->model);
|
|
619
|
+
adjustNapiExternalMemoryAdd(Env(), modelSize);
|
|
620
|
+
model->loadedModelSize = modelSize;
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
deferred.Resolve(Napi::Boolean::New(Env(), model->modelLoaded));
|
|
624
|
+
if (model->onLoadProgressEventCallbackSet) {
|
|
625
|
+
model->addonThreadSafeOnLoadProgressEventCallback.Release();
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
void OnError(const Napi::Error& err) {
|
|
629
|
+
deferred.Reject(err.Value());
|
|
630
|
+
}
|
|
631
|
+
};
|
|
632
|
+
class AddonModelUnloadModelWorker : public Napi::AsyncWorker {
|
|
633
|
+
public:
|
|
634
|
+
AddonModel* model;
|
|
635
|
+
|
|
636
|
+
AddonModelUnloadModelWorker(const Napi::Env& env, AddonModel* model)
|
|
637
|
+
: Napi::AsyncWorker(env, "AddonModelUnloadModelWorker"),
|
|
638
|
+
model(model),
|
|
639
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
640
|
+
model->Ref();
|
|
641
|
+
}
|
|
642
|
+
~AddonModelUnloadModelWorker() {
|
|
643
|
+
model->Unref();
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
Napi::Promise GetPromise() {
|
|
647
|
+
return deferred.Promise();
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
protected:
|
|
651
|
+
Napi::Promise::Deferred deferred;
|
|
652
|
+
|
|
653
|
+
void Execute() {
|
|
654
|
+
try {
|
|
655
|
+
llama_free_model(model->model);
|
|
656
|
+
model->modelLoaded = false;
|
|
657
|
+
|
|
658
|
+
model->dispose();
|
|
659
|
+
} catch (const std::exception& e) {
|
|
660
|
+
SetError(e.what());
|
|
661
|
+
} catch(...) {
|
|
662
|
+
SetError("Unknown error when calling \"llama_free_model\"");
|
|
663
|
+
}
|
|
664
|
+
}
|
|
665
|
+
void OnOK() {
|
|
666
|
+
adjustNapiExternalMemorySubtract(Env(), model->loadedModelSize);
|
|
667
|
+
model->loadedModelSize = 0;
|
|
668
|
+
|
|
669
|
+
deferred.Resolve(Env().Undefined());
|
|
670
|
+
}
|
|
671
|
+
void OnError(const Napi::Error& err) {
|
|
672
|
+
deferred.Reject(err.Value());
|
|
673
|
+
}
|
|
674
|
+
};
|
|
675
|
+
|
|
676
|
+
// Starts loading the model through an AddonModelLoadModelWorker.
// Throws a JS error ("Model is disposed") and returns `undefined` when the model
// was already disposed; otherwise returns the worker's promise, which resolves
// with a boolean telling whether the model was loaded.
Napi::Value AddonModel::Init(const Napi::CallbackInfo& info) {
    if (disposed) {
        const Napi::Env env = info.Env();
        Napi::Error::New(env, "Model is disposed").ThrowAsJavaScriptException();
        return env.Undefined();
    }

    auto* loadWorker = new AddonModelLoadModelWorker(this->Env(), this);
    Napi::Promise loadPromise = loadWorker->GetPromise();
    loadWorker->Queue();

    return loadPromise;
}
|
|
686
|
+
// Disposes the model.
//   - already disposed: returns `undefined`
//   - no model loaded: disposes synchronously and returns an already resolved promise
//   - model loaded: queues an AddonModelUnloadModelWorker and returns its promise
Napi::Value AddonModel::Dispose(const Napi::CallbackInfo& info) {
    if (disposed) {
        return info.Env().Undefined();
    }

    if (!modelLoaded) {
        dispose();

        auto settled = Napi::Promise::Deferred::New(info.Env());
        settled.Resolve(info.Env().Undefined());
        return settled.Promise();
    }

    // Cleared before the worker is queued, as in the original.
    modelLoaded = false;

    auto* unloadWorker = new AddonModelUnloadModelWorker(this->Env(), this);
    unloadWorker->Queue();
    return unloadWorker->GetPromise();
}
|
|
705
|
+
|
|
429
706
|
class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
430
707
|
public:
|
|
431
708
|
grammar_parser::parse_state parsed_grammar;
|
|
709
|
+
Napi::Reference<Napi::Object> addonExportsRef;
|
|
710
|
+
bool hasAddonExportsRef = false;
|
|
432
711
|
|
|
433
712
|
AddonGrammar(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonGrammar>(info) {
|
|
434
713
|
// Get the model path
|
|
@@ -438,6 +717,11 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
438
717
|
if (info.Length() > 1 && info[1].IsObject()) {
|
|
439
718
|
Napi::Object options = info[1].As<Napi::Object>();
|
|
440
719
|
|
|
720
|
+
if (options.Has("addonExports")) {
|
|
721
|
+
addonExportsRef = Napi::Persistent(options.Get("addonExports").As<Napi::Object>());
|
|
722
|
+
hasAddonExportsRef = true;
|
|
723
|
+
}
|
|
724
|
+
|
|
441
725
|
if (options.Has("printGrammar")) {
|
|
442
726
|
should_print_grammar = options.Get("printGrammar").As<Napi::Boolean>().Value();
|
|
443
727
|
}
|
|
@@ -455,6 +739,13 @@ class AddonGrammar : public Napi::ObjectWrap<AddonGrammar> {
|
|
|
455
739
|
}
|
|
456
740
|
}
|
|
457
741
|
|
|
742
|
+
// Drops the reference to the addon exports object taken in the constructor, if any.
~AddonGrammar() {
    if (!hasAddonExportsRef) {
        return;
    }

    addonExportsRef.Unref();
    hasAddonExportsRef = false;
}
|
|
748
|
+
|
|
458
749
|
static void init(Napi::Object exports) {
|
|
459
750
|
exports.Set("AddonGrammar", DefineClass(exports.Env(), "AddonGrammar", {}));
|
|
460
751
|
}
|
|
@@ -493,9 +784,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
493
784
|
llama_context_params context_params;
|
|
494
785
|
llama_context* ctx;
|
|
495
786
|
llama_batch batch;
|
|
787
|
+
uint64_t batchMemorySize = 0;
|
|
496
788
|
bool has_batch = false;
|
|
497
789
|
int32_t batch_n_tokens = 0;
|
|
498
790
|
int n_cur = 0;
|
|
791
|
+
|
|
792
|
+
uint64_t loadedContextMemorySize = 0;
|
|
793
|
+
bool contextLoaded = false;
|
|
794
|
+
|
|
499
795
|
bool disposed = false;
|
|
500
796
|
|
|
501
797
|
AddonContext(const Napi::CallbackInfo& info) : Napi::ObjectWrap<AddonContext>(info) {
|
|
@@ -523,10 +819,11 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
523
819
|
|
|
524
820
|
if (options.Has("batchSize")) {
|
|
525
821
|
context_params.n_batch = options.Get("batchSize").As<Napi::Number>().Uint32Value();
|
|
822
|
+
context_params.n_ubatch = context_params.n_batch; // the batch queue is managed in the JS side, so there's no need for managing it on the C++ side
|
|
526
823
|
}
|
|
527
824
|
|
|
528
|
-
if (options.Has("
|
|
529
|
-
context_params.
|
|
825
|
+
if (options.Has("embeddings")) {
|
|
826
|
+
context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
|
|
530
827
|
}
|
|
531
828
|
|
|
532
829
|
if (options.Has("threads")) {
|
|
@@ -537,9 +834,6 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
537
834
|
context_params.n_threads_batch = resolved_n_threads;
|
|
538
835
|
}
|
|
539
836
|
}
|
|
540
|
-
|
|
541
|
-
ctx = llama_new_context_with_model(model->model, context_params);
|
|
542
|
-
Napi::MemoryManagement::AdjustExternalMemory(Env(), llama_get_state_size(ctx));
|
|
543
837
|
}
|
|
544
838
|
~AddonContext() {
|
|
545
839
|
dispose();
|
|
@@ -550,13 +844,18 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
550
844
|
return;
|
|
551
845
|
}
|
|
552
846
|
|
|
553
|
-
|
|
554
|
-
|
|
847
|
+
disposed = true;
|
|
848
|
+
if (contextLoaded) {
|
|
849
|
+
contextLoaded = false;
|
|
850
|
+
llama_free(ctx);
|
|
851
|
+
|
|
852
|
+
adjustNapiExternalMemorySubtract(Env(), loadedContextMemorySize);
|
|
853
|
+
loadedContextMemorySize = 0;
|
|
854
|
+
}
|
|
855
|
+
|
|
555
856
|
model->Unref();
|
|
556
857
|
|
|
557
858
|
disposeBatch();
|
|
558
|
-
|
|
559
|
-
disposed = true;
|
|
560
859
|
}
|
|
561
860
|
void disposeBatch() {
|
|
562
861
|
if (!has_batch) {
|
|
@@ -566,16 +865,14 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
566
865
|
llama_batch_free(batch);
|
|
567
866
|
has_batch = false;
|
|
568
867
|
batch_n_tokens = 0;
|
|
868
|
+
|
|
869
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize);
|
|
870
|
+
batchMemorySize = 0;
|
|
569
871
|
}
|
|
570
|
-
Napi::Value Dispose(const Napi::CallbackInfo& info) {
|
|
571
|
-
if (disposed) {
|
|
572
|
-
return info.Env().Undefined();
|
|
573
|
-
}
|
|
574
872
|
|
|
575
|
-
|
|
873
|
+
Napi::Value Init(const Napi::CallbackInfo& info);
|
|
874
|
+
Napi::Value Dispose(const Napi::CallbackInfo& info);
|
|
576
875
|
|
|
577
|
-
return info.Env().Undefined();
|
|
578
|
-
}
|
|
579
876
|
Napi::Value GetContextSize(const Napi::CallbackInfo& info) {
|
|
580
877
|
if (disposed) {
|
|
581
878
|
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
@@ -600,6 +897,15 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
600
897
|
has_batch = true;
|
|
601
898
|
batch_n_tokens = n_tokens;
|
|
602
899
|
|
|
900
|
+
uint64_t newBatchMemorySize = calculateBatchMemorySize(n_tokens, llama_n_embd(model->model), context_params.n_batch);
|
|
901
|
+
if (newBatchMemorySize > batchMemorySize) {
|
|
902
|
+
adjustNapiExternalMemoryAdd(Env(), newBatchMemorySize - batchMemorySize);
|
|
903
|
+
batchMemorySize = newBatchMemorySize;
|
|
904
|
+
} else if (newBatchMemorySize < batchMemorySize) {
|
|
905
|
+
adjustNapiExternalMemorySubtract(Env(), batchMemorySize - newBatchMemorySize);
|
|
906
|
+
batchMemorySize = newBatchMemorySize;
|
|
907
|
+
}
|
|
908
|
+
|
|
603
909
|
return info.Env().Undefined();
|
|
604
910
|
}
|
|
605
911
|
Napi::Value DisposeBatch(const Napi::CallbackInfo& info) {
|
|
@@ -648,7 +954,12 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
648
954
|
|
|
649
955
|
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
|
|
650
956
|
|
|
651
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
957
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, -1, -1);
|
|
958
|
+
|
|
959
|
+
if (!result) {
|
|
960
|
+
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
|
|
961
|
+
return info.Env().Undefined();
|
|
962
|
+
}
|
|
652
963
|
|
|
653
964
|
return info.Env().Undefined();
|
|
654
965
|
}
|
|
@@ -662,9 +973,9 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
662
973
|
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
|
|
663
974
|
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
|
|
664
975
|
|
|
665
|
-
llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
976
|
+
bool result = llama_kv_cache_seq_rm(ctx, sequenceId, startPos, endPos);
|
|
666
977
|
|
|
667
|
-
return info.Env()
|
|
978
|
+
return Napi::Boolean::New(info.Env(), result);
|
|
668
979
|
}
|
|
669
980
|
Napi::Value ShiftSequenceTokenCells(const Napi::CallbackInfo& info) {
|
|
670
981
|
if (disposed) {
|
|
@@ -702,8 +1013,23 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
702
1013
|
return info.Env().Undefined();
|
|
703
1014
|
}
|
|
704
1015
|
|
|
1016
|
+
int32_t inputTokensLength = info[0].As<Napi::Number>().Int32Value();
|
|
1017
|
+
|
|
1018
|
+
if (inputTokensLength <= 0) {
|
|
1019
|
+
Napi::Error::New(info.Env(), "Invalid input tokens length").ThrowAsJavaScriptException();
|
|
1020
|
+
return info.Env().Undefined();
|
|
1021
|
+
}
|
|
1022
|
+
|
|
705
1023
|
const int n_embd = llama_n_embd(model->model);
|
|
706
|
-
const auto* embeddings =
|
|
1024
|
+
const auto* embeddings = llama_get_embeddings_seq(ctx, 0);
|
|
1025
|
+
if (embeddings == NULL) {
|
|
1026
|
+
embeddings = llama_get_embeddings_ith(ctx, inputTokensLength - 1);
|
|
1027
|
+
|
|
1028
|
+
if (embeddings == NULL) {
|
|
1029
|
+
Napi::Error::New(info.Env(), std::string("Failed to get embeddings for token ") + std::to_string(inputTokensLength - 1)).ThrowAsJavaScriptException();
|
|
1030
|
+
return info.Env().Undefined();
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
707
1033
|
|
|
708
1034
|
Napi::Float64Array result = Napi::Float64Array::New(info.Env(), n_embd);
|
|
709
1035
|
for (size_t i = 0; i < n_embd; ++i) {
|
|
@@ -726,6 +1052,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
726
1052
|
exports.Env(),
|
|
727
1053
|
"AddonContext",
|
|
728
1054
|
{
|
|
1055
|
+
InstanceMethod("init", &AddonContext::Init),
|
|
729
1056
|
InstanceMethod("getContextSize", &AddonContext::GetContextSize),
|
|
730
1057
|
InstanceMethod("initBatch", &AddonContext::InitBatch),
|
|
731
1058
|
InstanceMethod("addToBatch", &AddonContext::AddToBatch),
|
|
@@ -745,53 +1072,198 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
|
|
|
745
1072
|
};
|
|
746
1073
|
|
|
747
1074
|
|
|
748
|
-
class AddonContextDecodeBatchWorker : Napi::AsyncWorker
|
|
1075
|
+
class AddonContextDecodeBatchWorker : public Napi::AsyncWorker {
|
|
749
1076
|
public:
|
|
750
1077
|
AddonContext* ctx;
|
|
751
1078
|
|
|
752
|
-
AddonContextDecodeBatchWorker(const Napi::
|
|
753
|
-
: Napi::AsyncWorker(
|
|
1079
|
+
AddonContextDecodeBatchWorker(const Napi::Env& env, AddonContext* ctx)
|
|
1080
|
+
: Napi::AsyncWorker(env, "AddonContextDecodeBatchWorker"),
|
|
754
1081
|
ctx(ctx),
|
|
755
|
-
Napi::Promise::Deferred(
|
|
1082
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
756
1083
|
ctx->Ref();
|
|
757
1084
|
}
|
|
758
1085
|
~AddonContextDecodeBatchWorker() {
|
|
759
1086
|
ctx->Unref();
|
|
760
1087
|
}
|
|
761
|
-
|
|
762
|
-
|
|
1088
|
+
|
|
1089
|
+
Napi::Promise GetPromise() {
|
|
1090
|
+
return deferred.Promise();
|
|
1091
|
+
}
|
|
763
1092
|
|
|
764
1093
|
protected:
|
|
1094
|
+
Napi::Promise::Deferred deferred;
|
|
1095
|
+
|
|
765
1096
|
void Execute() {
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
if (r
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
1097
|
+
try {
|
|
1098
|
+
// Perform the evaluation using llama_decode.
|
|
1099
|
+
int r = llama_decode(ctx->ctx, ctx->batch);
|
|
1100
|
+
|
|
1101
|
+
if (r != 0) {
|
|
1102
|
+
if (r == 1) {
|
|
1103
|
+
SetError("could not find a KV slot for the batch (try reducing the size of the batch or increase the context)");
|
|
1104
|
+
} else {
|
|
1105
|
+
SetError("Eval has failed");
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
return;
|
|
774
1109
|
}
|
|
775
1110
|
|
|
776
|
-
|
|
1111
|
+
llama_synchronize(ctx->ctx);
|
|
1112
|
+
} catch (const std::exception& e) {
|
|
1113
|
+
SetError(e.what());
|
|
1114
|
+
} catch(...) {
|
|
1115
|
+
SetError("Unknown error when calling \"llama_decode\"");
|
|
777
1116
|
}
|
|
778
1117
|
}
|
|
779
1118
|
void OnOK() {
|
|
780
|
-
|
|
781
|
-
Napi::Promise::Deferred::Resolve(env.Undefined());
|
|
1119
|
+
deferred.Resolve(Env().Undefined());
|
|
782
1120
|
}
|
|
783
1121
|
void OnError(const Napi::Error& err) {
|
|
784
|
-
|
|
1122
|
+
deferred.Reject(err.Value());
|
|
785
1123
|
}
|
|
786
1124
|
};
|
|
787
1125
|
|
|
788
1126
|
Napi::Value AddonContext::DecodeBatch(const Napi::CallbackInfo& info) {
|
|
789
|
-
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info, this);
|
|
1127
|
+
AddonContextDecodeBatchWorker* worker = new AddonContextDecodeBatchWorker(info.Env(), this);
|
|
790
1128
|
worker->Queue();
|
|
791
|
-
return worker->
|
|
1129
|
+
return worker->GetPromise();
|
|
792
1130
|
}
|
|
793
1131
|
|
|
794
|
-
class
|
|
1132
|
+
class AddonContextLoadContextWorker : public Napi::AsyncWorker {
|
|
1133
|
+
public:
|
|
1134
|
+
AddonContext* context;
|
|
1135
|
+
|
|
1136
|
+
AddonContextLoadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1137
|
+
: Napi::AsyncWorker(env, "AddonContextLoadContextWorker"),
|
|
1138
|
+
context(context),
|
|
1139
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1140
|
+
context->Ref();
|
|
1141
|
+
}
|
|
1142
|
+
~AddonContextLoadContextWorker() {
|
|
1143
|
+
context->Unref();
|
|
1144
|
+
}
|
|
1145
|
+
|
|
1146
|
+
Napi::Promise GetPromise() {
|
|
1147
|
+
return deferred.Promise();
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
protected:
|
|
1151
|
+
Napi::Promise::Deferred deferred;
|
|
1152
|
+
|
|
1153
|
+
void Execute() {
|
|
1154
|
+
try {
|
|
1155
|
+
context->ctx = llama_new_context_with_model(context->model->model, context->context_params);
|
|
1156
|
+
|
|
1157
|
+
context->contextLoaded = context->ctx != nullptr && context->ctx != NULL;
|
|
1158
|
+
} catch (const std::exception& e) {
|
|
1159
|
+
SetError(e.what());
|
|
1160
|
+
} catch(...) {
|
|
1161
|
+
SetError("Unknown error when calling \"llama_new_context_with_model\"");
|
|
1162
|
+
}
|
|
1163
|
+
}
|
|
1164
|
+
void OnOK() {
|
|
1165
|
+
if (context->contextLoaded) {
|
|
1166
|
+
uint64_t contextMemorySize = llama_get_state_size(context->ctx);
|
|
1167
|
+
adjustNapiExternalMemoryAdd(Env(), contextMemorySize);
|
|
1168
|
+
context->loadedContextMemorySize = contextMemorySize;
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
deferred.Resolve(Napi::Boolean::New(Env(), context->contextLoaded));
|
|
1172
|
+
}
|
|
1173
|
+
void OnError(const Napi::Error& err) {
|
|
1174
|
+
deferred.Reject(err.Value());
|
|
1175
|
+
}
|
|
1176
|
+
};
|
|
1177
|
+
class AddonContextUnloadContextWorker : public Napi::AsyncWorker {
|
|
1178
|
+
public:
|
|
1179
|
+
AddonContext* context;
|
|
1180
|
+
|
|
1181
|
+
AddonContextUnloadContextWorker(const Napi::Env& env, AddonContext* context)
|
|
1182
|
+
: Napi::AsyncWorker(env, "AddonContextUnloadContextWorker"),
|
|
1183
|
+
context(context),
|
|
1184
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1185
|
+
context->Ref();
|
|
1186
|
+
}
|
|
1187
|
+
~AddonContextUnloadContextWorker() {
|
|
1188
|
+
context->Unref();
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
Napi::Promise GetPromise() {
|
|
1192
|
+
return deferred.Promise();
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
protected:
|
|
1196
|
+
Napi::Promise::Deferred deferred;
|
|
1197
|
+
|
|
1198
|
+
void Execute() {
|
|
1199
|
+
try {
|
|
1200
|
+
llama_free(context->ctx);
|
|
1201
|
+
context->contextLoaded = false;
|
|
1202
|
+
|
|
1203
|
+
try {
|
|
1204
|
+
if (context->has_batch) {
|
|
1205
|
+
llama_batch_free(context->batch);
|
|
1206
|
+
context->has_batch = false;
|
|
1207
|
+
context->batch_n_tokens = 0;
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
context->dispose();
|
|
1211
|
+
} catch (const std::exception& e) {
|
|
1212
|
+
SetError(e.what());
|
|
1213
|
+
} catch(...) {
|
|
1214
|
+
SetError("Unknown error when calling \"llama_batch_free\"");
|
|
1215
|
+
}
|
|
1216
|
+
} catch (const std::exception& e) {
|
|
1217
|
+
SetError(e.what());
|
|
1218
|
+
} catch(...) {
|
|
1219
|
+
SetError("Unknown error when calling \"llama_free\"");
|
|
1220
|
+
}
|
|
1221
|
+
}
|
|
1222
|
+
void OnOK() {
|
|
1223
|
+
adjustNapiExternalMemorySubtract(Env(), context->loadedContextMemorySize);
|
|
1224
|
+
context->loadedContextMemorySize = 0;
|
|
1225
|
+
|
|
1226
|
+
adjustNapiExternalMemorySubtract(Env(), context->batchMemorySize);
|
|
1227
|
+
context->batchMemorySize = 0;
|
|
1228
|
+
|
|
1229
|
+
deferred.Resolve(Env().Undefined());
|
|
1230
|
+
}
|
|
1231
|
+
void OnError(const Napi::Error& err) {
|
|
1232
|
+
deferred.Reject(err.Value());
|
|
1233
|
+
}
|
|
1234
|
+
};
|
|
1235
|
+
|
|
1236
|
+
Napi::Value AddonContext::Init(const Napi::CallbackInfo& info) {
|
|
1237
|
+
if (disposed) {
|
|
1238
|
+
Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
|
|
1239
|
+
return info.Env().Undefined();
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1242
|
+
AddonContextLoadContextWorker* worker = new AddonContextLoadContextWorker(this->Env(), this);
|
|
1243
|
+
worker->Queue();
|
|
1244
|
+
return worker->GetPromise();
|
|
1245
|
+
}
|
|
1246
|
+
Napi::Value AddonContext::Dispose(const Napi::CallbackInfo& info) {
|
|
1247
|
+
if (disposed) {
|
|
1248
|
+
return info.Env().Undefined();
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1251
|
+
if (contextLoaded) {
|
|
1252
|
+
contextLoaded = false;
|
|
1253
|
+
|
|
1254
|
+
AddonContextUnloadContextWorker* worker = new AddonContextUnloadContextWorker(this->Env(), this);
|
|
1255
|
+
worker->Queue();
|
|
1256
|
+
return worker->GetPromise();
|
|
1257
|
+
} else {
|
|
1258
|
+
dispose();
|
|
1259
|
+
|
|
1260
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1261
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1262
|
+
return deferred.Promise();
|
|
1263
|
+
}
|
|
1264
|
+
}
|
|
1265
|
+
|
|
1266
|
+
class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
|
|
795
1267
|
public:
|
|
796
1268
|
AddonContext* ctx;
|
|
797
1269
|
AddonGrammarEvaluationState* grammar_evaluation_state;
|
|
@@ -811,7 +1283,7 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
811
1283
|
AddonContextSampleTokenWorker(const Napi::CallbackInfo& info, AddonContext* ctx)
|
|
812
1284
|
: Napi::AsyncWorker(info.Env(), "AddonContextSampleTokenWorker"),
|
|
813
1285
|
ctx(ctx),
|
|
814
|
-
Napi::Promise::Deferred(info.Env()) {
|
|
1286
|
+
deferred(Napi::Promise::Deferred::New(info.Env())) {
|
|
815
1287
|
ctx->Ref();
|
|
816
1288
|
|
|
817
1289
|
batchLogitIndex = info[0].As<Napi::Number>().Int32Value();
|
|
@@ -874,11 +1346,25 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
874
1346
|
use_grammar = false;
|
|
875
1347
|
}
|
|
876
1348
|
}
|
|
877
|
-
|
|
878
|
-
|
|
1349
|
+
|
|
1350
|
+
Napi::Promise GetPromise() {
|
|
1351
|
+
return deferred.Promise();
|
|
1352
|
+
}
|
|
879
1353
|
|
|
880
1354
|
protected:
|
|
1355
|
+
Napi::Promise::Deferred deferred;
|
|
1356
|
+
|
|
881
1357
|
void Execute() {
|
|
1358
|
+
try {
|
|
1359
|
+
SampleToken();
|
|
1360
|
+
} catch (const std::exception& e) {
|
|
1361
|
+
SetError(e.what());
|
|
1362
|
+
} catch(...) {
|
|
1363
|
+
SetError("Unknown error when calling \"SampleToken\"");
|
|
1364
|
+
}
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
void SampleToken() {
|
|
882
1368
|
llama_token new_token_id = 0;
|
|
883
1369
|
|
|
884
1370
|
// Select the best prediction.
|
|
@@ -940,19 +1426,18 @@ class AddonContextSampleTokenWorker : Napi::AsyncWorker, Napi::Promise::Deferred
|
|
|
940
1426
|
result = new_token_id;
|
|
941
1427
|
}
|
|
942
1428
|
void OnOK() {
|
|
943
|
-
Napi::
|
|
944
|
-
|
|
945
|
-
Napi::Promise::Deferred::Resolve(resultValue);
|
|
1429
|
+
Napi::Number resultValue = Napi::Number::New(Env(), static_cast<uint32_t>(result));
|
|
1430
|
+
deferred.Resolve(resultValue);
|
|
946
1431
|
}
|
|
947
1432
|
void OnError(const Napi::Error& err) {
|
|
948
|
-
|
|
1433
|
+
deferred.Reject(err.Value());
|
|
949
1434
|
}
|
|
950
1435
|
};
|
|
951
1436
|
|
|
952
1437
|
Napi::Value AddonContext::SampleToken(const Napi::CallbackInfo& info) {
|
|
953
1438
|
AddonContextSampleTokenWorker* worker = new AddonContextSampleTokenWorker(info, this);
|
|
954
1439
|
worker->Queue();
|
|
955
|
-
return worker->
|
|
1440
|
+
return worker->GetPromise();
|
|
956
1441
|
}
|
|
957
1442
|
|
|
958
1443
|
Napi::Value systemInfo(const Napi::CallbackInfo& info) {
|
|
@@ -1025,6 +1510,9 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi
|
|
|
1025
1510
|
|
|
1026
1511
|
if (status == napi_ok) {
|
|
1027
1512
|
return;
|
|
1513
|
+
} else {
|
|
1514
|
+
delete stringStream;
|
|
1515
|
+
delete data;
|
|
1028
1516
|
}
|
|
1029
1517
|
}
|
|
1030
1518
|
|
|
@@ -1082,21 +1570,123 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
|
|
|
1082
1570
|
return info.Env().Undefined();
|
|
1083
1571
|
}
|
|
1084
1572
|
|
|
1573
|
+
class AddonBackendLoadWorker : public Napi::AsyncWorker {
|
|
1574
|
+
public:
|
|
1575
|
+
AddonBackendLoadWorker(const Napi::Env& env)
|
|
1576
|
+
: Napi::AsyncWorker(env, "AddonBackendLoadWorker"),
|
|
1577
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1578
|
+
}
|
|
1579
|
+
~AddonBackendLoadWorker() {
|
|
1580
|
+
}
|
|
1581
|
+
|
|
1582
|
+
Napi::Promise GetPromise() {
|
|
1583
|
+
return deferred.Promise();
|
|
1584
|
+
}
|
|
1585
|
+
|
|
1586
|
+
protected:
|
|
1587
|
+
Napi::Promise::Deferred deferred;
|
|
1588
|
+
|
|
1589
|
+
void Execute() {
|
|
1590
|
+
try {
|
|
1591
|
+
llama_backend_init();
|
|
1592
|
+
|
|
1593
|
+
try {
|
|
1594
|
+
if (backendDisposed) {
|
|
1595
|
+
llama_backend_free();
|
|
1596
|
+
} else {
|
|
1597
|
+
backendInitialized = true;
|
|
1598
|
+
}
|
|
1599
|
+
} catch (const std::exception& e) {
|
|
1600
|
+
SetError(e.what());
|
|
1601
|
+
} catch(...) {
|
|
1602
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1603
|
+
}
|
|
1604
|
+
} catch (const std::exception& e) {
|
|
1605
|
+
SetError(e.what());
|
|
1606
|
+
} catch(...) {
|
|
1607
|
+
SetError("Unknown error when calling \"llama_backend_init\"");
|
|
1608
|
+
}
|
|
1609
|
+
}
|
|
1610
|
+
void OnOK() {
|
|
1611
|
+
deferred.Resolve(Env().Undefined());
|
|
1612
|
+
}
|
|
1613
|
+
void OnError(const Napi::Error& err) {
|
|
1614
|
+
deferred.Reject(err.Value());
|
|
1615
|
+
}
|
|
1616
|
+
};
|
|
1617
|
+
|
|
1618
|
+
|
|
1619
|
+
class AddonBackendUnloadWorker : public Napi::AsyncWorker {
|
|
1620
|
+
public:
|
|
1621
|
+
AddonBackendUnloadWorker(const Napi::Env& env)
|
|
1622
|
+
: Napi::AsyncWorker(env, "AddonBackendUnloadWorker"),
|
|
1623
|
+
deferred(Napi::Promise::Deferred::New(env)) {
|
|
1624
|
+
}
|
|
1625
|
+
~AddonBackendUnloadWorker() {
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1628
|
+
Napi::Promise GetPromise() {
|
|
1629
|
+
return deferred.Promise();
|
|
1630
|
+
}
|
|
1631
|
+
|
|
1632
|
+
protected:
|
|
1633
|
+
Napi::Promise::Deferred deferred;
|
|
1634
|
+
|
|
1635
|
+
void Execute() {
|
|
1636
|
+
try {
|
|
1637
|
+
if (backendInitialized) {
|
|
1638
|
+
backendInitialized = false;
|
|
1639
|
+
llama_backend_free();
|
|
1640
|
+
}
|
|
1641
|
+
} catch (const std::exception& e) {
|
|
1642
|
+
SetError(e.what());
|
|
1643
|
+
} catch(...) {
|
|
1644
|
+
SetError("Unknown error when calling \"llama_backend_free\"");
|
|
1645
|
+
}
|
|
1646
|
+
}
|
|
1647
|
+
void OnOK() {
|
|
1648
|
+
deferred.Resolve(Env().Undefined());
|
|
1649
|
+
}
|
|
1650
|
+
void OnError(const Napi::Error& err) {
|
|
1651
|
+
deferred.Reject(err.Value());
|
|
1652
|
+
}
|
|
1653
|
+
};
|
|
1654
|
+
|
|
1085
1655
|
Napi::Value addonInit(const Napi::CallbackInfo& info) {
|
|
1086
|
-
if (
|
|
1087
|
-
|
|
1088
|
-
|
|
1656
|
+
if (backendInitialized) {
|
|
1657
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1658
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1659
|
+
return deferred.Promise();
|
|
1089
1660
|
}
|
|
1090
1661
|
|
|
1091
|
-
|
|
1662
|
+
AddonBackendLoadWorker* worker = new AddonBackendLoadWorker(info.Env());
|
|
1663
|
+
worker->Queue();
|
|
1664
|
+
return worker->GetPromise();
|
|
1665
|
+
}
|
|
1092
1666
|
|
|
1093
|
-
|
|
1667
|
+
Napi::Value addonDispose(const Napi::CallbackInfo& info) {
|
|
1668
|
+
if (backendDisposed) {
|
|
1669
|
+
Napi::Promise::Deferred deferred = Napi::Promise::Deferred::New(info.Env());
|
|
1670
|
+
deferred.Resolve(info.Env().Undefined());
|
|
1671
|
+
return deferred.Promise();
|
|
1672
|
+
}
|
|
1673
|
+
|
|
1674
|
+
backendDisposed = true;
|
|
1675
|
+
|
|
1676
|
+
AddonBackendUnloadWorker* worker = new AddonBackendUnloadWorker(info.Env());
|
|
1677
|
+
worker->Queue();
|
|
1678
|
+
return worker->GetPromise();
|
|
1094
1679
|
}
|
|
1095
1680
|
|
|
1096
1681
|
static void addonFreeLlamaBackend(Napi::Env env, int* data) {
|
|
1682
|
+
if (backendDisposed) {
|
|
1683
|
+
return;
|
|
1684
|
+
}
|
|
1685
|
+
|
|
1686
|
+
backendDisposed = true;
|
|
1097
1687
|
if (backendInitialized) {
|
|
1098
|
-
llama_backend_free();
|
|
1099
1688
|
backendInitialized = false;
|
|
1689
|
+
llama_backend_free();
|
|
1100
1690
|
}
|
|
1101
1691
|
}
|
|
1102
1692
|
|
|
@@ -1108,12 +1698,15 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
|
|
|
1108
1698
|
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
|
|
1109
1699
|
Napi::PropertyDescriptor::Function("getGpuType", getGpuType),
|
|
1110
1700
|
Napi::PropertyDescriptor::Function("init", addonInit),
|
|
1701
|
+
Napi::PropertyDescriptor::Function("dispose", addonDispose),
|
|
1111
1702
|
});
|
|
1112
1703
|
AddonModel::init(exports);
|
|
1113
1704
|
AddonGrammar::init(exports);
|
|
1114
1705
|
AddonGrammarEvaluationState::init(exports);
|
|
1115
1706
|
AddonContext::init(exports);
|
|
1116
1707
|
|
|
1708
|
+
llama_log_set(addonLlamaCppLogCallback, nullptr);
|
|
1709
|
+
|
|
1117
1710
|
exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
|
|
1118
1711
|
|
|
1119
1712
|
return exports;
|