@fugood/llama.node 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +18 -0
- package/src/LlamaCompletionWorker.h +4 -0
- package/src/LlamaContext.cpp +82 -17
- package/src/addons.cc +21 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/bin/win32/arm64/node.lib
CHANGED
|
Binary file
|
|
Binary file
|
package/bin/win32/x64/node.lib
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -507,6 +507,8 @@ void LlamaCompletionWorker::Execute() {
|
|
|
507
507
|
_result.tokens_predicted += 1;
|
|
508
508
|
n_input = 1;
|
|
509
509
|
if (_has_callback) {
|
|
510
|
+
// TODO: When we got possible stop words (startsWith)
|
|
511
|
+
// we should avoid calling the callback, wait for the next token
|
|
510
512
|
const char *c_token = strdup(token.c_str());
|
|
511
513
|
_tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
|
|
512
514
|
const char *value) {
|
|
@@ -518,6 +520,8 @@ void LlamaCompletionWorker::Execute() {
|
|
|
518
520
|
}
|
|
519
521
|
// is it an end of generation?
|
|
520
522
|
if (llama_vocab_is_eog(vocab, new_token_id)) {
|
|
523
|
+
_result.stopped_eos = true;
|
|
524
|
+
// TODO: EOS token should be cut
|
|
521
525
|
break;
|
|
522
526
|
}
|
|
523
527
|
// check for stop words
|
|
@@ -525,10 +529,16 @@ void LlamaCompletionWorker::Execute() {
|
|
|
525
529
|
const size_t stop_pos =
|
|
526
530
|
findStoppingStrings(_result.text, token.size(), _stop_words);
|
|
527
531
|
if (stop_pos != std::string::npos) {
|
|
532
|
+
_result.stopped_words = true;
|
|
533
|
+
_result.stopping_word = _result.text.substr(stop_pos, token.size());
|
|
534
|
+
_result.text = _result.text.substr(0, stop_pos - 1);
|
|
528
535
|
break;
|
|
529
536
|
}
|
|
530
537
|
}
|
|
531
538
|
}
|
|
539
|
+
if (!_result.stopped_eos && !_result.stopped_words) {
|
|
540
|
+
_result.stopped_limited = true;
|
|
541
|
+
}
|
|
532
542
|
const auto t_main_end = ggml_time_us();
|
|
533
543
|
_sess->get_mutex().unlock();
|
|
534
544
|
if (_onComplete) {
|
|
@@ -549,6 +559,14 @@ void LlamaCompletionWorker::OnOK() {
|
|
|
549
559
|
Napi::Boolean::New(env, _result.context_full));
|
|
550
560
|
result.Set("text",
|
|
551
561
|
Napi::String::New(env, _result.text.c_str()));
|
|
562
|
+
result.Set("stopped_eos",
|
|
563
|
+
Napi::Boolean::New(env, _result.stopped_eos));
|
|
564
|
+
result.Set("stopped_words",
|
|
565
|
+
Napi::Boolean::New(env, _result.stopped_words));
|
|
566
|
+
result.Set("stopping_word",
|
|
567
|
+
Napi::String::New(env, _result.stopping_word.c_str()));
|
|
568
|
+
result.Set("stopped_limited",
|
|
569
|
+
Napi::Boolean::New(env, _result.stopped_limited));
|
|
552
570
|
|
|
553
571
|
Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
|
|
554
572
|
std::string reasoning_content = "";
|
package/src/LlamaContext.cpp
CHANGED
|
@@ -12,6 +12,10 @@
|
|
|
12
12
|
#include "SaveSessionWorker.h"
|
|
13
13
|
#include "TokenizeWorker.h"
|
|
14
14
|
|
|
15
|
+
#include <mutex>
|
|
16
|
+
#include <queue>
|
|
17
|
+
#include <atomic>
|
|
18
|
+
|
|
15
19
|
// Helper function for formatted strings (for console logs)
|
|
16
20
|
template<typename ... Args>
|
|
17
21
|
static std::string format_string(const std::string& format, Args ... args) {
|
|
@@ -383,17 +387,60 @@ bool validateModelChatTemplate(const struct llama_model * model, const bool use_
|
|
|
383
387
|
return common_chat_verify_template(tmpl, use_jinja);
|
|
384
388
|
}
|
|
385
389
|
|
|
386
|
-
|
|
390
|
+
// Store log messages for processing
|
|
391
|
+
// One native log record: the severity label and the message text that are
// later passed to the JavaScript log callback as two string arguments.
struct LogMessage {
  std::string level;  // severity label such as "error" or "warn"; may be empty
  std::string text;   // message text exactly as received from the native logger
};
|
|
395
|
+
|
|
396
|
+
// Global variables for logging
|
|
397
|
+
static Napi::ThreadSafeFunction g_tsfn;
|
|
398
|
+
static std::atomic<bool> g_logging_enabled{false};
|
|
399
|
+
static std::mutex g_mutex;
|
|
400
|
+
static std::queue<LogMessage> g_message_queue;
|
|
401
|
+
|
|
402
|
+
// Forward declaration of the cleanup function
|
|
403
|
+
extern "C" void cleanup_logging();
|
|
387
404
|
|
|
388
405
|
// toggleNativeLog(enable: boolean, callback: (log: string) => void): void
|
|
389
406
|
void LlamaContext::ToggleNativeLog(const Napi::CallbackInfo &info) {
|
|
407
|
+
Napi::Env env = info.Env();
|
|
390
408
|
bool enable = info[0].ToBoolean().Value();
|
|
409
|
+
|
|
391
410
|
if (enable) {
|
|
392
|
-
|
|
411
|
+
if (!info[1].IsFunction()) {
|
|
412
|
+
Napi::TypeError::New(env, "Callback function required").ThrowAsJavaScriptException();
|
|
413
|
+
return;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
// First clean up existing thread-safe function if any
|
|
417
|
+
if (g_logging_enabled) {
|
|
418
|
+
g_tsfn.Release();
|
|
419
|
+
g_logging_enabled = false;
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
// Create thread-safe function that can be called from any thread
|
|
423
|
+
g_tsfn = Napi::ThreadSafeFunction::New(
|
|
424
|
+
env,
|
|
425
|
+
info[1].As<Napi::Function>(),
|
|
426
|
+
"LLAMA Logger",
|
|
427
|
+
0,
|
|
428
|
+
1,
|
|
429
|
+
[](Napi::Env) {
|
|
430
|
+
// Finalizer callback - nothing needed here
|
|
431
|
+
}
|
|
432
|
+
);
|
|
433
|
+
|
|
434
|
+
g_logging_enabled = true;
|
|
393
435
|
|
|
394
|
-
|
|
436
|
+
// Set up log callback
|
|
437
|
+
llama_log_set([](ggml_log_level level, const char* text, void* user_data) {
|
|
438
|
+
// First call the default logger
|
|
395
439
|
llama_log_callback_default(level, text, user_data);
|
|
396
440
|
|
|
441
|
+
if (!g_logging_enabled) return;
|
|
442
|
+
|
|
443
|
+
// Determine log level string
|
|
397
444
|
std::string level_str = "";
|
|
398
445
|
if (level == GGML_LOG_LEVEL_ERROR) {
|
|
399
446
|
level_str = "error";
|
|
@@ -402,24 +449,32 @@ void LlamaContext::ToggleNativeLog(const Napi::CallbackInfo &info) {
|
|
|
402
449
|
} else if (level == GGML_LOG_LEVEL_WARN) {
|
|
403
450
|
level_str = "warn";
|
|
404
451
|
}
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
Napi::String::New(env,
|
|
414
|
-
Napi::String::New(env, text)
|
|
452
|
+
|
|
453
|
+
// Create a heap-allocated copy of the data
|
|
454
|
+
auto* data = new LogMessage{level_str, text};
|
|
455
|
+
|
|
456
|
+
// Queue callback to be executed on the JavaScript thread
|
|
457
|
+
auto status = g_tsfn.BlockingCall(data, [](Napi::Env env, Napi::Function jsCallback, LogMessage* data) {
|
|
458
|
+
// This code runs on the JavaScript thread
|
|
459
|
+
jsCallback.Call({
|
|
460
|
+
Napi::String::New(env, data->level),
|
|
461
|
+
Napi::String::New(env, data->text)
|
|
415
462
|
});
|
|
416
|
-
|
|
417
|
-
|
|
463
|
+
delete data;
|
|
464
|
+
});
|
|
465
|
+
|
|
466
|
+
// If the call failed (e.g., runtime is shutting down), clean up the data
|
|
467
|
+
if (status != napi_ok) {
|
|
468
|
+
delete data;
|
|
418
469
|
}
|
|
419
470
|
}, nullptr);
|
|
420
471
|
} else {
|
|
421
|
-
|
|
422
|
-
|
|
472
|
+
// Disable logging
|
|
473
|
+
if (g_logging_enabled) {
|
|
474
|
+
g_logging_enabled = false;
|
|
475
|
+
g_tsfn.Release();
|
|
476
|
+
llama_log_set(llama_log_callback_default, nullptr);
|
|
477
|
+
}
|
|
423
478
|
}
|
|
424
479
|
}
|
|
425
480
|
|
|
@@ -1004,6 +1059,7 @@ Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
|
|
|
1004
1059
|
if (_wip != nullptr) {
|
|
1005
1060
|
_wip->SetStop();
|
|
1006
1061
|
}
|
|
1062
|
+
|
|
1007
1063
|
if (_sess == nullptr) {
|
|
1008
1064
|
auto promise = Napi::Promise::Deferred(env);
|
|
1009
1065
|
promise.Resolve(env.Undefined());
|
|
@@ -1020,6 +1076,15 @@ Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
|
|
|
1020
1076
|
return worker->Promise();
|
|
1021
1077
|
}
|
|
1022
1078
|
|
|
1079
|
+
// Cleanup function for the logging system
|
|
1080
|
+
// This is exposed externally for module cleanup
|
|
1081
|
+
extern "C" void cleanup_logging() {
|
|
1082
|
+
if (g_logging_enabled) {
|
|
1083
|
+
g_logging_enabled = false;
|
|
1084
|
+
g_tsfn.Release();
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
|
|
1023
1088
|
LlamaContext::~LlamaContext() {
|
|
1024
1089
|
if (_mtmd_ctx != nullptr) {
|
|
1025
1090
|
mtmd_free(_mtmd_ctx);
|
package/src/addons.cc
CHANGED
|
@@ -1,8 +1,29 @@
|
|
|
1
1
|
#include "LlamaContext.h"
|
|
2
2
|
#include <napi.h>
|
|
3
3
|
|
|
4
|
+
// Forward declaration of our cleanup function
|
|
5
|
+
extern "C" void cleanup_logging();
|
|
6
|
+
|
|
7
|
+
// Register cleanup function on module unload
|
|
8
|
+
// JavaScript-callable helper (exported as "__registerCleanup") that arranges
// for cleanup_logging() to run when the calling environment exits.
// Returns `undefined`.
//
// Node-API aborts the process when the same (function, arg) pair is added as
// an env cleanup hook twice, and every call here passes the same lambda with
// a null arg. The guard below turns repeated calls into a harmless no-op.
static Napi::Value register_cleanup(const Napi::CallbackInfo& info) {
  Napi::Env env = info.Env();

  // One flag per thread.
  // NOTE(review): assumes each environment lives on its own thread — confirm
  // for embedders. If a thread ever hosts a second environment, this helper
  // skips it; Init() still installs its own hook for every environment.
  thread_local bool hook_registered = false;
  if (!hook_registered) {
    const napi_status status =
        napi_add_env_cleanup_hook(env, [](void*) { cleanup_logging(); }, nullptr);
    // Only remember success, so a failed attempt can be retried later.
    hook_registered = (status == napi_ok);
  }

  return env.Undefined();
}
|
|
15
|
+
|
|
4
16
|
// Module initializer: registers the LlamaContext bindings, exports the
// "__registerCleanup" helper, and installs an environment cleanup hook that
// shuts down the logging bridge. Returns the populated `exports` object.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  LlamaContext::Init(env, exports);

  // Expose the manual hook-registration helper to JavaScript.
  Napi::Function register_cleanup_fn = Napi::Function::New(env, register_cleanup);
  exports.Set("__registerCleanup", register_cleanup_fn);

  // Install the teardown hook right away so logging is cleaned up even when
  // JavaScript never calls the exported helper.
  const auto on_env_exit = [](void*) { cleanup_logging(); };
  napi_add_env_cleanup_hook(env, on_env_exit, nullptr);

  return exports;
}
|
|
8
29
|
|