@fugood/llama.node 0.4.3 → 0.4.5

This diff shows the publicly released contents of the two package versions as they appear in their public registry; it is provided for informational purposes only.
Binary files changed (11 prebuilt binary entries; contents not shown)
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.4.3",
+  "version": "0.4.5",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
package/src/LlamaCompletionWorker.cpp CHANGED
@@ -507,6 +507,8 @@ void LlamaCompletionWorker::Execute() {
     _result.tokens_predicted += 1;
     n_input = 1;
     if (_has_callback) {
+      // TODO: When we got possible stop words (startsWith)
+      // we should avoid calling the callback, wait for the next token
      const char *c_token = strdup(token.c_str());
      _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
                                     const char *value) {
@@ -518,6 +520,8 @@ void LlamaCompletionWorker::Execute() {
     }
     // is it an end of generation?
     if (llama_vocab_is_eog(vocab, new_token_id)) {
+      _result.stopped_eos = true;
+      // TODO: EOS token should be cut
       break;
     }
     // check for stop words
@@ -525,10 +529,16 @@ void LlamaCompletionWorker::Execute() {
      const size_t stop_pos =
          findStoppingStrings(_result.text, token.size(), _stop_words);
      if (stop_pos != std::string::npos) {
+        _result.stopped_words = true;
+        _result.stopping_word = _result.text.substr(stop_pos, token.size());
+        _result.text = _result.text.substr(0, stop_pos - 1);
        break;
      }
    }
  }
+  if (!_result.stopped_eos && !_result.stopped_words) {
+    _result.stopped_limited = true;
+  }
   const auto t_main_end = ggml_time_us();
   _sess->get_mutex().unlock();
   if (_onComplete) {
@@ -549,6 +559,14 @@ void LlamaCompletionWorker::OnOK() {
              Napi::Boolean::New(env, _result.context_full));
   result.Set("text",
              Napi::String::New(env, _result.text.c_str()));
+  result.Set("stopped_eos",
+             Napi::Boolean::New(env, _result.stopped_eos));
+  result.Set("stopped_words",
+             Napi::Boolean::New(env, _result.stopped_words));
+  result.Set("stopping_word",
+             Napi::String::New(env, _result.stopping_word.c_str()));
+  result.Set("stopped_limited",
+             Napi::Boolean::New(env, _result.stopped_limited));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
   std::string reasoning_content = "";
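
Note: the completion result now reports why generation stopped. A minimal consumption sketch follows; the loadModel/completion call shapes are assumptions about this package's JS surface, while the four new field names come straight from OnOK above.

// Hypothetical usage; only stopped_eos, stopped_words, stopping_word and
// stopped_limited are confirmed by this diff.
import { loadModel } from '@fugood/llama.node'

const context = await loadModel({ model: './model.gguf' }) // assumed option name
const result = await context.completion({ prompt: 'Hello', stop: ['###'] })

if (result.stopped_words) {
  console.log('matched stop word:', result.stopping_word)
} else if (result.stopped_eos) {
  console.log('model emitted an end-of-generation token')
} else if (result.stopped_limited) {
  console.log('generation ended at the token limit')
}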
package/src/LlamaCompletionWorker.h CHANGED
@@ -57,5 +57,9 @@ private:
     bool truncated = false;
     bool context_full = false;
     std::string text;
+    bool stopped_eos = false;
+    bool stopped_words = false;
+    std::string stopping_word;
+    bool stopped_limited = false;
   } _result;
 };
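
The new header fields mirror one-to-one into the JS result object built in OnOK. A rough TypeScript view of the extended shape (inferred from this diff, not an official declaration; pre-existing fields other than those shown as diff context are elided):

interface CompletionResult {
  text: string
  truncated: boolean
  context_full: boolean
  stopped_eos: boolean     // generation ended on an EOG/EOS token
  stopped_words: boolean   // generation ended on a configured stop word
  stopping_word: string    // the stop word that matched, if any
  stopped_limited: boolean // neither EOS nor a stop word ended generation
}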
package/src/LlamaContext.cpp CHANGED
@@ -12,6 +12,10 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+#include <mutex>
+#include <queue>
+#include <atomic>
+
 // Helper function for formatted strings (for console logs)
 template<typename ... Args>
 static std::string format_string(const std::string& format, Args ... args) {
@@ -383,17 +387,60 @@ bool validateModelChatTemplate(const struct llama_model * model, const bool use_
   return common_chat_verify_template(tmpl, use_jinja);
 }
 
-static Napi::FunctionReference _log_callback;
+// Store log messages for processing
+struct LogMessage {
+  std::string level;
+  std::string text;
+};
+
+// Global variables for logging
+static Napi::ThreadSafeFunction g_tsfn;
+static std::atomic<bool> g_logging_enabled{false};
+static std::mutex g_mutex;
+static std::queue<LogMessage> g_message_queue;
+
+// Forward declaration of the cleanup function
+extern "C" void cleanup_logging();
 
 // toggleNativeLog(enable: boolean, callback: (log: string) => void): void
 void LlamaContext::ToggleNativeLog(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
   bool enable = info[0].ToBoolean().Value();
+
   if (enable) {
-    _log_callback.Reset(info[1].As<Napi::Function>());
+    if (!info[1].IsFunction()) {
+      Napi::TypeError::New(env, "Callback function required").ThrowAsJavaScriptException();
+      return;
+    }
+
+    // First clean up existing thread-safe function if any
+    if (g_logging_enabled) {
+      g_tsfn.Release();
+      g_logging_enabled = false;
+    }
+
+    // Create thread-safe function that can be called from any thread
+    g_tsfn = Napi::ThreadSafeFunction::New(
+      env,
+      info[1].As<Napi::Function>(),
+      "LLAMA Logger",
+      0,
+      1,
+      [](Napi::Env) {
+        // Finalizer callback - nothing needed here
+      }
+    );
+
+    g_logging_enabled = true;
 
-    llama_log_set([](ggml_log_level level, const char * text, void * user_data) {
+    // Set up log callback
+    llama_log_set([](ggml_log_level level, const char* text, void* user_data) {
+      // First call the default logger
       llama_log_callback_default(level, text, user_data);
 
+      if (!g_logging_enabled) return;
+
+      // Determine log level string
       std::string level_str = "";
       if (level == GGML_LOG_LEVEL_ERROR) {
         level_str = "error";
@@ -402,24 +449,32 @@ void LlamaContext::ToggleNativeLog(const Napi::CallbackInfo &info) {
       } else if (level == GGML_LOG_LEVEL_WARN) {
         level_str = "warn";
       }
-
-      if (_log_callback.IsEmpty()) {
-        return;
-      }
-      try {
-        Napi::Env env = _log_callback.Env();
-        Napi::HandleScope scope(env);
-        _log_callback.Call({
-          Napi::String::New(env, level_str),
-          Napi::String::New(env, text)
+
+      // Create a heap-allocated copy of the data
+      auto* data = new LogMessage{level_str, text};
+
+      // Queue callback to be executed on the JavaScript thread
+      auto status = g_tsfn.BlockingCall(data, [](Napi::Env env, Napi::Function jsCallback, LogMessage* data) {
+        // This code runs on the JavaScript thread
+        jsCallback.Call({
+          Napi::String::New(env, data->level),
+          Napi::String::New(env, data->text)
         });
-      } catch (const std::exception &e) {
-        // printf("Error calling log callback: %s\n", e.what());
+        delete data;
+      });
+
+      // If the call failed (e.g., runtime is shutting down), clean up the data
+      if (status != napi_ok) {
+        delete data;
       }
     }, nullptr);
   } else {
-    _log_callback.Reset();
-    llama_log_set(llama_log_callback_default, nullptr);
+    // Disable logging
+    if (g_logging_enabled) {
+      g_logging_enabled = false;
+      g_tsfn.Release();
+      llama_log_set(llama_log_callback_default, nullptr);
+    }
   }
 }
 
@@ -1004,6 +1059,7 @@ Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   if (_wip != nullptr) {
     _wip->SetStop();
   }
+
   if (_sess == nullptr) {
     auto promise = Napi::Promise::Deferred(env);
     promise.Resolve(env.Undefined());
@@ -1020,6 +1076,15 @@ Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   return worker->Promise();
 }
 
+// Cleanup function for the logging system
+// This is exposed externally for module cleanup
+extern "C" void cleanup_logging() {
+  if (g_logging_enabled) {
+    g_logging_enabled = false;
+    g_tsfn.Release();
+  }
+}
+
 LlamaContext::~LlamaContext() {
   if (_mtmd_ctx != nullptr) {
     mtmd_free(_mtmd_ctx);
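
With the logger rebuilt on Napi::ThreadSafeFunction, log lines can be forwarded safely from any llama.cpp worker thread, instead of only from the thread that owned the old FunctionReference. A usage sketch; the two-argument callback matches the lambda above, but treating toggleNativeLog as a plain module export is an assumption:

// Enable forwarding of native logs, then turn it off again.
import { toggleNativeLog } from '@fugood/llama.node' // assumed export location

toggleNativeLog(true, (level: string, text: string) => {
  // level is 'error', 'warn', or another level string set natively
  process.stderr.write(`[llama:${level}] ${text}`)
})

// Restores llama_log_callback_default and releases the thread-safe function.
toggleNativeLog(false)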
package/src/addons.cc CHANGED
@@ -1,8 +1,29 @@
 #include "LlamaContext.h"
 #include <napi.h>
 
+// Forward declaration of our cleanup function
+extern "C" void cleanup_logging();
+
+// Register cleanup function on module unload
+static Napi::Value register_cleanup(const Napi::CallbackInfo& info) {
+  napi_add_env_cleanup_hook(info.Env(), [](void*) {
+    cleanup_logging();
+  }, nullptr);
+
+  return info.Env().Undefined();
+}
+
 Napi::Object Init(Napi::Env env, Napi::Object exports) {
   LlamaContext::Init(env, exports);
+
+  // Register our cleanup handler for module unload
+  exports.Set("__registerCleanup", Napi::Function::New(env, register_cleanup));
+
+  // Also register cleanup directly on module init
+  napi_add_env_cleanup_hook(env, [](void*) {
+    cleanup_logging();
+  }, nullptr);
+
   return exports;
 }
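
Since Init registers the env cleanup hook itself, cleanup_logging() runs whenever the environment that loaded the addon shuts down; calling the exported __registerCleanup from JS merely installs the same hook again. A sketch of the scenario this guards, assuming a hypothetical worker script that loads the addon:

// Without the hook, a still-live ThreadSafeFunction could keep the worker's
// environment from tearing down cleanly on exit.
import { Worker } from 'node:worker_threads'

const worker = new Worker('./use-llama.js') // hypothetical: loads @fugood/llama.node
worker.on('exit', (code) => {
  console.log(`worker exited with ${code}; addon env cleanup hooks have run`)
})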