@fugood/llama.node 0.4.3 → 0.4.4

11 binary files changed (file names not shown in this diff view)
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.4.3",
+  "version": "0.4.4",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -507,6 +507,8 @@ void LlamaCompletionWorker::Execute() {
     _result.tokens_predicted += 1;
     n_input = 1;
     if (_has_callback) {
+      // TODO: When we got possible stop words (startsWith)
+      // we should avoid calling the callback, wait for the next token
       const char *c_token = strdup(token.c_str());
       _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
                                      const char *value) {
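
The TODO above is about streaming: when the accumulated text ends in a prefix of a stop word, the chunk should be held back instead of being passed to the JS callback, otherwise the consumer briefly sees text that is about to be trimmed away. A minimal TypeScript sketch of that hold-back idea (a hypothetical helper for illustration, not code from this package):

// Split pending text into a part that is safe to emit and a tail that could
// still grow into a stop word on a later token (the startsWith check).
function splitHoldback(pending: string, stopWords: string[]): { emit: string; hold: string } {
  for (let i = 0; i < pending.length; i++) {
    const tail = pending.slice(i);
    if (stopWords.some((w) => w.startsWith(tail))) {
      return { emit: pending.slice(0, i), hold: tail };
    }
  }
  return { emit: pending, hold: "" };
}
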
@@ -518,6 +520,8 @@ void LlamaCompletionWorker::Execute() {
     }
     // is it an end of generation?
     if (llama_vocab_is_eog(vocab, new_token_id)) {
+      _result.stopped_eos = true;
+      // TODO: EOS token should be cut
       break;
     }
     // check for stop words
@@ -525,10 +529,16 @@ void LlamaCompletionWorker::Execute() {
       const size_t stop_pos =
           findStoppingStrings(_result.text, token.size(), _stop_words);
       if (stop_pos != std::string::npos) {
+        _result.stopped_words = true;
+        _result.stopping_word = _result.text.substr(stop_pos, token.size());
+        _result.text = _result.text.substr(0, stop_pos - 1);
         break;
       }
     }
   }
+  if (!_result.stopped_eos && !_result.stopped_words) {
+    _result.stopped_limited = true;
+  }
   const auto t_main_end = ggml_time_us();
   _sess->get_mutex().unlock();
   if (_onComplete) {
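
When a stop word fires, the new code records which word matched and truncates the returned text at the match. A TypeScript sketch of the same bookkeeping, assuming (as the call above suggests) that findStoppingStrings returns the index in the accumulated text where the match begins, or npos when there is none:

// Illustrative mirror of the C++ trimming above: remember the fragment that
// triggered the stop and keep only the text before it. Note the C++ passes
// stop_pos - 1 as the length, dropping one extra character before the match.
function applyStopWord(text: string, stopPos: number, tokenLen: number) {
  const stoppingWord = text.slice(stopPos, stopPos + tokenLen);
  const trimmedText = text.slice(0, stopPos);
  return { trimmedText, stoppingWord };
}
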
@@ -549,6 +559,14 @@ void LlamaCompletionWorker::OnOK() {
              Napi::Boolean::New(env, _result.context_full));
   result.Set("text",
              Napi::String::New(env, _result.text.c_str()));
+  result.Set("stopped_eos",
+             Napi::Boolean::New(env, _result.stopped_eos));
+  result.Set("stopped_words",
+             Napi::Boolean::New(env, _result.stopped_words));
+  result.Set("stopping_word",
+             Napi::String::New(env, _result.stopping_word.c_str()));
+  result.Set("stopped_limited",
+             Napi::Boolean::New(env, _result.stopped_limited));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
   std::string reasoning_content = "";
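
Together with the struct fields below, these Set calls expose four new stop-reason fields on the completion result returned to JavaScript. Their names and types follow directly from the diff; the interface itself is only an illustrative sketch, not a type shipped by the package:

// Stop-reason fields added to the completion result in 0.4.4.
interface CompletionStopInfo {
  stopped_eos: boolean;     // generation ended on an end-of-generation token
  stopped_words: boolean;   // a configured stop word was matched
  stopping_word: string;    // the matched fragment (empty when no stop word fired)
  stopped_limited: boolean; // neither of the above, i.e. the prediction limit was reached
}

// Example: turn the flags into a human-readable stop reason.
const stopReason = (r: CompletionStopInfo): string =>
  r.stopped_eos ? "eos" : r.stopped_words ? `stop word ${JSON.stringify(r.stopping_word)}` : "limit";
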
@@ -57,5 +57,9 @@ private:
     bool truncated = false;
     bool context_full = false;
     std::string text;
+    bool stopped_eos = false;
+    bool stopped_words = false;
+    std::string stopping_word;
+    bool stopped_limited = false;
   } _result;
 };