@fugood/llama.node 0.4.2 → 0.4.4
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +2 -0
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +26 -8
- package/src/LlamaCompletionWorker.h +4 -0
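Beyond rebuilding the prebuilt binaries for every platform target, this release extends the completion worker to report why generation stopped: an end-of-generation token (stopped_eos), a matched stop word (stopped_words, plus the matched stopping_word), or any other limit (stopped_limited). It also fixes the handling of the reasoning_content and content fields parsed from the chat output.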
package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file

package/bin/darwin/x64/llama-node.node
CHANGED
Binary file

package/bin/linux/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux/x64/llama-node.node
CHANGED
Binary file

package/bin/linux-cuda/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux-cuda/x64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/node.lib
CHANGED
Binary file

package/bin/win32/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/x64/node.lib
CHANGED
Binary file

package/bin/win32-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32-vulkan/arm64/node.lib
CHANGED
Binary file

package/bin/win32-vulkan/x64/llama-node.node
CHANGED
Binary file

package/bin/win32-vulkan/x64/node.lib
CHANGED
Binary file

package/lib/binding.ts
CHANGED

package/package.json
CHANGED
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -507,6 +507,8 @@ void LlamaCompletionWorker::Execute() {
     _result.tokens_predicted += 1;
     n_input = 1;
     if (_has_callback) {
+      // TODO: When we got possible stop words (startsWith)
+      // we should avoid calling the callback, wait for the next token
       const char *c_token = strdup(token.c_str());
       _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
                                      const char *value) {
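The TODO above describes deferring the streaming callback while the tail of the generated text could still grow into a stop word. A minimal sketch of such a check, with a hypothetical helper name that is not part of this package:

#include <string>
#include <vector>

// Returns true while `tail` (the not-yet-emitted end of the generated text)
// is a strict prefix of some stop word; in that case the next token could
// still complete the stop word, so emitting `tail` now would leak part of it
// to the callback.
static bool is_possible_stop_prefix(const std::string &tail,
                                    const std::vector<std::string> &stop_words) {
  for (const auto &word : stop_words) {
    if (word.size() > tail.size() &&
        word.compare(0, tail.size(), tail) == 0) {
      return true;
    }
  }
  return false;
}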
@@ -518,6 +520,8 @@ void LlamaCompletionWorker::Execute() {
     }
     // is it an end of generation?
     if (llama_vocab_is_eog(vocab, new_token_id)) {
+      _result.stopped_eos = true;
+      // TODO: EOS token should be cut
       break;
     }
     // check for stop words
@@ -525,10 +529,16 @@ void LlamaCompletionWorker::Execute() {
       const size_t stop_pos =
           findStoppingStrings(_result.text, token.size(), _stop_words);
       if (stop_pos != std::string::npos) {
+        _result.stopped_words = true;
+        _result.stopping_word = _result.text.substr(stop_pos, token.size());
+        _result.text = _result.text.substr(0, stop_pos - 1);
         break;
       }
     }
   }
+  if (!_result.stopped_eos && !_result.stopped_words) {
+    _result.stopped_limited = true;
+  }
   const auto t_main_end = ggml_time_us();
   _sess->get_mutex().unlock();
   if (_onComplete) {
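To make the two substr calls above concrete: assuming findStoppingStrings returns the index in _result.text where the matched stop word begins, here is a self-contained walk-through with illustrative values:

#include <cassert>
#include <string>

int main() {
  std::string text = "Hello world\nUser:"; // accumulated _result.text
  std::string token = "User:";             // final token, completing the stop word
  size_t stop_pos = text.find("User:");    // 12; stands in for findStoppingStrings()

  // stopping_word takes token.size() characters starting at the match
  std::string stopping_word = text.substr(stop_pos, token.size());
  // the kept text ends at stop_pos - 1, so the character immediately before
  // the stop word (here the '\n') is trimmed along with it
  text = text.substr(0, stop_pos - 1);

  assert(stopping_word == "User:");
  assert(text == "Hello world");
  return 0;
}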
@@ -549,18 +559,26 @@ void LlamaCompletionWorker::OnOK() {
              Napi::Boolean::New(env, _result.context_full));
   result.Set("text",
              Napi::String::New(env, _result.text.c_str()));
+  result.Set("stopped_eos",
+             Napi::Boolean::New(env, _result.stopped_eos));
+  result.Set("stopped_words",
+             Napi::Boolean::New(env, _result.stopped_words));
+  result.Set("stopping_word",
+             Napi::String::New(env, _result.stopping_word.c_str()));
+  result.Set("stopped_limited",
+             Napi::Boolean::New(env, _result.stopped_limited));
 
   Napi::Array tool_calls = Napi::Array::New(Napi::AsyncWorker::Env());
-  std::string
-  std::string
+  std::string reasoning_content = "";
+  std::string content;
   if (!_stop) {
     try {
       common_chat_msg message = common_chat_parse(_result.text, static_cast<common_chat_format>(_chat_format));
       if (!message.reasoning_content.empty()) {
-        reasoning_content =
+        reasoning_content = message.reasoning_content;
       }
       if (!message.content.empty()) {
-        content =
+        content = message.content;
       }
       for (size_t i = 0; i < message.tool_calls.size(); i++) {
         const auto &tc = message.tool_calls[i];
@@ -582,11 +600,11 @@ void LlamaCompletionWorker::OnOK() {
   if (tool_calls.Length() > 0) {
     result.Set("tool_calls", tool_calls);
   }
-  if (reasoning_content) {
-    result.Set("reasoning_content", Napi::String::New(env, reasoning_content
+  if (!reasoning_content.empty()) {
+    result.Set("reasoning_content", Napi::String::New(env, reasoning_content.c_str()));
   }
-  if (content) {
-    result.Set("content", Napi::String::New(env, content
+  if (!content.empty()) {
+    result.Set("content", Napi::String::New(env, content.c_str()));
   }
 
   auto ctx = _sess->context();