@fugood/llama.node 0.0.1-alpha.1 → 0.0.1-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +27 -1
- package/README.md +4 -4
- package/bin/darwin/arm64/default.metallib +0 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/default.metallib +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/lib/binding.js +36 -4
- package/lib/binding.ts +4 -3
- package/lib/index.js +16 -4
- package/lib/index.ts +7 -4
- package/package.json +4 -3
- package/src/DisposeWorker.cpp +11 -0
- package/src/DisposeWorker.h +14 -0
- package/src/LlamaCompletionWorker.cpp +163 -0
- package/src/LlamaCompletionWorker.h +34 -0
- package/src/LlamaContext.cpp +200 -0
- package/src/LlamaContext.h +21 -0
- package/src/LoadSessionWorker.cpp +24 -0
- package/src/LoadSessionWorker.h +17 -0
- package/src/SaveSessionWorker.cpp +21 -0
- package/src/SaveSessionWorker.h +16 -0
- package/src/addons.cc +9 -0
- package/src/common.hpp +81 -0
- package/src/addons.cpp +0 -506
package/src/LlamaContext.h
ADDED
@@ -0,0 +1,21 @@
+#include "common.hpp"
+
+class LlamaCompletionWorker;
+
+class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
+public:
+  LlamaContext(const Napi::CallbackInfo &info);
+  static void Init(Napi::Env env, Napi::Object &exports);
+
+private:
+  Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
+  Napi::Value Completion(const Napi::CallbackInfo &info);
+  void StopCompletion(const Napi::CallbackInfo &info);
+  Napi::Value SaveSession(const Napi::CallbackInfo &info);
+  Napi::Value LoadSession(const Napi::CallbackInfo &info);
+  Napi::Value Release(const Napi::CallbackInfo &info);
+
+  std::string _info;
+  LlamaSessionPtr _sess = nullptr;
+  LlamaCompletionWorker *_wip = nullptr;
+};

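The implementations behind these declarations live in the new package/src/LlamaContext.cpp (+200 lines, not expanded in this diff view). As a rough editor's sketch of how the header is presumably wired up, assuming the new file keeps the DefineClass registration pattern visible in the removed addons.cpp at the end of this diff (the "release" property name for the new Release() method is an assumption, not confirmed by the diff):

// Sketch only; not part of the published diff content.
void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
  Napi::Function func = DefineClass(
      env, "LlamaContext",
      {InstanceMethod<&LlamaContext::GetSystemInfo>("getSystemInfo"),
       InstanceMethod<&LlamaContext::Completion>("completion"),
       InstanceMethod<&LlamaContext::StopCompletion>("stopCompletion"),
       InstanceMethod<&LlamaContext::SaveSession>("saveSession"),
       InstanceMethod<&LlamaContext::LoadSession>("loadSession"),
       InstanceMethod<&LlamaContext::Release>("release")});
  exports.Set("LlamaContext", func);
}
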
package/src/LoadSessionWorker.cpp
ADDED
@@ -0,0 +1,24 @@
+#include "LoadSessionWorker.h"
+#include "LlamaContext.h"
+
+LoadSessionWorker::LoadSessionWorker(const Napi::CallbackInfo &info,
+                                     LlamaSessionPtr &sess)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _path(info[0].ToString()),
+      _sess(sess) {}
+
+void LoadSessionWorker::Execute() {
+  _sess->get_mutex().lock();
+  // reserve the maximum number of tokens for capacity
+  std::vector<llama_token> tokens;
+  tokens.reserve(_sess->params().n_ctx);
+  if (!llama_state_load_file(_sess->context(), _path.c_str(), tokens.data(),
+                             tokens.capacity(), &count)) {
+    SetError("Failed to load session");
+  }
+  _sess->set_tokens(std::move(tokens));
+  _sess->get_mutex().unlock();
+}
+
+void LoadSessionWorker::OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
+
+void LoadSessionWorker::OnError(const Napi::Error &err) { Reject(err.Value()); }

package/src/LoadSessionWorker.h
ADDED
@@ -0,0 +1,17 @@
+#include "common.hpp"
+
+class LoadSessionWorker : public Napi::AsyncWorker,
+                          public Napi::Promise::Deferred {
+public:
+  LoadSessionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  std::string _path;
+  LlamaSessionPtr _sess;
+  size_t count = 0;
+};

package/src/SaveSessionWorker.cpp
ADDED
@@ -0,0 +1,21 @@
+#include "SaveSessionWorker.h"
+#include "LlamaContext.h"
+
+SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
+                                     LlamaSessionPtr &sess)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _path(info[0].ToString()),
+      _sess(sess) {}
+
+void SaveSessionWorker::Execute() {
+  _sess->get_mutex().lock();
+  auto tokens = _sess->tokens_ptr();
+  if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens->data(),
+                             tokens->size())) {
+    SetError("Failed to save session");
+  }
+  _sess->get_mutex().unlock();
+}
+
+void SaveSessionWorker::OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
+
+void SaveSessionWorker::OnError(const Napi::Error &err) { Reject(err.Value()); }

package/src/SaveSessionWorker.h
ADDED
@@ -0,0 +1,16 @@
+#include "common.hpp"
+
+class SaveSessionWorker : public Napi::AsyncWorker,
+                          public Napi::Promise::Deferred {
+public:
+  SaveSessionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  std::string _path;
+  LlamaSessionPtr _sess;
+};

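Each of these workers inherits from both Napi::AsyncWorker and Napi::Promise::Deferred, so a single object runs the file I/O off the main thread and also settles the promise returned to JavaScript. A minimal sketch of the Queue()/Promise() call pattern they are built for, mirroring LlamaContext::SaveSession in the removed addons.cpp below; the new LlamaContext.cpp is assumed to do the equivalent with its LlamaSessionPtr member:

// Sketch only; the real method body lives in the new LlamaContext.cpp.
Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
  if (info.Length() < 1 || !info[0].IsString()) {
    Napi::TypeError::New(info.Env(), "String expected")
        .ThrowAsJavaScriptException();
  }
  auto *worker = new SaveSessionWorker(info, _sess); // _sess: LlamaSessionPtr
  worker->Queue();          // Execute() runs on a worker thread
  return worker->Promise(); // resolved in OnOK(), rejected in OnError()
}
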
package/src/addons.cc
ADDED
package/src/common.hpp
ADDED
@@ -0,0 +1,81 @@
+#pragma once
+
+#include "common/common.h"
+#include "llama.h"
+#include <memory>
+#include <mutex>
+#include <napi.h>
+#include <string>
+#include <thread>
+#include <tuple>
+#include <vector>
+
+typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
+typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
+typedef std::unique_ptr<llama_sampling_context, decltype(&llama_sampling_free)>
+    LlamaCppSampling;
+typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
+
+template <typename T>
+constexpr T get_option(const Napi::Object &options, const std::string &name,
+                       const T default_value) {
+  if (options.Has(name) && !options.Get(name).IsUndefined() &&
+      !options.Get(name).IsNull()) {
+    if constexpr (std::is_same<T, std::string>::value) {
+      return options.Get(name).ToString().operator T();
+    } else if constexpr (std::is_same<T, int32_t>::value ||
+                         std::is_same<T, uint32_t>::value ||
+                         std::is_same<T, float>::value ||
+                         std::is_same<T, double>::value) {
+      return options.Get(name).ToNumber().operator T();
+    } else if constexpr (std::is_same<T, bool>::value) {
+      return options.Get(name).ToBoolean().operator T();
+    } else {
+      static_assert(std::is_same<T, std::string>::value ||
+                        std::is_same<T, int32_t>::value ||
+                        std::is_same<T, uint32_t>::value ||
+                        std::is_same<T, float>::value ||
+                        std::is_same<T, double>::value ||
+                        std::is_same<T, bool>::value,
+                    "Unsupported type");
+    }
+  } else {
+    return default_value;
+  }
+}
+
+class LlamaSession {
+public:
+  LlamaSession(llama_context *ctx, gpt_params params)
+      : ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
+    tokens_.reserve(params.n_ctx);
+  }
+
+  ~LlamaSession() { dispose(); }
+
+  llama_context *context() { return ctx_.get(); }
+
+  std::vector<llama_token>* tokens_ptr() { return &tokens_; }
+
+  void set_tokens(std::vector<llama_token> tokens) {
+    tokens_ = std::move(tokens);
+  }
+
+  const gpt_params &params() const { return params_; }
+
+  std::mutex &get_mutex() { return mutex; }
+
+  void dispose() {
+    std::lock_guard<std::mutex> lock(mutex);
+    tokens_.clear();
+    ctx_.reset();
+  }
+
+private:
+  LlamaCppContext ctx_;
+  const gpt_params params_;
+  std::vector<llama_token> tokens_{};
+  std::mutex mutex;
+};
+
+typedef std::shared_ptr<LlamaSession> LlamaSessionPtr;

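The get_option<T> helper added here centralizes the repetitive options.Has(...) ? options.Get(...) : default chains that fill most of the removed addons.cpp below. An illustrative sketch of how it is presumably called when parsing completion options; the option names and defaults are copied from the removed Completion method, while the helper-function name is hypothetical and the real call sites live in the new LlamaContext.cpp:

// Sketch only; a hypothetical helper showing get_option<T> in use.
static gpt_params parse_completion_options(const Napi::Object &options) {
  gpt_params params;
  params.prompt = get_option<std::string>(options, "prompt", "");
  params.n_predict = get_option<int32_t>(options, "n_predict", -1);
  params.sparams.temp = get_option<float>(options, "temperature", 0.80f);
  params.sparams.top_k = get_option<int32_t>(options, "top_k", 40);
  params.sparams.top_p = get_option<float>(options, "top_p", 0.95f);
  return params;
}
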
package/src/addons.cpp
DELETED
@@ -1,506 +0,0 @@
-#include "common/common.h"
-#include "llama.h"
-#include <memory>
-#include <mutex>
-#include <napi.h>
-#include <string>
-#include <thread>
-#include <tuple>
-#include <vector>
-
-typedef std::unique_ptr<llama_model, decltype(&llama_free_model)> LlamaCppModel;
-typedef std::unique_ptr<llama_context, decltype(&llama_free)> LlamaCppContext;
-typedef std::unique_ptr<llama_sampling_context, decltype(&llama_sampling_free)>
-    LlamaCppSampling;
-typedef std::unique_ptr<llama_batch, decltype(&llama_batch_free)> LlamaCppBatch;
-
-size_t common_part(const std::vector<llama_token> &a,
-                   const std::vector<llama_token> &b) {
-  size_t i = 0;
-  while (i < a.size() && i < b.size() && a[i] == b[i]) {
-    i++;
-  }
-  return i;
-}
-
-class LlamaCompletionWorker;
-
-class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
-public:
-  // construct({ model, embedding, n_ctx, n_batch, n_threads, n_gpu_layers,
-  // use_mlock, use_mmap }): LlamaContext throws error
-  LlamaContext(const Napi::CallbackInfo &info)
-      : Napi::ObjectWrap<LlamaContext>(info) {
-    Napi::Env env = info.Env();
-    if (info.Length() < 1 || !info[0].IsObject()) {
-      Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
-    }
-    auto options = info[0].As<Napi::Object>();
-
-    if (options.Has("model")) {
-      params.model = options.Get("model").ToString();
-    }
-    if (options.Has("embedding")) {
-      params.embedding = options.Get("embedding").ToBoolean();
-    }
-    if (options.Has("n_ctx")) {
-      params.n_ctx = options.Get("n_ctx").ToNumber();
-    }
-    if (options.Has("n_batch")) {
-      params.n_batch = options.Get("n_batch").ToNumber();
-    }
-    if (options.Has("n_threads")) {
-      params.n_threads = options.Get("n_threads").ToNumber();
-    }
-    if (options.Has("n_gpu_layers")) {
-      params.n_gpu_layers = options.Get("n_gpu_layers").ToNumber();
-    }
-    if (options.Has("use_mlock")) {
-      params.use_mlock = options.Get("use_mlock").ToBoolean();
-    }
-    if (options.Has("use_mmap")) {
-      params.use_mmap = options.Get("use_mmap").ToBoolean();
-    }
-    if (options.Has("numa")) {
-      int numa = options.Get("numa").ToNumber();
-      params.numa = static_cast<ggml_numa_strategy>(numa);
-    }
-    if (options.Has("seed")) {
-      params.seed = options.Get("seed").ToNumber();
-    }
-
-    llama_backend_init();
-    llama_numa_init(params.numa);
-
-    auto tuple = llama_init_from_gpt_params(params);
-    model.reset(std::get<0>(tuple));
-    ctx.reset(std::get<1>(tuple));
-
-    if (model == nullptr || ctx == nullptr) {
-      Napi::TypeError::New(env, "Failed to load model")
-          .ThrowAsJavaScriptException();
-    }
-  }
-
-  static void Export(Napi::Env env, Napi::Object &exports) {
-    Napi::Function func = DefineClass(
-        env, "LlamaContext",
-        {InstanceMethod<&LlamaContext::GetSystemInfo>(
-             "getSystemInfo",
-             static_cast<napi_property_attributes>(napi_enumerable)),
-         InstanceMethod<&LlamaContext::Completion>(
-             "completion",
-             static_cast<napi_property_attributes>(napi_enumerable)),
-         InstanceMethod<&LlamaContext::StopCompletion>(
-             "stopCompletion",
-             static_cast<napi_property_attributes>(napi_enumerable)),
-         InstanceMethod<&LlamaContext::SaveSession>(
-             "saveSession",
-             static_cast<napi_property_attributes>(napi_enumerable)),
-         InstanceMethod<&LlamaContext::LoadSession>(
-             "loadSession",
-             static_cast<napi_property_attributes>(napi_enumerable))});
-    Napi::FunctionReference *constructor = new Napi::FunctionReference();
-    *constructor = Napi::Persistent(func);
-#if NAPI_VERSION > 5
-    env.SetInstanceData(constructor);
-#endif
-    exports.Set("LlamaContext", func);
-  }
-
-  llama_context *getContext() { return ctx.get(); }
-  llama_model *getModel() { return model.get(); }
-
-  std::vector<llama_token> *getTokens() { return tokens.get(); }
-
-  const gpt_params &getParams() const { return params; }
-
-  void ensureTokens() {
-    if (tokens == nullptr) {
-      tokens = std::make_unique<std::vector<llama_token>>();
-    }
-  }
-
-  void setTokens(std::vector<llama_token> tokens) {
-    this->tokens.reset(new std::vector<llama_token>(std::move(tokens)));
-  }
-
-  std::mutex &getMutex() { return mutex; }
-
-private:
-  Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
-  Napi::Value Completion(const Napi::CallbackInfo &info);
-  void StopCompletion(const Napi::CallbackInfo &info);
-  Napi::Value SaveSession(const Napi::CallbackInfo &info);
-  Napi::Value LoadSession(const Napi::CallbackInfo &info);
-
-  gpt_params params;
-  LlamaCppModel model{nullptr, llama_free_model};
-  LlamaCppContext ctx{nullptr, llama_free};
-  std::unique_ptr<std::vector<llama_token>> tokens;
-  std::mutex mutex;
-  LlamaCompletionWorker *compl_worker = nullptr;
-};
-
-class LlamaCompletionWorker : public Napi::AsyncWorker,
-                              public Napi::Promise::Deferred {
-  LlamaContext *_ctx;
-  gpt_params _params;
-  std::vector<std::string> _stop_words;
-  std::string generated_text = "";
-  Napi::ThreadSafeFunction _tsfn;
-  bool _has_callback = false;
-  bool _stop = false;
-  size_t tokens_predicted = 0;
-  size_t tokens_evaluated = 0;
-  bool truncated = false;
-
-public:
-  LlamaCompletionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx,
-                        Napi::Function callback, gpt_params params,
-                        std::vector<std::string> stop_words = {})
-      : AsyncWorker(info.Env()), Deferred(info.Env()), _ctx(ctx),
-        _params(params), _stop_words(stop_words) {
-    _ctx->Ref();
-    if (!callback.IsEmpty()) {
-      _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
-                                            "LlamaCompletionCallback", 0, 1);
-      _has_callback = true;
-    }
-  }
-
-  ~LlamaCompletionWorker() {
-    _ctx->Unref();
-    if (_has_callback) {
-      _tsfn.Abort();
-      _tsfn.Release();
-    }
-  }
-
-  void Stop() { _stop = true; }
-
-protected:
-  size_t findStoppingStrings(const std::string &text,
-                             const size_t last_token_size) {
-    size_t stop_pos = std::string::npos;
-
-    for (const std::string &word : _stop_words) {
-      size_t pos;
-
-      const size_t tmp = word.size() + last_token_size;
-      const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;
-
-      pos = text.find(word, from_pos);
-
-      if (pos != std::string::npos &&
-          (stop_pos == std::string::npos || pos < stop_pos)) {
-        stop_pos = pos;
-      }
-    }
-
-    return stop_pos;
-  }
-
-  void Execute() {
-    _ctx->getMutex().lock();
-    _ctx->ensureTokens();
-    const auto t_main_start = ggml_time_us();
-    const size_t n_ctx = _params.n_ctx;
-    auto n_keep = _params.n_keep;
-    auto n_predict = _params.n_predict;
-    size_t n_cur = 0;
-    size_t n_input = 0;
-    const bool add_bos = llama_should_add_bos_token(_ctx->getModel());
-    auto *ctx = _ctx->getContext();
-
-    llama_set_rng_seed(ctx, _params.seed);
-
-    LlamaCppSampling sampling{llama_sampling_init(_params.sparams),
-                              llama_sampling_free};
-
-    std::vector<llama_token> prompt_tokens =
-        ::llama_tokenize(ctx, _params.prompt, add_bos);
-    n_input = prompt_tokens.size();
-    if (_ctx->getTokens() != nullptr) {
-      n_cur = common_part(*_ctx->getTokens(), prompt_tokens);
-      if (n_cur == n_input) {
-        --n_cur;
-      }
-      n_input -= n_cur;
-      llama_kv_cache_seq_rm(ctx, 0, n_cur, -1);
-    }
-    _ctx->setTokens(std::move(prompt_tokens));
-
-    const int max_len = _params.n_predict < 0 ? 0 : _params.n_predict;
-
-    for (int i = 0; i < max_len || _stop; i++) {
-      auto *embd = _ctx->getTokens();
-      // check if we need to remove some tokens
-      if (embd->size() >= n_ctx) {
-        const int n_left = n_cur - n_keep - 1;
-        const int n_discard = n_left / 2;
-
-        llama_kv_cache_seq_rm(ctx, 0, n_keep + 1, n_keep + n_discard + 1);
-        llama_kv_cache_seq_add(ctx, 0, n_keep + 1 + n_discard, n_cur,
-                               -n_discard);
-
-        for (size_t i = n_keep + 1 + n_discard; i < embd->size(); i++) {
-          (*embd)[i - n_discard] = (*embd)[i];
-        }
-        embd->resize(embd->size() - n_discard);
-
-        n_cur -= n_discard;
-        truncated = true;
-      }
-      int ret = llama_decode(
-          ctx, llama_batch_get_one(embd->data() + n_cur, n_input, n_cur, 0));
-      if (ret < 0) {
-        SetError("Failed to decode token, code: " + std::to_string(ret));
-        break;
-      }
-      // sample the next token
-      const llama_token new_token_id =
-          llama_sampling_sample(sampling.get(), ctx, nullptr);
-      // prepare the next batch
-      embd->push_back(new_token_id);
-      auto token = llama_token_to_piece(ctx, new_token_id);
-      generated_text += token;
-      n_cur += n_input;
-      tokens_evaluated += n_input;
-      tokens_predicted += 1;
-      n_input = 1;
-      if (_has_callback) {
-        // _cb.Call({ Napi::String::New(AsyncWorker::Env(), token) });
-        const char *c_token = strdup(token.c_str());
-        _tsfn.BlockingCall(c_token, [](Napi::Env env, Napi::Function jsCallback,
-                                       const char *value) {
-          auto obj = Napi::Object::New(env);
-          obj.Set("token", Napi::String::New(env, value));
-          jsCallback.Call({obj});
-        });
-      }
-      // is it an end of generation?
-      if (llama_token_is_eog(_ctx->getModel(), new_token_id)) {
-        break;
-      }
-      // check for stop words
-      if (!_stop_words.empty()) {
-        const size_t stop_pos =
-            findStoppingStrings(generated_text, token.size());
-        if (stop_pos != std::string::npos) {
-          break;
-        }
-      }
-    }
-    const auto t_main_end = ggml_time_us();
-    _ctx->getMutex().unlock();
-  }
-
-  void OnOK() {
-    auto result = Napi::Object::New(Napi::AsyncWorker::Env());
-    result.Set("tokens_evaluated",
-               Napi::Number::New(Napi::AsyncWorker::Env(), tokens_evaluated));
-    result.Set("tokens_predicted",
-               Napi::Number::New(Napi::AsyncWorker::Env(), tokens_predicted));
-    result.Set("truncated",
-               Napi::Boolean::New(Napi::AsyncWorker::Env(), truncated));
-    result.Set("text",
-               Napi::String::New(Napi::AsyncWorker::Env(), generated_text));
-    Napi::Promise::Deferred::Resolve(result);
-  }
-
-  void OnError(const Napi::Error &err) {
-    Napi::Promise::Deferred::Reject(err.Value());
-  }
-};
-
-class SaveSessionWorker : public Napi::AsyncWorker,
-                          public Napi::Promise::Deferred {
-  std::string _path;
-  LlamaContext *_ctx;
-
-public:
-  SaveSessionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx)
-      : AsyncWorker(info.Env()), Deferred(info.Env()),
-        _path(info[0].ToString()), _ctx(ctx) {
-    _ctx->Ref();
-  }
-
-protected:
-  void Execute() {
-    _ctx->getMutex().lock();
-    if (_ctx->getTokens() == nullptr) {
-      SetError("Failed to save session");
-      return;
-    }
-    if (!llama_state_save_file(_ctx->getContext(), _path.c_str(),
-                               _ctx->getTokens()->data(),
-                               _ctx->getTokens()->size())) {
-      SetError("Failed to save session");
-    }
-    _ctx->getMutex().unlock();
-  }
-
-  void OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
-
-  void OnError(const Napi::Error &err) { Reject(err.Value()); }
-};
-
-class LoadSessionWorker : public Napi::AsyncWorker,
-                          public Napi::Promise::Deferred {
-  std::string _path;
-  LlamaContext *_ctx;
-  size_t count = 0;
-
-public:
-  LoadSessionWorker(const Napi::CallbackInfo &info, LlamaContext *ctx)
-      : AsyncWorker(info.Env()), Deferred(info.Env()),
-        _path(info[0].ToString()), _ctx(ctx) {
-    _ctx->Ref();
-  }
-
-protected:
-  void Execute() {
-    _ctx->getMutex().lock();
-    _ctx->ensureTokens();
-    // reserve the maximum number of tokens for capacity
-    _ctx->getTokens()->reserve(_ctx->getParams().n_ctx);
-    if (!llama_state_load_file(_ctx->getContext(), _path.c_str(),
-                               _ctx->getTokens()->data(),
-                               _ctx->getTokens()->capacity(), &count)) {
-      SetError("Failed to load session");
-    }
-    _ctx->getMutex().unlock();
-  }
-
-  void OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
-
-  void OnError(const Napi::Error &err) { Reject(err.Value()); }
-};
-
-// getSystemInfo(): string
-Napi::Value LlamaContext::GetSystemInfo(const Napi::CallbackInfo &info) {
-  return Napi::String::New(info.Env(), get_system_info(params).c_str());
-}
-
-// completion(options: LlamaCompletionOptions, onToken?: (token: string) =>
-// void): Promise<LlamaCompletionResult>
-Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
-  Napi::Env env = info.Env();
-  if (info.Length() < 1 || !info[0].IsObject()) {
-    Napi::TypeError::New(env, "Object expected").ThrowAsJavaScriptException();
-  }
-  if (info.Length() >= 2 && !info[1].IsFunction()) {
-    Napi::TypeError::New(env, "Function expected").ThrowAsJavaScriptException();
-  }
-  auto options = info[0].As<Napi::Object>();
-
-  gpt_params params;
-  if (options.Has("prompt")) {
-    params.prompt = options.Get("prompt").ToString();
-  } else {
-    Napi::TypeError::New(env, "Prompt is required")
-        .ThrowAsJavaScriptException();
-  }
-  params.n_predict =
-      options.Has("n_predict") ? options.Get("n_predict").ToNumber() : -1;
-  params.sparams.temp = options.Has("temperature")
-                            ? options.Get("temperature").ToNumber()
-                            : 0.80f;
-  params.sparams.top_k =
-      options.Has("top_k") ? options.Get("top_k").ToNumber() : 40;
-  params.sparams.top_p =
-      options.Has("top_p") ? options.Get("top_p").ToNumber() : 0.95f;
-  params.sparams.min_p =
-      options.Has("min_p") ? options.Get("min_p").ToNumber() : 0.05f;
-  params.sparams.tfs_z =
-      options.Has("tfs_z") ? options.Get("tfs_z").ToNumber() : 1.00f;
-  params.sparams.mirostat =
-      options.Has("mirostat") ? options.Get("mirostat").ToNumber() : 0;
-  params.sparams.mirostat_tau = options.Has("mirostat_tau")
-                                    ? options.Get("mirostat_tau").ToNumber()
-                                    : 5.00f;
-  params.sparams.mirostat_eta = options.Has("mirostat_eta")
-                                    ? options.Get("mirostat_eta").ToNumber()
-                                    : 0.10f;
-  params.sparams.penalty_last_n = options.Has("penalty_last_n")
-                                      ? options.Get("penalty_last_n").ToNumber()
-                                      : 64;
-  params.sparams.penalty_repeat = options.Has("penalty_repeat")
-                                      ? options.Get("penalty_repeat").ToNumber()
-                                      : 1.00f;
-  params.sparams.penalty_freq = options.Has("penalty_freq")
-                                    ? options.Get("penalty_freq").ToNumber()
-                                    : 0.00f;
-  params.sparams.penalty_present =
-      options.Has("penalty_present") ? options.Get("penalty_present").ToNumber()
-                                     : 0.00f;
-  params.sparams.penalize_nl = options.Has("penalize_nl")
-                                   ? options.Get("penalize_nl").ToBoolean()
-                                   : false;
-  params.sparams.typical_p =
-      options.Has("typical_p") ? options.Get("typical_p").ToNumber() : 1.00f;
-  params.ignore_eos =
-      options.Has("ignore_eos") ? options.Get("ignore_eos").ToBoolean() : false;
-  params.sparams.grammar = options.Has("grammar")
-                               ? options.Get("grammar").ToString().Utf8Value()
-                               : "";
-  params.n_keep = options.Has("n_keep") ? options.Get("n_keep").ToNumber() : 0;
-  params.seed =
-      options.Has("seed") ? options.Get("seed").ToNumber() : LLAMA_DEFAULT_SEED;
-  std::vector<std::string> stop_words;
-  if (options.Has("stop")) {
-    auto stop_words_array = options.Get("stop").As<Napi::Array>();
-    for (size_t i = 0; i < stop_words_array.Length(); i++) {
-      stop_words.push_back(stop_words_array.Get(i).ToString());
-    }
-  }
-
-  // options.on_sample
-  Napi::Function callback;
-  if (info.Length() >= 2) {
-    callback = info[1].As<Napi::Function>();
-  }
-
-  auto worker =
-      new LlamaCompletionWorker(info, this, callback, params, stop_words);
-  worker->Queue();
-  compl_worker = worker;
-  return worker->Promise();
-}
-
-// stopCompletion(): void
-void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
-  if (compl_worker != nullptr) {
-    compl_worker->Stop();
-  }
-}
-
-// saveSession(path: string): Promise<void> throws error
-Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
-  Napi::Env env = info.Env();
-  if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
-  }
-  auto *worker = new SaveSessionWorker(info, this);
-  worker->Queue();
-  return worker->Promise();
-}
-
-// loadSession(path: string): Promise<{ count }> throws error
-Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
-  Napi::Env env = info.Env();
-  if (info.Length() < 1 || !info[0].IsString()) {
-    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
-  }
-  auto *worker = new LoadSessionWorker(info, this);
-  worker->Queue();
-  return worker->Promise();
-}
-
-Napi::Object Init(Napi::Env env, Napi::Object exports) {
-  LlamaContext::Export(env, exports);
-  return exports;
-}
-
-NODE_API_MODULE(addons, Init)