@fugood/llama.node 0.1.0 → 0.2.0
- package/CMakeLists.txt +6 -0
- package/README.md +2 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/lib/binding.ts +11 -0
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +22 -0
- package/src/DetokenizeWorker.h +19 -0
- package/src/EmbeddingWorker.cpp +46 -0
- package/src/EmbeddingWorker.h +23 -0
- package/src/LlamaContext.cpp +62 -0
- package/src/LlamaContext.h +3 -0
- package/src/TokenizeWorker.cpp +26 -0
- package/src/TokenizeWorker.h +23 -0
- package/src/common.hpp +3 -2
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
package/CMakeLists.txt
CHANGED
@@ -77,6 +77,12 @@ file(
   "src/LlamaCompletionWorker.h"
   "src/LlamaContext.cpp"
   "src/LlamaContext.h"
+  "src/TokenizeWorker.cpp"
+  "src/TokenizeWorker.h"
+  "src/DetokenizeWorker.cpp"
+  "src/DetokenizeWorker.h"
+  "src/EmbeddingWorker.cpp"
+  "src/EmbeddingWorker.h"
   "src/LoadSessionWorker.cpp"
   "src/LoadSessionWorker.h"
   "src/SaveSessionWorker.cpp"
package/README.md
CHANGED
@@ -47,7 +47,8 @@ console.log('Result:', text)
 
 ## Lib Variants
 
-- [x] `default`: General usage,
+- [x] `default`: General usage, not support GPU except macOS (Metal)
+- [x] `vulkan`: Support GPU Vulkan (Windows/Linux), but some scenario might unstable
 
 ## License
 
package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file

package/bin/darwin/x64/llama-node.node
CHANGED
Binary file

package/bin/linux/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux/x64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/x64/llama-node.node
CHANGED
Binary file
package/lib/binding.ts
CHANGED
@@ -37,11 +37,22 @@ export type LlamaCompletionToken = {
   token: string
 }
 
+export type TokenizeResult = {
+  tokens: Int32Array
+}
+
+export type EmbeddingResult = {
+  embedding: Float32Array
+}
+
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
   stopCompletion(): void
+  tokenize(text: string): Promise<TokenizeResult>
+  detokenize(tokens: number[]): Promise<string>
+  embedding(text: string): Promise<EmbeddingResult>
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
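Taken together, the binding.ts additions describe the new 0.2.0 surface: tokenize() resolves with an Int32Array of token ids, detokenize() turns a plain number[] back into text, and embedding() resolves with a Float32Array. A minimal round-trip sketch, assuming an already constructed LlamaContext instance named ctx (model loading options omitted; the demo function name is illustrative):

// Round-trip sketch for the new 0.2.0 methods; `ctx` is assumed to be an
// existing LlamaContext instance created elsewhere.
async function demo(ctx: LlamaContext): Promise<void> {
  // tokenize() resolves with { tokens: Int32Array }
  const { tokens } = await ctx.tokenize('Hello world')

  // detokenize() expects a plain number[], so convert the typed array first
  const text = await ctx.detokenize(Array.from(tokens))
  console.log(text)

  // embedding() resolves with { embedding: Float32Array }
  const { embedding } = await ctx.embedding('Hello world')
  console.log(embedding.length) // the model's embedding dimension
}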
package/package.json
CHANGED

package/src/DetokenizeWorker.cpp
ADDED
@@ -0,0 +1,22 @@
+#include "DetokenizeWorker.h"
+#include "LlamaContext.h"
+
+DetokenizeWorker::DetokenizeWorker(const Napi::CallbackInfo &info,
+                                   LlamaSessionPtr &sess,
+                                   std::vector<llama_token> &tokens)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
+      _tokens(std::move(tokens)) {}
+
+void DetokenizeWorker::Execute() {
+  const auto text = ::llama_detokenize_bpe(_sess->context(), _tokens);
+  _text = std::move(text);
+}
+
+void DetokenizeWorker::OnOK() {
+  Napi::Promise::Deferred::Resolve(
+      Napi::String::New(Napi::AsyncWorker::Env(), _text));
+}
+
+void DetokenizeWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/DetokenizeWorker.h
ADDED
@@ -0,0 +1,19 @@
+#include "common.hpp"
+#include <vector>
+
+class DetokenizeWorker : public Napi::AsyncWorker,
+                         public Napi::Promise::Deferred {
+public:
+  DetokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                   std::vector<llama_token> &tokens);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::vector<llama_token> _tokens;
+  std::string _text;
+};
package/src/EmbeddingWorker.cpp
ADDED
@@ -0,0 +1,46 @@
+#include "EmbeddingWorker.h"
+#include "LlamaContext.h"
+
+EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
+                                 LlamaSessionPtr &sess, std::string text)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+
+void EmbeddingWorker::Execute() {
+  llama_kv_cache_clear(_sess->context());
+  auto tokens = ::llama_tokenize(_sess->context(), _text, true);
+  // add SEP if not present
+  if (tokens.empty() || tokens.back() != llama_token_sep(_sess->model())) {
+    tokens.push_back(llama_token_sep(_sess->model()));
+  }
+  const int n_embd = llama_n_embd(_sess->model());
+  do {
+    int ret =
+        llama_decode(_sess->context(),
+                     llama_batch_get_one(tokens.data(), tokens.size(), 0, 0));
+    if (ret < 0) {
+      SetError("Failed to inference, code: " + std::to_string(ret));
+      break;
+    }
+    const float *embd = llama_get_embeddings_seq(_sess->context(), 0);
+    if (embd == nullptr) {
+      SetError("Failed to get embeddings");
+      break;
+    }
+    _result.embedding.resize(n_embd);
+    memcpy(_result.embedding.data(), embd, n_embd * sizeof(float));
+  } while (false);
+}
+
+void EmbeddingWorker::OnOK() {
+  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
+  auto embedding = Napi::Float32Array::New(Napi::AsyncWorker::Env(),
+                                           _result.embedding.size());
+  memcpy(embedding.Data(), _result.embedding.data(),
+         _result.embedding.size() * sizeof(float));
+  result.Set("embedding", embedding);
+  Napi::Promise::Deferred::Resolve(result);
+}
+
+void EmbeddingWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/EmbeddingWorker.h
ADDED
@@ -0,0 +1,23 @@
+#include "common.hpp"
+#include <vector>
+
+struct EmbeddingResult {
+  std::vector<float> embedding;
+};
+
+class EmbeddingWorker : public Napi::AsyncWorker,
+                        public Napi::Promise::Deferred {
+public:
+  EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                  std::string text);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::string _text;
+  EmbeddingResult _result;
+};
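Since EmbeddingWorker hands the raw sequence embedding back to JavaScript as a Float32Array, a common use is comparing two texts. A hedged sketch of cosine similarity on top of the new embedding() method, assuming ctx is an existing LlamaContext backed by an embedding-capable model (the helper names cosine and similarity are illustrative, not part of the package):

// Cosine similarity over Float32Array embeddings returned by embedding()
function cosine(a: Float32Array, b: Float32Array): number {
  let dot = 0
  let na = 0
  let nb = 0
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i]
    na += a[i] * a[i]
    nb += b[i] * b[i]
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb))
}

async function similarity(ctx: LlamaContext, x: string, y: string): Promise<number> {
  const { embedding: ex } = await ctx.embedding(x)
  const { embedding: ey } = await ctx.embedding(y)
  return cosine(ex, ey)
}

Note that the worker clears the KV cache and runs a fresh decode for each call, so each embedding() invocation is independent of any prior completion state.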
package/src/LlamaContext.cpp
CHANGED
@@ -1,8 +1,11 @@
 #include "LlamaContext.h"
+#include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
+#include "EmbeddingWorker.h"
 #include "LlamaCompletionWorker.h"
 #include "LoadSessionWorker.h"
 #include "SaveSessionWorker.h"
+#include "TokenizeWorker.h"
 
 void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
   Napi::Function func = DefineClass(
@@ -16,6 +19,13 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
       InstanceMethod<&LlamaContext::StopCompletion>(
           "stopCompletion",
           static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::Tokenize>(
+          "tokenize", static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::Detokenize>(
+          "detokenize",
+          static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::Embedding>(
+          "embedding", static_cast<napi_property_attributes>(napi_enumerable)),
       InstanceMethod<&LlamaContext::SaveSession>(
           "saveSession",
           static_cast<napi_property_attributes>(napi_enumerable)),
@@ -158,6 +168,58 @@ void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
   }
 }
 
+// tokenize(text: string): Promise<TokenizeResult>
+Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto text = info[0].ToString().Utf8Value();
+  auto *worker = new TokenizeWorker(info, _sess, text);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// detokenize(tokens: number[]): Promise<string>
+Napi::Value LlamaContext::Detokenize(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsArray()) {
+    Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto tokens = info[0].As<Napi::Array>();
+  std::vector<int32_t> token_ids;
+  for (size_t i = 0; i < tokens.Length(); i++) {
+    token_ids.push_back(tokens.Get(i).ToNumber().Int32Value());
+  }
+  auto *worker = new DetokenizeWorker(info, _sess, token_ids);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// embedding(text: string): Promise<EmbeddingResult>
+Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto text = info[0].ToString().Utf8Value();
+  auto *worker = new EmbeddingWorker(info, _sess, text);
+  worker->Queue();
+  return worker->Promise();
+}
+
 // saveSession(path: string): Promise<void> throws error
 Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
package/src/LlamaContext.h
CHANGED
@@ -11,6 +11,9 @@ private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
+  Napi::Value Tokenize(const Napi::CallbackInfo &info);
+  Napi::Value Detokenize(const Napi::CallbackInfo &info);
+  Napi::Value Embedding(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
package/src/TokenizeWorker.cpp
ADDED
@@ -0,0 +1,26 @@
+#include "TokenizeWorker.h"
+#include "LlamaContext.h"
+
+TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
+                               LlamaSessionPtr &sess, std::string text)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+
+void TokenizeWorker::Execute() {
+  const auto tokens = ::llama_tokenize(_sess->context(), _text, false);
+  _result = {.tokens = std::move(tokens)};
+}
+
+void TokenizeWorker::OnOK() {
+  Napi::HandleScope scope(Napi::AsyncWorker::Env());
+  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
+  auto tokens =
+      Napi::Int32Array::New(Napi::AsyncWorker::Env(), _result.tokens.size());
+  memcpy(tokens.Data(), _result.tokens.data(),
+         _result.tokens.size() * sizeof(llama_token));
+  result.Set("tokens", tokens);
+  Napi::Promise::Deferred::Resolve(result);
+}
+
+void TokenizeWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/TokenizeWorker.h
ADDED
@@ -0,0 +1,23 @@
+#include "common.hpp"
+#include <vector>
+
+struct TokenizeResult {
+  std::vector<llama_token> tokens;
+};
+
+class TokenizeWorker : public Napi::AsyncWorker,
+                       public Napi::Promise::Deferred {
+public:
+  TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                 std::string text);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::string _text;
+  TokenizeResult _result;
+};
package/src/common.hpp
CHANGED
@@ -47,7 +47,8 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 class LlamaSession {
 public:
   LlamaSession(llama_model *model, llama_context *ctx, gpt_params params)
-      : model_(LlamaCppModel(model, llama_free_model)),
+      : model_(LlamaCppModel(model, llama_free_model)),
+        ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
     tokens_.reserve(params.n_ctx);
   }
 
@@ -57,7 +58,7 @@ public:
 
   inline llama_model *model() { return model_.get(); }
 
-  inline std::vector<llama_token
+  inline std::vector<llama_token> *tokens_ptr() { return &tokens_; }
 
   inline void set_tokens(std::vector<llama_token> tokens) {
     tokens_ = std::move(tokens);
package/bin/win32/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/node.lib
DELETED
Binary file

package/bin/win32/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/x64/node.lib
DELETED
Binary file