@fugood/llama.node 0.1.0 → 0.2.0

package/CMakeLists.txt CHANGED
@@ -77,6 +77,12 @@ file(
   "src/LlamaCompletionWorker.h"
   "src/LlamaContext.cpp"
   "src/LlamaContext.h"
+  "src/TokenizeWorker.cpp"
+  "src/TokenizeWorker.h"
+  "src/DetokenizeWorker.cpp"
+  "src/DetokenizeWorker.h"
+  "src/EmbeddingWorker.cpp"
+  "src/EmbeddingWorker.h"
   "src/LoadSessionWorker.cpp"
   "src/LoadSessionWorker.h"
   "src/SaveSessionWorker.cpp"
package/README.md CHANGED
@@ -47,7 +47,8 @@ console.log('Result:', text)
 
 ## Lib Variants
 
-- [x] `default`: General usage, Supported GPU: Metal (macOS) and Vulkan (Linux / Windows)
+- [x] `default`: General usage, no GPU support except macOS (Metal)
+- [x] `vulkan`: Supports GPU via Vulkan (Windows / Linux), but some scenarios might be unstable
 
 ## License
 
Binary file
Binary file
Binary file
Binary file
package/lib/binding.ts CHANGED
@@ -37,11 +37,22 @@ export type LlamaCompletionToken = {
   token: string
 }
 
+export type TokenizeResult = {
+  tokens: Int32Array
+}
+
+export type EmbeddingResult = {
+  embedding: Float32Array
+}
+
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
   stopCompletion(): void
+  tokenize(text: string): Promise<TokenizeResult>
+  detokenize(tokens: number[]): Promise<string>
+  embedding(text: string): Promise<EmbeddingResult>
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
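The new binding surface is promise-based and typed as shown above: `tokenize` resolves with an `Int32Array` of token ids, `detokenize` takes a plain `number[]`, and `embedding` resolves with a `Float32Array`. A minimal usage sketch in TypeScript, assuming a `LlamaContext` instance named `ctx` has already been constructed through the package's model-loading API (which is not part of this diff):

```ts
// Sketch only: `ctx` is an assumed, already-constructed LlamaContext.
const { tokens } = await ctx.tokenize('The quick brown fox')
console.log('token ids:', tokens)                   // Int32Array

// detokenize() is typed as number[], so convert the typed array first.
const text = await ctx.detokenize(Array.from(tokens))
console.log('round-trip text:', text)

const { embedding } = await ctx.embedding('The quick brown fox')
console.log('embedding length:', embedding.length)  // Float32Array of n_embd floats
```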
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "Llama.cpp for Node.js",
   "main": "lib/index.js",
   "scripts": {
package/src/DetokenizeWorker.cpp ADDED
@@ -0,0 +1,22 @@
+#include "DetokenizeWorker.h"
+#include "LlamaContext.h"
+
+DetokenizeWorker::DetokenizeWorker(const Napi::CallbackInfo &info,
+                                   LlamaSessionPtr &sess,
+                                   std::vector<llama_token> &tokens)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
+      _tokens(std::move(tokens)) {}
+
+void DetokenizeWorker::Execute() {
+  const auto text = ::llama_detokenize_bpe(_sess->context(), _tokens);
+  _text = std::move(text);
+}
+
+void DetokenizeWorker::OnOK() {
+  Napi::Promise::Deferred::Resolve(
+      Napi::String::New(Napi::AsyncWorker::Env(), _text));
+}
+
+void DetokenizeWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/DetokenizeWorker.h ADDED
@@ -0,0 +1,19 @@
+#include "common.hpp"
+#include <vector>
+
+class DetokenizeWorker : public Napi::AsyncWorker,
+                         public Napi::Promise::Deferred {
+public:
+  DetokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                   std::vector<llama_token> &tokens);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::vector<llama_token> _tokens;
+  std::string _text;
+};
package/src/EmbeddingWorker.cpp ADDED
@@ -0,0 +1,46 @@
+#include "EmbeddingWorker.h"
+#include "LlamaContext.h"
+
+EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
+                                 LlamaSessionPtr &sess, std::string text)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+
+void EmbeddingWorker::Execute() {
+  llama_kv_cache_clear(_sess->context());
+  auto tokens = ::llama_tokenize(_sess->context(), _text, true);
+  // add SEP if not present
+  if (tokens.empty() || tokens.back() != llama_token_sep(_sess->model())) {
+    tokens.push_back(llama_token_sep(_sess->model()));
+  }
+  const int n_embd = llama_n_embd(_sess->model());
+  do {
+    int ret =
+        llama_decode(_sess->context(),
+                     llama_batch_get_one(tokens.data(), tokens.size(), 0, 0));
+    if (ret < 0) {
+      SetError("Failed to inference, code: " + std::to_string(ret));
+      break;
+    }
+    const float *embd = llama_get_embeddings_seq(_sess->context(), 0);
+    if (embd == nullptr) {
+      SetError("Failed to get embeddings");
+      break;
+    }
+    _result.embedding.resize(n_embd);
+    memcpy(_result.embedding.data(), embd, n_embd * sizeof(float));
+  } while (false);
+}
+
+void EmbeddingWorker::OnOK() {
+  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
+  auto embedding = Napi::Float32Array::New(Napi::AsyncWorker::Env(),
+                                           _result.embedding.size());
+  memcpy(embedding.Data(), _result.embedding.data(),
+         _result.embedding.size() * sizeof(float));
+  result.Set("embedding", embedding);
+  Napi::Promise::Deferred::Resolve(result);
+}
+
+void EmbeddingWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
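EmbeddingWorker clears the KV cache, tokenizes the input (appending a SEP token when missing), runs a single `llama_decode`, and copies the sequence embedding into the result. On the JavaScript side the vector arrives as a `Float32Array`; a hedged sketch of comparing two such vectors with cosine similarity (the `ctx` instance, the input strings, and the assumption that the model was loaded with embedding support are not part of this diff):

```ts
// Assumed: `ctx` is an existing LlamaContext created with embedding support.
const a = (await ctx.embedding('I like cats')).embedding
const b = (await ctx.embedding('I like dogs')).embedding

// Cosine similarity over the two Float32Array vectors of length n_embd.
let dot = 0, normA = 0, normB = 0
for (let i = 0; i < a.length; i++) {
  dot += a[i] * b[i]
  normA += a[i] * a[i]
  normB += b[i] * b[i]
}
console.log('cosine similarity:', dot / (Math.sqrt(normA) * Math.sqrt(normB)))
```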
package/src/EmbeddingWorker.h ADDED
@@ -0,0 +1,23 @@
+#include "common.hpp"
+#include <vector>
+
+struct EmbeddingResult {
+  std::vector<float> embedding;
+};
+
+class EmbeddingWorker : public Napi::AsyncWorker,
+                        public Napi::Promise::Deferred {
+public:
+  EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                  std::string text);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::string _text;
+  EmbeddingResult _result;
+};
package/src/LlamaContext.cpp CHANGED
@@ -1,8 +1,11 @@
 #include "LlamaContext.h"
+#include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
+#include "EmbeddingWorker.h"
 #include "LlamaCompletionWorker.h"
 #include "LoadSessionWorker.h"
 #include "SaveSessionWorker.h"
+#include "TokenizeWorker.h"
 
 void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
   Napi::Function func = DefineClass(
@@ -16,6 +19,13 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
           InstanceMethod<&LlamaContext::StopCompletion>(
               "stopCompletion",
               static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::Tokenize>(
+              "tokenize", static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::Detokenize>(
+              "detokenize",
+              static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::Embedding>(
+              "embedding", static_cast<napi_property_attributes>(napi_enumerable)),
           InstanceMethod<&LlamaContext::SaveSession>(
               "saveSession",
               static_cast<napi_property_attributes>(napi_enumerable)),
@@ -158,6 +168,58 @@ void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
   }
 }
 
+// tokenize(text: string): Promise<TokenizeResult>
+Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto text = info[0].ToString().Utf8Value();
+  auto *worker = new TokenizeWorker(info, _sess, text);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// detokenize(tokens: number[]): Promise<string>
+Napi::Value LlamaContext::Detokenize(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsArray()) {
+    Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto tokens = info[0].As<Napi::Array>();
+  std::vector<int32_t> token_ids;
+  for (size_t i = 0; i < tokens.Length(); i++) {
+    token_ids.push_back(tokens.Get(i).ToNumber().Int32Value());
+  }
+  auto *worker = new DetokenizeWorker(info, _sess, token_ids);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// embedding(text: string): Promise<EmbeddingResult>
+Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto text = info[0].ToString().Utf8Value();
+  auto *worker = new EmbeddingWorker(info, _sess, text);
+  worker->Queue();
+  return worker->Promise();
+}
+
 // saveSession(path: string): Promise<void> throws error
 Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
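All three new native methods validate their first argument and throw a `TypeError` with "Context is disposed" once the session has been released, so pending tokenize/detokenize/embedding calls should complete before `release()`. A small ordering sketch (again assuming an existing `ctx`):

```ts
// Assumed: `ctx` is an existing LlamaContext.
try {
  const { tokens } = await ctx.tokenize('goodbye')
  console.log('token count:', tokens.length)
} finally {
  // After release(), tokenize()/detokenize()/embedding() throw "Context is disposed".
  await ctx.release()
}
```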
package/src/LlamaContext.h CHANGED
@@ -11,6 +11,9 @@ private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
+  Napi::Value Tokenize(const Napi::CallbackInfo &info);
+  Napi::Value Detokenize(const Napi::CallbackInfo &info);
+  Napi::Value Embedding(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
package/src/TokenizeWorker.cpp ADDED
@@ -0,0 +1,26 @@
+#include "TokenizeWorker.h"
+#include "LlamaContext.h"
+
+TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
+                               LlamaSessionPtr &sess, std::string text)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+
+void TokenizeWorker::Execute() {
+  const auto tokens = ::llama_tokenize(_sess->context(), _text, false);
+  _result = {.tokens = std::move(tokens)};
+}
+
+void TokenizeWorker::OnOK() {
+  Napi::HandleScope scope(Napi::AsyncWorker::Env());
+  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
+  auto tokens =
+      Napi::Int32Array::New(Napi::AsyncWorker::Env(), _result.tokens.size());
+  memcpy(tokens.Data(), _result.tokens.data(),
+         _result.tokens.size() * sizeof(llama_token));
+  result.Set("tokens", tokens);
+  Napi::Promise::Deferred::Resolve(result);
+}
+
+void TokenizeWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/TokenizeWorker.h ADDED
@@ -0,0 +1,23 @@
+#include "common.hpp"
+#include <vector>
+
+struct TokenizeResult {
+  std::vector<llama_token> tokens;
+};
+
+class TokenizeWorker : public Napi::AsyncWorker,
+                       public Napi::Promise::Deferred {
+public:
+  TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                 std::string text);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::string _text;
+  TokenizeResult _result;
+};
package/src/common.hpp CHANGED
@@ -47,7 +47,8 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 class LlamaSession {
 public:
   LlamaSession(llama_model *model, llama_context *ctx, gpt_params params)
-      : model_(LlamaCppModel(model, llama_free_model)), ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
+      : model_(LlamaCppModel(model, llama_free_model)),
+        ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
     tokens_.reserve(params.n_ctx);
   }
 
@@ -57,7 +58,7 @@ public:
 
   inline llama_model *model() { return model_.get(); }
 
-  inline std::vector<llama_token>* tokens_ptr() { return &tokens_; }
+  inline std::vector<llama_token> *tokens_ptr() { return &tokens_; }
 
   inline void set_tokens(std::vector<llama_token> tokens) {
     tokens_ = std::move(tokens);
Binary file
Binary file
Binary file
Binary file