@fugood/llama.node 0.1.0 → 0.2.0
- package/CMakeLists.txt +6 -0
- package/README.md +2 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/lib/binding.ts +11 -0
- package/package.json +1 -1
- package/src/DetokenizeWorker.cpp +22 -0
- package/src/DetokenizeWorker.h +19 -0
- package/src/EmbeddingWorker.cpp +46 -0
- package/src/EmbeddingWorker.h +23 -0
- package/src/LlamaContext.cpp +62 -0
- package/src/LlamaContext.h +3 -0
- package/src/TokenizeWorker.cpp +26 -0
- package/src/TokenizeWorker.h +23 -0
- package/src/common.hpp +3 -2
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
package/CMakeLists.txt
CHANGED
@@ -77,6 +77,12 @@ file(
   "src/LlamaCompletionWorker.h"
   "src/LlamaContext.cpp"
   "src/LlamaContext.h"
+  "src/TokenizeWorker.cpp"
+  "src/TokenizeWorker.h"
+  "src/DetokenizeWorker.cpp"
+  "src/DetokenizeWorker.h"
+  "src/EmbeddingWorker.cpp"
+  "src/EmbeddingWorker.h"
   "src/LoadSessionWorker.cpp"
   "src/LoadSessionWorker.h"
   "src/SaveSessionWorker.cpp"
package/README.md
CHANGED
@@ -47,7 +47,8 @@ console.log('Result:', text)
 
 ## Lib Variants
 
-- [x] `default`: General usage,
+- [x] `default`: General usage, not support GPU except macOS (Metal)
+- [x] `vulkan`: Support GPU Vulkan (Windows/Linux), but some scenario might unstable
 
 ## License
 
package/bin/darwin/arm64/llama-node.node
CHANGED
Binary file

package/bin/darwin/x64/llama-node.node
CHANGED
Binary file

package/bin/linux/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux/x64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/arm64/llama-node.node
CHANGED
Binary file

package/bin/linux-vulkan/x64/llama-node.node
CHANGED
Binary file
package/lib/binding.ts
CHANGED
@@ -37,11 +37,22 @@ export type LlamaCompletionToken = {
   token: string
 }
 
+export type TokenizeResult = {
+  tokens: Int32Array
+}
+
+export type EmbeddingResult = {
+  embedding: Float32Array
+}
+
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
   completion(options: LlamaCompletionOptions, callback?: (token: LlamaCompletionToken) => void): Promise<LlamaCompletionResult>
   stopCompletion(): void
+  tokenize(text: string): Promise<TokenizeResult>
+  detokenize(tokens: number[]): Promise<string>
+  embedding(text: string): Promise<EmbeddingResult>
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
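Taken together, the binding.ts additions describe the new 0.2.0 surface: tokenize() resolves with an Int32Array of token ids, detokenize() turns a plain number[] back into text, and embedding() resolves with a Float32Array. A minimal round-trip sketch, assuming an already constructed LlamaContext instance named ctx (model loading options omitted; the demo function name is illustrative):

// Round-trip sketch for the new 0.2.0 methods; `ctx` is assumed to be an
// existing LlamaContext instance created elsewhere.
async function demo(ctx: LlamaContext): Promise<void> {
  // tokenize() resolves with { tokens: Int32Array }
  const { tokens } = await ctx.tokenize('Hello world')

  // detokenize() expects a plain number[], so convert the typed array first
  const text = await ctx.detokenize(Array.from(tokens))
  console.log(text)

  // embedding() resolves with { embedding: Float32Array }
  const { embedding } = await ctx.embedding('Hello world')
  console.log(embedding.length) // the model's embedding dimension
}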
package/package.json
CHANGED

package/src/DetokenizeWorker.cpp
ADDED
@@ -0,0 +1,22 @@
+#include "DetokenizeWorker.h"
+#include "LlamaContext.h"
+
+DetokenizeWorker::DetokenizeWorker(const Napi::CallbackInfo &info,
+                                   LlamaSessionPtr &sess,
+                                   std::vector<llama_token> &tokens)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
+      _tokens(std::move(tokens)) {}
+
+void DetokenizeWorker::Execute() {
+  const auto text = ::llama_detokenize_bpe(_sess->context(), _tokens);
+  _text = std::move(text);
+}
+
+void DetokenizeWorker::OnOK() {
+  Napi::Promise::Deferred::Resolve(
+      Napi::String::New(Napi::AsyncWorker::Env(), _text));
+}
+
+void DetokenizeWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/DetokenizeWorker.h
ADDED
@@ -0,0 +1,19 @@
+#include "common.hpp"
+#include <vector>
+
+class DetokenizeWorker : public Napi::AsyncWorker,
+                         public Napi::Promise::Deferred {
+public:
+  DetokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                   std::vector<llama_token> &tokens);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::vector<llama_token> _tokens;
+  std::string _text;
+};
package/src/EmbeddingWorker.cpp
ADDED
@@ -0,0 +1,46 @@
+#include "EmbeddingWorker.h"
+#include "LlamaContext.h"
+
+EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
+                                 LlamaSessionPtr &sess, std::string text)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+
+void EmbeddingWorker::Execute() {
+  llama_kv_cache_clear(_sess->context());
+  auto tokens = ::llama_tokenize(_sess->context(), _text, true);
+  // add SEP if not present
+  if (tokens.empty() || tokens.back() != llama_token_sep(_sess->model())) {
+    tokens.push_back(llama_token_sep(_sess->model()));
+  }
+  const int n_embd = llama_n_embd(_sess->model());
+  do {
+    int ret =
+        llama_decode(_sess->context(),
+                     llama_batch_get_one(tokens.data(), tokens.size(), 0, 0));
+    if (ret < 0) {
+      SetError("Failed to inference, code: " + std::to_string(ret));
+      break;
+    }
+    const float *embd = llama_get_embeddings_seq(_sess->context(), 0);
+    if (embd == nullptr) {
+      SetError("Failed to get embeddings");
+      break;
+    }
+    _result.embedding.resize(n_embd);
+    memcpy(_result.embedding.data(), embd, n_embd * sizeof(float));
+  } while (false);
+}
+
+void EmbeddingWorker::OnOK() {
+  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
+  auto embedding = Napi::Float32Array::New(Napi::AsyncWorker::Env(),
+                                           _result.embedding.size());
+  memcpy(embedding.Data(), _result.embedding.data(),
+         _result.embedding.size() * sizeof(float));
+  result.Set("embedding", embedding);
+  Napi::Promise::Deferred::Resolve(result);
+}
+
+void EmbeddingWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/EmbeddingWorker.h
ADDED
@@ -0,0 +1,23 @@
+#include "common.hpp"
+#include <vector>
+
+struct EmbeddingResult {
+  std::vector<float> embedding;
+};
+
+class EmbeddingWorker : public Napi::AsyncWorker,
+                        public Napi::Promise::Deferred {
+public:
+  EmbeddingWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                  std::string text);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::string _text;
+  EmbeddingResult _result;
+};
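Since EmbeddingWorker hands the raw sequence embedding back to JavaScript as a Float32Array, a common use is comparing two texts. A hedged sketch of cosine similarity on top of the new embedding() method, assuming ctx is an existing LlamaContext backed by an embedding-capable model (the helper names cosine and similarity are illustrative, not part of the package):

// Cosine similarity over Float32Array embeddings returned by embedding()
function cosine(a: Float32Array, b: Float32Array): number {
  let dot = 0
  let na = 0
  let nb = 0
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i]
    na += a[i] * a[i]
    nb += b[i] * b[i]
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb))
}

async function similarity(ctx: LlamaContext, x: string, y: string): Promise<number> {
  const { embedding: ex } = await ctx.embedding(x)
  const { embedding: ey } = await ctx.embedding(y)
  return cosine(ex, ey)
}

Note that the worker clears the KV cache and runs a fresh decode for each call, so each embedding() invocation is independent of any prior completion state.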
package/src/LlamaContext.cpp
CHANGED
@@ -1,8 +1,11 @@
 #include "LlamaContext.h"
+#include "DetokenizeWorker.h"
 #include "DisposeWorker.h"
+#include "EmbeddingWorker.h"
 #include "LlamaCompletionWorker.h"
 #include "LoadSessionWorker.h"
 #include "SaveSessionWorker.h"
+#include "TokenizeWorker.h"
 
 void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
   Napi::Function func = DefineClass(
@@ -16,6 +19,13 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
       InstanceMethod<&LlamaContext::StopCompletion>(
           "stopCompletion",
           static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::Tokenize>(
+          "tokenize", static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::Detokenize>(
+          "detokenize",
+          static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::Embedding>(
+          "embedding", static_cast<napi_property_attributes>(napi_enumerable)),
       InstanceMethod<&LlamaContext::SaveSession>(
           "saveSession",
           static_cast<napi_property_attributes>(napi_enumerable)),
@@ -158,6 +168,58 @@ void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
   }
 }
 
+// tokenize(text: string): Promise<TokenizeResult>
+Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto text = info[0].ToString().Utf8Value();
+  auto *worker = new TokenizeWorker(info, _sess, text);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// detokenize(tokens: number[]): Promise<string>
+Napi::Value LlamaContext::Detokenize(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsArray()) {
+    Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto tokens = info[0].As<Napi::Array>();
+  std::vector<int32_t> token_ids;
+  for (size_t i = 0; i < tokens.Length(); i++) {
+    token_ids.push_back(tokens.Get(i).ToNumber().Int32Value());
+  }
+  auto *worker = new DetokenizeWorker(info, _sess, token_ids);
+  worker->Queue();
+  return worker->Promise();
+}
+
+// embedding(text: string): Promise<EmbeddingResult>
+Napi::Value LlamaContext::Embedding(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (info.Length() < 1 || !info[0].IsString()) {
+    Napi::TypeError::New(env, "String expected").ThrowAsJavaScriptException();
+  }
+  if (_sess == nullptr) {
+    Napi::TypeError::New(env, "Context is disposed")
+        .ThrowAsJavaScriptException();
+  }
+  auto text = info[0].ToString().Utf8Value();
+  auto *worker = new EmbeddingWorker(info, _sess, text);
+  worker->Queue();
+  return worker->Promise();
+}
+
 // saveSession(path: string): Promise<void> throws error
 Napi::Value LlamaContext::SaveSession(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
package/src/LlamaContext.h
CHANGED
@@ -11,6 +11,9 @@ private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
+  Napi::Value Tokenize(const Napi::CallbackInfo &info);
+  Napi::Value Detokenize(const Napi::CallbackInfo &info);
+  Napi::Value Embedding(const Napi::CallbackInfo &info);
   Napi::Value SaveSession(const Napi::CallbackInfo &info);
   Napi::Value LoadSession(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
package/src/TokenizeWorker.cpp
ADDED
@@ -0,0 +1,26 @@
+#include "TokenizeWorker.h"
+#include "LlamaContext.h"
+
+TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
+                               LlamaSessionPtr &sess, std::string text)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text) {}
+
+void TokenizeWorker::Execute() {
+  const auto tokens = ::llama_tokenize(_sess->context(), _text, false);
+  _result = {.tokens = std::move(tokens)};
+}
+
+void TokenizeWorker::OnOK() {
+  Napi::HandleScope scope(Napi::AsyncWorker::Env());
+  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
+  auto tokens =
+      Napi::Int32Array::New(Napi::AsyncWorker::Env(), _result.tokens.size());
+  memcpy(tokens.Data(), _result.tokens.data(),
+         _result.tokens.size() * sizeof(llama_token));
+  result.Set("tokens", tokens);
+  Napi::Promise::Deferred::Resolve(result);
+}
+
+void TokenizeWorker::OnError(const Napi::Error &err) {
+  Napi::Promise::Deferred::Reject(err.Value());
+}
package/src/TokenizeWorker.h
ADDED
@@ -0,0 +1,23 @@
+#include "common.hpp"
+#include <vector>
+
+struct TokenizeResult {
+  std::vector<llama_token> tokens;
+};
+
+class TokenizeWorker : public Napi::AsyncWorker,
+                       public Napi::Promise::Deferred {
+public:
+  TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+                 std::string text);
+
+protected:
+  void Execute();
+  void OnOK();
+  void OnError(const Napi::Error &err);
+
+private:
+  LlamaSessionPtr _sess;
+  std::string _text;
+  TokenizeResult _result;
+};
package/src/common.hpp
CHANGED
@@ -47,7 +47,8 @@ constexpr T get_option(const Napi::Object &options, const std::string &name,
 class LlamaSession {
 public:
   LlamaSession(llama_model *model, llama_context *ctx, gpt_params params)
-      : model_(LlamaCppModel(model, llama_free_model)),
+      : model_(LlamaCppModel(model, llama_free_model)),
+        ctx_(LlamaCppContext(ctx, llama_free)), params_(params) {
     tokens_.reserve(params.n_ctx);
   }
 
@@ -57,7 +58,7 @@ public:
 
   inline llama_model *model() { return model_.get(); }
 
-  inline std::vector<llama_token
+  inline std::vector<llama_token> *tokens_ptr() { return &tokens_; }
 
   inline void set_tokens(std::vector<llama_token> tokens) {
     tokens_ = std::move(tokens);
package/bin/win32/arm64/llama-node.node
CHANGED
Binary file

package/bin/win32/arm64/node.lib
DELETED
Binary file

package/bin/win32/x64/llama-node.node
CHANGED
Binary file

package/bin/win32/x64/node.lib
DELETED
Binary file