@fugood/llama.node 0.4.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/CMakeLists.txt +4 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/lib/binding.ts +66 -6
  11. package/lib/index.js +59 -17
  12. package/lib/index.ts +74 -23
  13. package/package.json +1 -1
  14. package/src/DecodeAudioTokenWorker.cpp +40 -0
  15. package/src/DecodeAudioTokenWorker.h +22 -0
  16. package/src/EmbeddingWorker.cpp +7 -5
  17. package/src/LlamaCompletionWorker.cpp +68 -54
  18. package/src/LlamaCompletionWorker.h +7 -8
  19. package/src/LlamaContext.cpp +551 -235
  20. package/src/LlamaContext.h +26 -4
  21. package/src/LoadSessionWorker.cpp +4 -2
  22. package/src/SaveSessionWorker.cpp +10 -6
  23. package/src/TokenizeWorker.cpp +23 -14
  24. package/src/TokenizeWorker.h +2 -2
  25. package/src/addons.cc +8 -11
  26. package/src/common.hpp +129 -126
  27. package/src/llama.cpp/.github/workflows/build.yml +2 -2
  28. package/src/llama.cpp/.github/workflows/release.yml +152 -129
  29. package/src/llama.cpp/.github/workflows/winget.yml +42 -0
  30. package/src/llama.cpp/common/arg.cpp +14 -13
  31. package/src/llama.cpp/common/common.cpp +4 -75
  32. package/src/llama.cpp/common/common.h +7 -12
  33. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
  34. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
  35. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
  36. package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
  37. package/src/llama.cpp/examples/simple/simple.cpp +1 -1
  38. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  39. package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
  40. package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
  41. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  42. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
  43. package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
  44. package/src/llama.cpp/ggml/include/ggml.h +11 -0
  45. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
  46. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
  47. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
  48. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
  51. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  52. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
  53. package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
  54. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
  55. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
  56. package/src/llama.cpp/ggml/src/ggml.c +64 -18
  57. package/src/llama.cpp/include/llama.h +24 -124
  58. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  59. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  60. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  61. package/src/llama.cpp/src/llama-batch.cpp +3 -1
  62. package/src/llama.cpp/src/llama-context.cpp +60 -110
  63. package/src/llama.cpp/src/llama-graph.cpp +137 -233
  64. package/src/llama.cpp/src/llama-graph.h +49 -7
  65. package/src/llama.cpp/src/llama-hparams.cpp +17 -1
  66. package/src/llama.cpp/src/llama-hparams.h +34 -5
  67. package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
  68. package/src/llama.cpp/src/llama-kv-cache.h +201 -85
  69. package/src/llama.cpp/src/llama-memory.h +3 -2
  70. package/src/llama.cpp/src/llama-model.cpp +273 -94
  71. package/src/llama.cpp/src/llama-model.h +4 -1
  72. package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
  73. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
  74. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
  75. package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
  76. package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
  77. package/src/llama.cpp/tools/mtmd/clip.h +6 -4
  78. package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
  79. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  80. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
  81. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
  82. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
  83. package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
  84. package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
  85. package/src/llama.cpp/tools/run/run.cpp +2 -2
  86. package/src/llama.cpp/tools/server/server.cpp +158 -47
  87. package/src/llama.cpp/tools/server/utils.hpp +71 -43
  88. package/src/llama.cpp/tools/tts/tts.cpp +4 -2
  89. package/src/tts_utils.cpp +342 -0
  90. package/src/tts_utils.h +62 -0
  91. package/bin/win32/arm64/llama-node.node +0 -0
  92. package/bin/win32/arm64/node.lib +0 -0
  93. package/bin/win32/x64/llama-node.node +0 -0
  94. package/bin/win32/x64/node.lib +0 -0
  95. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  96. package/bin/win32-vulkan/arm64/node.lib +0 -0
  97. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  98. package/bin/win32-vulkan/x64/node.lib +0 -0
package/src/LlamaContext.h CHANGED
@@ -1,15 +1,22 @@
  #include "common.hpp"
- #include "tools/mtmd/mtmd.h"
  #include "tools/mtmd/clip.h"
+ #include "tools/mtmd/mtmd.h"
+ #include "tts_utils.h"

  class LlamaCompletionWorker;

+ struct vocoder_context {
+   common_params params;
+   std::shared_ptr<llama_model> model;
+   std::shared_ptr<llama_context> context;
+ };
+
  class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
  public:
    LlamaContext(const Napi::CallbackInfo &info);
    ~LlamaContext();
    static void ToggleNativeLog(const Napi::CallbackInfo &info);
-   static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
+   static Napi::Value ModelInfo(const Napi::CallbackInfo &info);
    static void Init(Napi::Env env, Napi::Object &exports);

  private:
@@ -27,20 +34,35 @@ private:
    void RemoveLoraAdapters(const Napi::CallbackInfo &info);
    Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
    Napi::Value Release(const Napi::CallbackInfo &info);
-
+
    // Multimodal methods
    Napi::Value InitMultimodal(const Napi::CallbackInfo &info);
    Napi::Value IsMultimodalEnabled(const Napi::CallbackInfo &info);
+   Napi::Value GetMultimodalSupport(const Napi::CallbackInfo &info);
    void ReleaseMultimodal(const Napi::CallbackInfo &info);

+   // TTS methods
+   tts_type getTTSType(Napi::Env env, nlohmann::json speaker = nullptr);
+   Napi::Value InitVocoder(const Napi::CallbackInfo &info);
+   void ReleaseVocoder(const Napi::CallbackInfo &info);
+   Napi::Value IsVocoderEnabled(const Napi::CallbackInfo &info);
+   Napi::Value GetFormattedAudioCompletion(const Napi::CallbackInfo &info);
+   Napi::Value GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info);
+   Napi::Value DecodeAudioTokens(const Napi::CallbackInfo &info);
+
    std::string _info;
    Napi::Object _meta;
    LlamaSessionPtr _sess = nullptr;
    common_chat_templates_ptr _templates;
    std::vector<common_adapter_lora_info> _lora;
    LlamaCompletionWorker *_wip = nullptr;
-
+
    // Multimodal support
    mtmd_context *_mtmd_ctx = nullptr;
    bool _has_multimodal = false;
+
+   // Vocoder support
+   tts_type _tts_type = UNKNOWN;
+   vocoder_context _vocoder;
+   bool _has_vocoder = false;
  };
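
The new vocoder_context pairs llama.cpp's C-style handles with std::shared_ptr, so the TTS model's lifetime is managed independently of the main session. Below is a minimal sketch of how such a struct could be populated using current llama.cpp loader APIs; the package's actual wiring lives in LlamaContext.cpp's InitVocoder (not shown in this excerpt), and init_vocoder_sketch is a hypothetical helper, not the package's code.

#include <memory>

#include "llama.h"

// Sketch only: mirrors the vocoder_context fields from the diff above,
// minus common_params to stay self-contained. Members destruct in reverse
// declaration order, so the context is freed before the model it points to.
struct vocoder_context_sketch {
  std::shared_ptr<llama_model> model;
  std::shared_ptr<llama_context> context;
};

static bool init_vocoder_sketch(vocoder_context_sketch &vc,
                                const char *model_path) {
  llama_model_params mparams = llama_model_default_params();
  llama_model *raw_model = llama_model_load_from_file(model_path, mparams);
  if (raw_model == nullptr) {
    return false;
  }
  // Custom deleters free the C handles when the last reference drops,
  // e.g. when something like ReleaseVocoder resets the struct.
  vc.model.reset(raw_model, llama_model_free);

  llama_context_params cparams = llama_context_default_params();
  llama_context *raw_ctx = llama_init_from_model(raw_model, cparams);
  if (raw_ctx == nullptr) {
    vc.model.reset();
    return false;
  }
  vc.context.reset(raw_ctx, llama_free);
  return true;
}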
package/src/LoadSessionWorker.cpp CHANGED
@@ -12,8 +12,10 @@ void LoadSessionWorker::Execute() {
    std::vector<llama_token> tokens;
    tokens.reserve(_sess->params().n_ctx);

-   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
-   auto null_token_iter = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
+   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
+   // the null token
+   auto null_token_iter =
+       std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
    if (null_token_iter != tokens.end()) {
      tokens.resize(std::distance(tokens.begin(), null_token_iter));
    }
package/src/SaveSessionWorker.cpp CHANGED
@@ -9,16 +9,20 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
  void SaveSessionWorker::Execute() {
    _sess->get_mutex().lock();
    auto tokens = _sess->tokens_ptr();
-   auto tokens_to_save = std::vector<llama_token>(tokens->begin(), tokens->end());
+   auto tokens_to_save =
+       std::vector<llama_token>(tokens->begin(), tokens->end());

-   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
-   auto null_token_iter = std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
+   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
+   // the null token
+   auto null_token_iter =
+       std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
    if (null_token_iter != tokens_to_save.end()) {
-     tokens_to_save.resize(std::distance(tokens_to_save.begin(), null_token_iter));
+     tokens_to_save.resize(
+         std::distance(tokens_to_save.begin(), null_token_iter));
    }

-   if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens_to_save.data(),
-                              tokens_to_save.size())) {
+   if (!llama_state_save_file(_sess->context(), _path.c_str(),
+                              tokens_to_save.data(), tokens_to_save.size())) {
      SetError("Failed to save session");
    }
    _sess->get_mutex().unlock();
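
Both session workers share the same truncation rule, merely reflowed here by clang-format: a token buffer may be padded with LLAMA_TOKEN_NULL, and everything from the first null token onward is dropped before the state file is read or written. Factored out as a standalone helper, the pattern is simply this (a sketch; the package inlines it in each worker rather than sharing a function):

#include <algorithm>
#include <vector>

#include "llama.h"

// Cut the token vector at the first LLAMA_TOKEN_NULL (defined as -1 in
// llama.h), keeping only the valid prefix.
static void truncate_at_null_token(std::vector<llama_token> &tokens) {
  auto null_it = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
  if (null_it != tokens.end()) {
    tokens.resize(std::distance(tokens.begin(), null_it));
  }
}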
package/src/TokenizeWorker.cpp CHANGED
@@ -2,17 +2,24 @@
  #include "LlamaContext.h"

  TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
-                                LlamaSessionPtr &sess, std::string text, std::vector<std::string> image_paths)
-     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _image_paths(image_paths) {}
+                                LlamaSessionPtr &sess, std::string text,
+                                std::vector<std::string> media_paths)
+     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text),
+       _media_paths(media_paths) {}

  void TokenizeWorker::Execute() {
    auto mtmd_ctx = _sess->get_mtmd_ctx();
-   if (!_image_paths.empty()) {
-     _result = tokenizeWithImages(mtmd_ctx, _text, _image_paths);
+   if (!_media_paths.empty()) {
+     try {
+       _result = tokenizeWithMedia(mtmd_ctx, _text, _media_paths);
+       mtmd_input_chunks_free(_result.chunks);
+     } catch (const std::exception &e) {
+       SetError(e.what());
+     }
    } else {
      const auto tokens = common_tokenize(_sess->context(), _text, false);
      _result.tokens = tokens;
-     _result.has_image = false;
+     _result.has_media = false;
    }
  }

@@ -24,24 +31,26 @@ void TokenizeWorker::OnOK() {
    memcpy(tokens.Data(), _result.tokens.data(),
           _result.tokens.size() * sizeof(llama_token));
    result.Set("tokens", tokens);
-   if (_result.has_image) {
-     result.Set("has_image", _result.has_image);
-
-     auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(), _result.bitmap_hashes.size());
+   result.Set("has_media", _result.has_media);
+   if (_result.has_media) {
+     auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(),
+                                           _result.bitmap_hashes.size());
      for (size_t i = 0; i < _result.bitmap_hashes.size(); i++) {
        bitmap_hashes.Set(i, _result.bitmap_hashes[i]);
      }
      result.Set("bitmap_hashes", bitmap_hashes);
-     auto chunk_pos = Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos.size());
+     auto chunk_pos =
+         Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos.size());
      for (size_t i = 0; i < _result.chunk_pos.size(); i++) {
        chunk_pos.Set(i, _result.chunk_pos[i]);
      }
      result.Set("chunk_pos", chunk_pos);
-     auto chunk_pos_images = Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos_images.size());
-     for (size_t i = 0; i < _result.chunk_pos_images.size(); i++) {
-       chunk_pos_images.Set(i, _result.chunk_pos_images[i]);
+     auto chunk_pos_media = Napi::Array::New(Napi::AsyncWorker::Env(),
+                                             _result.chunk_pos_media.size());
+     for (size_t i = 0; i < _result.chunk_pos_media.size(); i++) {
+       chunk_pos_media.Set(i, _result.chunk_pos_media[i]);
      }
-     result.Set("chunk_pos_images", chunk_pos_images);
+     result.Set("chunk_pos_media", chunk_pos_media);
    }
    Napi::Promise::Deferred::Resolve(result);
  }
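
The image-to-media rename also changes the shape of the object that OnOK resolves to JavaScript. Pieced together from the code above, TokenizeResult now looks roughly like the sketch below; the authoritative definition lives in package/src/common.hpp (changed in this release but not shown in this excerpt), and the element types here are inferred, not confirmed.

#include <string>
#include <vector>

#include "llama.h"
#include "tools/mtmd/mtmd.h"

// Inferred sketch of TokenizeResult after the rename; field types are a
// guess based on how Execute() and OnOK() use them.
struct TokenizeResultSketch {
  std::vector<llama_token> tokens;        // always populated
  bool has_media = false;                 // was has_image
  std::vector<std::string> bitmap_hashes; // one hash per decoded media bitmap
  std::vector<size_t> chunk_pos;          // start positions of all chunks
  std::vector<size_t> chunk_pos_media;    // was chunk_pos_images
  mtmd_input_chunks *chunks = nullptr;    // freed by Execute() right after tokenizing
};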
package/src/TokenizeWorker.h CHANGED
@@ -5,7 +5,7 @@ class TokenizeWorker : public Napi::AsyncWorker,
                         public Napi::Promise::Deferred {
  public:
    TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                  std::string text, std::vector<std::string> image_paths);
+                  std::string text, std::vector<std::string> media_paths);

  protected:
    void Execute();
@@ -15,6 +15,6 @@ protected:
  private:
    LlamaSessionPtr _sess;
    std::string _text;
-   std::vector<std::string> _image_paths;
+   std::vector<std::string> _media_paths;
    TokenizeResult _result;
  };
package/src/addons.cc CHANGED
@@ -5,25 +5,22 @@
  extern "C" void cleanup_logging();

  // Register cleanup function on module unload
- static Napi::Value register_cleanup(const Napi::CallbackInfo& info) {
-   napi_add_env_cleanup_hook(info.Env(), [](void*) {
-     cleanup_logging();
-   }, nullptr);
-
+ static Napi::Value register_cleanup(const Napi::CallbackInfo &info) {
+   napi_add_env_cleanup_hook(
+       info.Env(), [](void *) { cleanup_logging(); }, nullptr);
+
    return info.Env().Undefined();
  }

  Napi::Object Init(Napi::Env env, Napi::Object exports) {
    LlamaContext::Init(env, exports);
-
+
    // Register our cleanup handler for module unload
    exports.Set("__registerCleanup", Napi::Function::New(env, register_cleanup));
-
+
    // Also register cleanup directly on module init
-   napi_add_env_cleanup_hook(env, [](void*) {
-     cleanup_logging();
-   }, nullptr);
-
+   napi_add_env_cleanup_hook(env, [](void *) { cleanup_logging(); }, nullptr);
+
    return exports;
  }

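
The addons.cc change is formatting only, but the pattern it reflows is worth spelling out: napi_add_env_cleanup_hook takes a plain void (*)(void *) function pointer, which is why the lambdas must stay capture-less, and the hook fires when the Node.js environment (main instance or worker thread) tears down. A self-contained sketch of the same registration:

#include <node_api.h>

extern "C" void cleanup_logging(); // provided elsewhere in the addon

static void register_logging_cleanup(napi_env env) {
  // A capture-less lambda decays to the function pointer N-API expects.
  // The third argument is forwarded to the callback's void * parameter;
  // unused here, so nullptr.
  napi_add_env_cleanup_hook(
      env, [](void * /*arg*/) { cleanup_logging(); }, nullptr);
}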