@fugood/llama.node 0.4.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/CMakeLists.txt +4 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/lib/binding.ts +66 -6
  11. package/lib/index.js +59 -17
  12. package/lib/index.ts +74 -23
  13. package/package.json +1 -1
  14. package/src/DecodeAudioTokenWorker.cpp +40 -0
  15. package/src/DecodeAudioTokenWorker.h +22 -0
  16. package/src/EmbeddingWorker.cpp +7 -5
  17. package/src/LlamaCompletionWorker.cpp +68 -54
  18. package/src/LlamaCompletionWorker.h +7 -8
  19. package/src/LlamaContext.cpp +551 -235
  20. package/src/LlamaContext.h +26 -4
  21. package/src/LoadSessionWorker.cpp +4 -2
  22. package/src/SaveSessionWorker.cpp +10 -6
  23. package/src/TokenizeWorker.cpp +23 -14
  24. package/src/TokenizeWorker.h +2 -2
  25. package/src/addons.cc +8 -11
  26. package/src/common.hpp +129 -126
  27. package/src/llama.cpp/.github/workflows/build.yml +2 -2
  28. package/src/llama.cpp/.github/workflows/release.yml +152 -129
  29. package/src/llama.cpp/.github/workflows/winget.yml +42 -0
  30. package/src/llama.cpp/common/arg.cpp +14 -13
  31. package/src/llama.cpp/common/common.cpp +4 -75
  32. package/src/llama.cpp/common/common.h +7 -12
  33. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
  34. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
  35. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
  36. package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
  37. package/src/llama.cpp/examples/simple/simple.cpp +1 -1
  38. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  39. package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
  40. package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
  41. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  42. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
  43. package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
  44. package/src/llama.cpp/ggml/include/ggml.h +11 -0
  45. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
  46. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
  47. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
  48. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
  51. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  52. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
  53. package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
  54. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
  55. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
  56. package/src/llama.cpp/ggml/src/ggml.c +64 -18
  57. package/src/llama.cpp/include/llama.h +24 -124
  58. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  59. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  60. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  61. package/src/llama.cpp/src/llama-batch.cpp +3 -1
  62. package/src/llama.cpp/src/llama-context.cpp +60 -110
  63. package/src/llama.cpp/src/llama-graph.cpp +137 -233
  64. package/src/llama.cpp/src/llama-graph.h +49 -7
  65. package/src/llama.cpp/src/llama-hparams.cpp +17 -1
  66. package/src/llama.cpp/src/llama-hparams.h +34 -5
  67. package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
  68. package/src/llama.cpp/src/llama-kv-cache.h +201 -85
  69. package/src/llama.cpp/src/llama-memory.h +3 -2
  70. package/src/llama.cpp/src/llama-model.cpp +273 -94
  71. package/src/llama.cpp/src/llama-model.h +4 -1
  72. package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
  73. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
  74. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
  75. package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
  76. package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
  77. package/src/llama.cpp/tools/mtmd/clip.h +6 -4
  78. package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
  79. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  80. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
  81. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
  82. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
  83. package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
  84. package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
  85. package/src/llama.cpp/tools/run/run.cpp +2 -2
  86. package/src/llama.cpp/tools/server/server.cpp +158 -47
  87. package/src/llama.cpp/tools/server/utils.hpp +71 -43
  88. package/src/llama.cpp/tools/tts/tts.cpp +4 -2
  89. package/src/tts_utils.cpp +342 -0
  90. package/src/tts_utils.h +62 -0
  91. package/bin/win32/arm64/llama-node.node +0 -0
  92. package/bin/win32/arm64/node.lib +0 -0
  93. package/bin/win32/x64/llama-node.node +0 -0
  94. package/bin/win32/x64/node.lib +0 -0
  95. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  96. package/bin/win32-vulkan/arm64/node.lib +0 -0
  97. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  98. package/bin/win32-vulkan/x64/node.lib +0 -0
package/src/LlamaContext.h CHANGED
@@ -1,15 +1,22 @@
  #include "common.hpp"
- #include "tools/mtmd/mtmd.h"
  #include "tools/mtmd/clip.h"
+ #include "tools/mtmd/mtmd.h"
+ #include "tts_utils.h"

  class LlamaCompletionWorker;

+ struct vocoder_context {
+   common_params params;
+   std::shared_ptr<llama_model> model;
+   std::shared_ptr<llama_context> context;
+ };
+
  class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
  public:
    LlamaContext(const Napi::CallbackInfo &info);
    ~LlamaContext();
    static void ToggleNativeLog(const Napi::CallbackInfo &info);
-   static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
+   static Napi::Value ModelInfo(const Napi::CallbackInfo &info);
    static void Init(Napi::Env env, Napi::Object &exports);

  private:
@@ -27,20 +34,35 @@ private:
    void RemoveLoraAdapters(const Napi::CallbackInfo &info);
    Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
    Napi::Value Release(const Napi::CallbackInfo &info);
-
+
    // Multimodal methods
    Napi::Value InitMultimodal(const Napi::CallbackInfo &info);
    Napi::Value IsMultimodalEnabled(const Napi::CallbackInfo &info);
+   Napi::Value GetMultimodalSupport(const Napi::CallbackInfo &info);
    void ReleaseMultimodal(const Napi::CallbackInfo &info);

+   // TTS methods
+   tts_type getTTSType(Napi::Env env, nlohmann::json speaker = nullptr);
+   Napi::Value InitVocoder(const Napi::CallbackInfo &info);
+   void ReleaseVocoder(const Napi::CallbackInfo &info);
+   Napi::Value IsVocoderEnabled(const Napi::CallbackInfo &info);
+   Napi::Value GetFormattedAudioCompletion(const Napi::CallbackInfo &info);
+   Napi::Value GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info);
+   Napi::Value DecodeAudioTokens(const Napi::CallbackInfo &info);
+
    std::string _info;
    Napi::Object _meta;
    LlamaSessionPtr _sess = nullptr;
    common_chat_templates_ptr _templates;
    std::vector<common_adapter_lora_info> _lora;
    LlamaCompletionWorker *_wip = nullptr;
-
+
    // Multimodal support
    mtmd_context *_mtmd_ctx = nullptr;
    bool _has_multimodal = false;
+
+   // Vocoder support
+   tts_type _tts_type = UNKNOWN;
+   vocoder_context _vocoder;
+   bool _has_vocoder = false;
  };
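
The new vocoder_context pairs llama.cpp's C-style handles with std::shared_ptr, so the TTS model's lifetime is managed independently of the main session. Below is a minimal sketch of how such a struct could be populated using current llama.cpp loader APIs; the package's actual wiring lives in LlamaContext.cpp's InitVocoder (not shown in this excerpt), and init_vocoder_sketch is a hypothetical helper, not the package's code.

#include <memory>

#include "llama.h"

// Sketch only: mirrors the vocoder_context fields from the diff above,
// minus common_params to stay self-contained. Members destruct in reverse
// declaration order, so the context is freed before the model it points to.
struct vocoder_context_sketch {
  std::shared_ptr<llama_model> model;
  std::shared_ptr<llama_context> context;
};

static bool init_vocoder_sketch(vocoder_context_sketch &vc,
                                const char *model_path) {
  llama_model_params mparams = llama_model_default_params();
  llama_model *raw_model = llama_model_load_from_file(model_path, mparams);
  if (raw_model == nullptr) {
    return false;
  }
  // Custom deleters free the C handles when the last reference drops,
  // e.g. when something like ReleaseVocoder resets the struct.
  vc.model.reset(raw_model, llama_model_free);

  llama_context_params cparams = llama_context_default_params();
  llama_context *raw_ctx = llama_init_from_model(raw_model, cparams);
  if (raw_ctx == nullptr) {
    vc.model.reset();
    return false;
  }
  vc.context.reset(raw_ctx, llama_free);
  return true;
}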
package/src/LoadSessionWorker.cpp CHANGED
@@ -12,8 +12,10 @@ void LoadSessionWorker::Execute() {
    std::vector<llama_token> tokens;
    tokens.reserve(_sess->params().n_ctx);

-   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
-   auto null_token_iter = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
+   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
+   // the null token
+   auto null_token_iter =
+       std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
    if (null_token_iter != tokens.end()) {
      tokens.resize(std::distance(tokens.begin(), null_token_iter));
    }
package/src/SaveSessionWorker.cpp CHANGED
@@ -9,16 +9,20 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
  void SaveSessionWorker::Execute() {
    _sess->get_mutex().lock();
    auto tokens = _sess->tokens_ptr();
-   auto tokens_to_save = std::vector<llama_token>(tokens->begin(), tokens->end());
+   auto tokens_to_save =
+       std::vector<llama_token>(tokens->begin(), tokens->end());

-   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
-   auto null_token_iter = std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
+   // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
+   // the null token
+   auto null_token_iter =
+       std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
    if (null_token_iter != tokens_to_save.end()) {
-     tokens_to_save.resize(std::distance(tokens_to_save.begin(), null_token_iter));
+     tokens_to_save.resize(
+         std::distance(tokens_to_save.begin(), null_token_iter));
    }

-   if (!llama_state_save_file(_sess->context(), _path.c_str(), tokens_to_save.data(),
-                              tokens_to_save.size())) {
+   if (!llama_state_save_file(_sess->context(), _path.c_str(),
+                              tokens_to_save.data(), tokens_to_save.size())) {
      SetError("Failed to save session");
    }
    _sess->get_mutex().unlock();
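
Both session workers share the same truncation rule, merely reflowed here by clang-format: a token buffer may be padded with LLAMA_TOKEN_NULL, and everything from the first null token onward is dropped before the state file is read or written. Factored out as a standalone helper, the pattern is simply this (a sketch; the package inlines it in each worker rather than sharing a function):

#include <algorithm>
#include <vector>

#include "llama.h"

// Cut the token vector at the first LLAMA_TOKEN_NULL (defined as -1 in
// llama.h), keeping only the valid prefix.
static void truncate_at_null_token(std::vector<llama_token> &tokens) {
  auto null_it = std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
  if (null_it != tokens.end()) {
    tokens.resize(std::distance(tokens.begin(), null_it));
  }
}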
package/src/TokenizeWorker.cpp CHANGED
@@ -2,17 +2,24 @@
  #include "LlamaContext.h"

  TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
-                                LlamaSessionPtr &sess, std::string text, std::vector<std::string> image_paths)
-     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _image_paths(image_paths) {}
+                                LlamaSessionPtr &sess, std::string text,
+                                std::vector<std::string> media_paths)
+     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text),
+       _media_paths(media_paths) {}

  void TokenizeWorker::Execute() {
    auto mtmd_ctx = _sess->get_mtmd_ctx();
-   if (!_image_paths.empty()) {
-     _result = tokenizeWithImages(mtmd_ctx, _text, _image_paths);
+   if (!_media_paths.empty()) {
+     try {
+       _result = tokenizeWithMedia(mtmd_ctx, _text, _media_paths);
+       mtmd_input_chunks_free(_result.chunks);
+     } catch (const std::exception &e) {
+       SetError(e.what());
+     }
    } else {
      const auto tokens = common_tokenize(_sess->context(), _text, false);
      _result.tokens = tokens;
-     _result.has_image = false;
+     _result.has_media = false;
    }
  }

@@ -24,24 +31,26 @@ void TokenizeWorker::OnOK() {
    memcpy(tokens.Data(), _result.tokens.data(),
           _result.tokens.size() * sizeof(llama_token));
    result.Set("tokens", tokens);
-   if (_result.has_image) {
-     result.Set("has_image", _result.has_image);
-
-     auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(), _result.bitmap_hashes.size());
+   result.Set("has_media", _result.has_media);
+   if (_result.has_media) {
+     auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(),
+                                           _result.bitmap_hashes.size());
      for (size_t i = 0; i < _result.bitmap_hashes.size(); i++) {
        bitmap_hashes.Set(i, _result.bitmap_hashes[i]);
      }
      result.Set("bitmap_hashes", bitmap_hashes);
-     auto chunk_pos = Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos.size());
+     auto chunk_pos =
+         Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos.size());
      for (size_t i = 0; i < _result.chunk_pos.size(); i++) {
        chunk_pos.Set(i, _result.chunk_pos[i]);
      }
      result.Set("chunk_pos", chunk_pos);
-     auto chunk_pos_images = Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos_images.size());
-     for (size_t i = 0; i < _result.chunk_pos_images.size(); i++) {
-       chunk_pos_images.Set(i, _result.chunk_pos_images[i]);
+     auto chunk_pos_media = Napi::Array::New(Napi::AsyncWorker::Env(),
+                                             _result.chunk_pos_media.size());
+     for (size_t i = 0; i < _result.chunk_pos_media.size(); i++) {
+       chunk_pos_media.Set(i, _result.chunk_pos_media[i]);
      }
-     result.Set("chunk_pos_images", chunk_pos_images);
+     result.Set("chunk_pos_media", chunk_pos_media);
    }
    Napi::Promise::Deferred::Resolve(result);
  }
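
The image-to-media rename also changes the shape of the object that OnOK resolves to JavaScript. Pieced together from the code above, TokenizeResult now looks roughly like the sketch below; the authoritative definition lives in package/src/common.hpp (changed in this release but not shown in this excerpt), and the element types here are inferred, not confirmed.

#include <string>
#include <vector>

#include "llama.h"
#include "tools/mtmd/mtmd.h"

// Inferred sketch of TokenizeResult after the rename; field types are a
// guess based on how Execute() and OnOK() use them.
struct TokenizeResultSketch {
  std::vector<llama_token> tokens;        // always populated
  bool has_media = false;                 // was has_image
  std::vector<std::string> bitmap_hashes; // one hash per decoded media bitmap
  std::vector<size_t> chunk_pos;          // start positions of all chunks
  std::vector<size_t> chunk_pos_media;    // was chunk_pos_images
  mtmd_input_chunks *chunks = nullptr;    // freed by Execute() right after tokenizing
};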
package/src/TokenizeWorker.h CHANGED
@@ -5,7 +5,7 @@ class TokenizeWorker : public Napi::AsyncWorker,
                         public Napi::Promise::Deferred {
  public:
    TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                  std::string text, std::vector<std::string> image_paths);
+                  std::string text, std::vector<std::string> media_paths);

  protected:
    void Execute();
@@ -15,6 +15,6 @@ protected:
  private:
    LlamaSessionPtr _sess;
    std::string _text;
-   std::vector<std::string> _image_paths;
+   std::vector<std::string> _media_paths;
    TokenizeResult _result;
  };
package/src/addons.cc CHANGED
@@ -5,25 +5,22 @@
  extern "C" void cleanup_logging();

  // Register cleanup function on module unload
- static Napi::Value register_cleanup(const Napi::CallbackInfo& info) {
-   napi_add_env_cleanup_hook(info.Env(), [](void*) {
-     cleanup_logging();
-   }, nullptr);
-
+ static Napi::Value register_cleanup(const Napi::CallbackInfo &info) {
+   napi_add_env_cleanup_hook(
+       info.Env(), [](void *) { cleanup_logging(); }, nullptr);
+
    return info.Env().Undefined();
  }

  Napi::Object Init(Napi::Env env, Napi::Object exports) {
    LlamaContext::Init(env, exports);
-
+
    // Register our cleanup handler for module unload
    exports.Set("__registerCleanup", Napi::Function::New(env, register_cleanup));
-
+
    // Also register cleanup directly on module init
-   napi_add_env_cleanup_hook(env, [](void*) {
-     cleanup_logging();
-   }, nullptr);
-
+   napi_add_env_cleanup_hook(env, [](void *) { cleanup_logging(); }, nullptr);
+
    return exports;
  }

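
The addons.cc change is formatting only, but the pattern it reflows is worth spelling out: napi_add_env_cleanup_hook takes a plain void (*)(void *) function pointer, which is why the lambdas must stay capture-less, and the hook fires when the Node.js environment (main instance or worker thread) tears down. A self-contained sketch of the same registration:

#include <node_api.h>

extern "C" void cleanup_logging(); // provided elsewhere in the addon

static void register_logging_cleanup(napi_env env) {
  // A capture-less lambda decays to the function pointer N-API expects.
  // The third argument is forwarded to the callback's void * parameter;
  // unused here, so nullptr.
  napi_add_env_cleanup_hook(
      env, [](void * /*arg*/) { cleanup_logging(); }, nullptr);
}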