@fugood/llama.node 1.1.10 → 1.2.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/CMakeLists.txt +5 -8
  2. package/lib/binding.ts +20 -2
  3. package/lib/index.js +2 -2
  4. package/lib/index.ts +2 -2
  5. package/package.json +20 -16
  6. package/src/DecodeAudioTokenWorker.cpp +23 -26
  7. package/src/DecodeAudioTokenWorker.h +6 -8
  8. package/src/DetokenizeWorker.cpp +5 -8
  9. package/src/DetokenizeWorker.h +6 -5
  10. package/src/DisposeWorker.cpp +23 -3
  11. package/src/DisposeWorker.h +4 -2
  12. package/src/EmbeddingWorker.cpp +9 -35
  13. package/src/EmbeddingWorker.h +3 -2
  14. package/src/LlamaCompletionWorker.cpp +217 -315
  15. package/src/LlamaCompletionWorker.h +6 -12
  16. package/src/LlamaContext.cpp +174 -388
  17. package/src/LlamaContext.h +8 -13
  18. package/src/LoadSessionWorker.cpp +22 -19
  19. package/src/LoadSessionWorker.h +3 -2
  20. package/src/RerankWorker.h +3 -2
  21. package/src/SaveSessionWorker.cpp +22 -19
  22. package/src/SaveSessionWorker.h +3 -2
  23. package/src/TokenizeWorker.cpp +38 -35
  24. package/src/TokenizeWorker.h +12 -3
  25. package/src/common.hpp +0 -458
  26. package/src/llama.cpp/common/arg.cpp +67 -37
  27. package/src/llama.cpp/common/chat.cpp +263 -2
  28. package/src/llama.cpp/common/chat.h +4 -0
  29. package/src/llama.cpp/common/common.cpp +10 -3
  30. package/src/llama.cpp/common/common.h +5 -2
  31. package/src/llama.cpp/common/log.cpp +53 -2
  32. package/src/llama.cpp/common/log.h +10 -4
  33. package/src/llama.cpp/common/sampling.cpp +23 -2
  34. package/src/llama.cpp/common/sampling.h +3 -1
  35. package/src/llama.cpp/common/speculative.cpp +1 -1
  36. package/src/llama.cpp/ggml/CMakeLists.txt +4 -3
  37. package/src/llama.cpp/ggml/include/ggml-backend.h +3 -0
  38. package/src/llama.cpp/ggml/include/ggml-cpu.h +0 -1
  39. package/src/llama.cpp/ggml/include/ggml.h +50 -1
  40. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +19 -16
  41. package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +210 -96
  42. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +1 -7
  43. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +11 -37
  44. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +3 -4
  45. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +43 -6
  46. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +4 -1
  47. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +18 -18
  48. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +232 -123
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +234 -16
  50. package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
  51. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +80 -51
  52. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +161 -20
  53. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +399 -50
  54. package/src/llama.cpp/include/llama.h +32 -7
  55. package/src/llama.cpp/src/llama-adapter.cpp +101 -4
  56. package/src/llama.cpp/src/llama-adapter.h +6 -0
  57. package/src/llama.cpp/src/llama-arch.cpp +69 -2
  58. package/src/llama.cpp/src/llama-arch.h +6 -0
  59. package/src/llama.cpp/src/llama-context.cpp +92 -45
  60. package/src/llama.cpp/src/llama-context.h +1 -5
  61. package/src/llama.cpp/src/llama-graph.cpp +74 -19
  62. package/src/llama.cpp/src/llama-graph.h +10 -1
  63. package/src/llama.cpp/src/llama-hparams.cpp +37 -0
  64. package/src/llama.cpp/src/llama-hparams.h +9 -3
  65. package/src/llama.cpp/src/llama-impl.h +2 -0
  66. package/src/llama.cpp/src/llama-kv-cache.cpp +33 -120
  67. package/src/llama.cpp/src/llama-kv-cache.h +4 -13
  68. package/src/llama.cpp/src/llama-model-loader.cpp +1 -0
  69. package/src/llama.cpp/src/llama-model.cpp +434 -21
  70. package/src/llama.cpp/src/llama-model.h +1 -1
  71. package/src/llama.cpp/src/llama-sampling.cpp +226 -126
  72. package/src/llama.cpp/src/llama-vocab.cpp +1 -1
  73. package/src/llama.cpp/src/llama.cpp +12 -0
  74. package/src/anyascii.c +0 -22223
  75. package/src/anyascii.h +0 -42
  76. package/src/tts_utils.cpp +0 -371
  77. package/src/tts_utils.h +0 -103
package/src/LlamaContext.h
@@ -1,7 +1,11 @@
 #include "common.hpp"
 #include "tools/mtmd/clip.h"
 #include "tools/mtmd/mtmd.h"
-#include "tts_utils.h"
+#include "rn-llama/rn-llama.h"
+#include "rn-llama/rn-completion.h"
+#include "rn-llama/rn-tts.h"
+
+using namespace rnllama;
 
 class LlamaCompletionWorker;
 
@@ -43,7 +47,7 @@ private:
   void ReleaseMultimodal(const Napi::CallbackInfo &info);
 
   // TTS methods
-  tts_type getTTSType(Napi::Env env, nlohmann::json speaker = nullptr);
+  rnllama::tts_type getTTSType(Napi::Env env, nlohmann::json speaker = nullptr);
   Napi::Value InitVocoder(const Napi::CallbackInfo &info);
   void ReleaseVocoder(const Napi::CallbackInfo &info);
   Napi::Value IsVocoderEnabled(const Napi::CallbackInfo &info);
@@ -53,17 +57,8 @@ private:
 
   std::string _info;
   Napi::Object _meta;
-  LlamaSessionPtr _sess = nullptr;
-  common_chat_templates_ptr _templates;
-  std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
 
-  // Multimodal support
-  mtmd_context *_mtmd_ctx = nullptr;
-  bool _has_multimodal = false;
-
-  // Vocoder support
-  tts_type _tts_type = UNKNOWN;
-  vocoder_context _vocoder;
-  bool _has_vocoder = false;
+  // Use rn-llama context instead of direct llama.cpp types
+  llama_rn_context *_rn_ctx = nullptr;
 };
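The practical effect of this header change is that LlamaContext now owns a single rnllama::llama_rn_context* and passes it to every worker that previously took a LlamaSessionPtr. A hedged sketch of the expected call pattern follows; the helper function name is hypothetical, and only the _rn_ctx member and the worker constructor signatures shown elsewhere in this diff are confirmed by it.

// Hypothetical sketch, not taken from the diff: queuing a session-load worker
// with the new raw-pointer member instead of a LlamaSessionPtr.
#include <napi.h>
#include "LoadSessionWorker.h"

static Napi::Value QueueLoadSession(const Napi::CallbackInfo &info,
                                    rnllama::llama_rn_context *rn_ctx) {
  auto *worker = new LoadSessionWorker(info, rn_ctx); // matches the new constructor below
  worker->Queue();                                    // Napi::AsyncWorker
  return worker->Promise();                           // Napi::Promise::Deferred
}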
package/src/LoadSessionWorker.cpp
@@ -2,31 +2,34 @@
 #include "LlamaContext.h"
 
 LoadSessionWorker::LoadSessionWorker(const Napi::CallbackInfo &info,
-                                     LlamaSessionPtr &sess)
+                                     rnllama::llama_rn_context* rn_ctx)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _path(info[0].ToString()),
-      _sess(sess) {}
+      _rn_ctx(rn_ctx) {}
 
 void LoadSessionWorker::Execute() {
-  _sess->get_mutex().lock();
-  // reserve the maximum number of tokens for capacity
-  std::vector<llama_token> tokens;
-  tokens.reserve(_sess->params().n_ctx);
+  try {
+    if (!_rn_ctx || !_rn_ctx->ctx) {
+      SetError("Context not available");
+      return;
+    }
 
-  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
-  // the null token
-  auto null_token_iter =
-      std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
-  if (null_token_iter != tokens.end()) {
-    tokens.resize(std::distance(tokens.begin(), null_token_iter));
-  }
+    // reserve the maximum number of tokens for capacity
+    std::vector<llama_token> tokens;
+    tokens.reserve(_rn_ctx->n_ctx);
 
-  if (!llama_state_load_file(_sess->context(), _path.c_str(), tokens.data(),
-                             tokens.capacity(), &count)) {
-    SetError("Failed to load session");
+    if (!llama_state_load_file(_rn_ctx->ctx, _path.c_str(), tokens.data(),
+                               tokens.capacity(), &count)) {
+      SetError("Failed to load session");
+      return;
+    }
+
+    tokens.resize(count);
+
+    _rn_ctx->completion->embd = std::move(tokens);
+    _rn_ctx->completion->n_past = count;
+  } catch (const std::exception &e) {
+    SetError(e.what());
   }
-  tokens.resize(count);
-  _sess->set_tokens(std::move(tokens));
-  _sess->get_mutex().unlock();
 }
 
 void LoadSessionWorker::OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
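For reference, llama_state_load_file is the upstream llama.cpp state API: it restores the saved KV-cache state and writes the saved prompt tokens into a caller-provided buffer. A minimal standalone sketch of the same load-and-restore pattern, assuming an already-initialized llama_context and a placeholder path:

// Hedged sketch of the llama.cpp session-restore pattern used above.
#include "llama.h"
#include <vector>

static bool restore_session(llama_context *ctx, const char *path, size_t n_ctx,
                            std::vector<llama_token> &out_tokens) {
  out_tokens.resize(n_ctx);          // buffer large enough for the saved prompt
  size_t n_loaded = 0;
  if (!llama_state_load_file(ctx, path, out_tokens.data(),
                             out_tokens.size(), &n_loaded)) {
    return false;                    // file missing or incompatible model
  }
  out_tokens.resize(n_loaded);       // keep only the tokens actually stored
  return true;
}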
package/src/LoadSessionWorker.h
@@ -1,9 +1,10 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"
 
 class LoadSessionWorker : public Napi::AsyncWorker,
                           public Napi::Promise::Deferred {
 public:
-  LoadSessionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess);
+  LoadSessionWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx);
 
 protected:
   void Execute();
@@ -12,6 +13,6 @@ protected:
 
 private:
   std::string _path;
-  LlamaSessionPtr _sess;
+  rnllama::llama_rn_context* _rn_ctx;
   size_t count = 0;
 };
package/src/RerankWorker.h
@@ -1,4 +1,5 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"
 #include <vector>
 
 struct RerankResult {
@@ -8,7 +9,7 @@ struct RerankResult {
 class RerankWorker : public Napi::AsyncWorker,
                      public Napi::Promise::Deferred {
 public:
-  RerankWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+  RerankWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx,
                std::string query, std::vector<std::string> documents,
                common_params &params);
 
@@ -18,7 +19,7 @@ protected:
   void OnError(const Napi::Error &err);
 
 private:
-  LlamaSessionPtr _sess;
+  rnllama::llama_rn_context* _rn_ctx;
   std::string _query;
   std::vector<std::string> _documents;
   common_params _params;
package/src/SaveSessionWorker.cpp
@@ -2,30 +2,33 @@
 #include "LlamaContext.h"
 
 SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
-                                     LlamaSessionPtr &sess)
+                                     rnllama::llama_rn_context* rn_ctx)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _path(info[0].ToString()),
-      _sess(sess) {}
+      _rn_ctx(rn_ctx) {}
 
 void SaveSessionWorker::Execute() {
-  _sess->get_mutex().lock();
-  auto tokens = _sess->tokens_ptr();
-  auto tokens_to_save =
-      std::vector<llama_token>(tokens->begin(), tokens->end());
+  try {
+    if (!_rn_ctx || !_rn_ctx->ctx) {
+      SetError("Context not available");
+      return;
+    }
 
-  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
-  // the null token
-  auto null_token_iter =
-      std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
-  if (null_token_iter != tokens_to_save.end()) {
-    tokens_to_save.resize(
-        std::distance(tokens_to_save.begin(), null_token_iter));
+    // For rn-llama, we save the context state directly
+    if (_rn_ctx->completion && !_rn_ctx->completion->embd.empty()) {
+      auto &tokens = _rn_ctx->completion->embd;
+      if (!llama_state_save_file(_rn_ctx->ctx, _path.c_str(),
+                                 tokens.data(), tokens.size())) {
+        SetError("Failed to save session");
+      }
+    } else {
+      // Save empty session if no tokens available
+      if (!llama_state_save_file(_rn_ctx->ctx, _path.c_str(), nullptr, 0)) {
+        SetError("Failed to save session");
+      }
+    }
+  } catch (const std::exception &e) {
+    SetError(e.what());
   }
-
-  if (!llama_state_save_file(_sess->context(), _path.c_str(),
-                             tokens_to_save.data(), tokens_to_save.size())) {
-    SetError("Failed to save session");
-  }
-  _sess->get_mutex().unlock();
 }
 
 void SaveSessionWorker::OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
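The save path is the mirror image: llama_state_save_file persists the current context state together with the token list that produced it. A minimal sketch of the same call outside the worker, with the path and token source as placeholders (in the worker above the tokens come from _rn_ctx->completion->embd):

// Hedged sketch of the save counterpart to the restore helper shown earlier.
#include "llama.h"
#include <vector>

static bool persist_session(llama_context *ctx, const char *path,
                            const std::vector<llama_token> &tokens) {
  // Passing nullptr/0 is the "empty session" case handled in the worker above.
  const llama_token *data = tokens.empty() ? nullptr : tokens.data();
  return llama_state_save_file(ctx, path, data, tokens.size());
}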
package/src/SaveSessionWorker.h
@@ -1,9 +1,10 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"
 
 class SaveSessionWorker : public Napi::AsyncWorker,
                           public Napi::Promise::Deferred {
 public:
-  SaveSessionWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess);
+  SaveSessionWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx);
 
 protected:
   void Execute();
@@ -12,5 +13,5 @@ protected:
 
 private:
   std::string _path;
-  LlamaSessionPtr _sess;
+  rnllama::llama_rn_context* _rn_ctx;
 };
package/src/TokenizeWorker.cpp
@@ -2,59 +2,62 @@
 #include "LlamaContext.h"
 
 TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
-                               LlamaSessionPtr &sess, std::string text,
+                               rnllama::llama_rn_context* rn_ctx, std::string text,
                                std::vector<std::string> media_paths)
-    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text),
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _rn_ctx(rn_ctx), _text(text),
       _media_paths(media_paths) {}
 
 void TokenizeWorker::Execute() {
-  auto mtmd_ctx = _sess->get_mtmd_ctx();
-  if (!_media_paths.empty()) {
-    try {
-      _result = tokenizeWithMedia(mtmd_ctx, _text, _media_paths);
-      mtmd_input_chunks_free(_result.chunks);
-    } catch (const std::exception &e) {
-      SetError(e.what());
+  try {
+    // Use rn-llama tokenize API directly
+    auto result = _rn_ctx->tokenize(_text, _media_paths);
+
+    // Convert llama_token to int32_t
+    _result.tokens.resize(result.tokens.size());
+    for (size_t i = 0; i < result.tokens.size(); i++) {
+      _result.tokens[i] = static_cast<int32_t>(result.tokens[i]);
     }
-  } else {
-    const auto tokens = common_tokenize(_sess->context(), _text, false);
-    _result.tokens = tokens;
-    _result.has_media = false;
+
+    _result.has_media = result.has_media;
+    _result.bitmap_hashes = result.bitmap_hashes;
+    _result.chunk_pos = result.chunk_pos;
+    _result.chunk_pos_media = result.chunk_pos_media;
+  } catch (const std::exception &e) {
+    SetError(e.what());
   }
 }
 
 void TokenizeWorker::OnOK() {
-  Napi::HandleScope scope(Napi::AsyncWorker::Env());
-  auto result = Napi::Object::New(Napi::AsyncWorker::Env());
-  auto tokens =
-      Napi::Int32Array::New(Napi::AsyncWorker::Env(), _result.tokens.size());
-  memcpy(tokens.Data(), _result.tokens.data(),
-         _result.tokens.size() * sizeof(llama_token));
-  result.Set("tokens", tokens);
-  result.Set("has_media", _result.has_media);
+  Napi::Env env = Napi::AsyncWorker::Env();
+  Napi::Object ret = Napi::Object::New(env);
+  auto tokens = Napi::Int32Array::New(env, _result.tokens.size());
+  memcpy(tokens.Data(), _result.tokens.data(), _result.tokens.size() * sizeof(int32_t));
+  ret.Set("tokens", tokens);
+  ret.Set("has_media", Napi::Boolean::New(env, _result.has_media));
+
   if (_result.has_media) {
-    auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(),
-                                          _result.bitmap_hashes.size());
+    auto bitmap_hashes = Napi::Array::New(env, _result.bitmap_hashes.size());
     for (size_t i = 0; i < _result.bitmap_hashes.size(); i++) {
-      bitmap_hashes.Set(i, _result.bitmap_hashes[i]);
+      bitmap_hashes.Set(i, Napi::String::New(env, _result.bitmap_hashes[i]));
     }
-    result.Set("bitmap_hashes", bitmap_hashes);
-    auto chunk_pos =
-        Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos.size());
+    ret.Set("bitmap_hashes", bitmap_hashes);
+
+    auto chunk_pos = Napi::Array::New(env, _result.chunk_pos.size());
     for (size_t i = 0; i < _result.chunk_pos.size(); i++) {
-      chunk_pos.Set(i, _result.chunk_pos[i]);
+      chunk_pos.Set(i, Napi::Number::New(env, static_cast<double>(_result.chunk_pos[i])));
     }
-    result.Set("chunk_pos", chunk_pos);
-    auto chunk_pos_media = Napi::Array::New(Napi::AsyncWorker::Env(),
-                                            _result.chunk_pos_media.size());
+    ret.Set("chunk_pos", chunk_pos);
+
+    auto chunk_pos_media = Napi::Array::New(env, _result.chunk_pos_media.size());
     for (size_t i = 0; i < _result.chunk_pos_media.size(); i++) {
-      chunk_pos_media.Set(i, _result.chunk_pos_media[i]);
+      chunk_pos_media.Set(i, Napi::Number::New(env, static_cast<double>(_result.chunk_pos_media[i])));
    }
-    result.Set("chunk_pos_media", chunk_pos_media);
+    ret.Set("chunk_pos_media", chunk_pos_media);
   }
-  Napi::Promise::Deferred::Resolve(result);
+
+  Napi::Promise::Deferred::Resolve(ret);
 }
 
 void TokenizeWorker::OnError(const Napi::Error &err) {
   Napi::Promise::Deferred::Reject(err.Value());
-}
+}
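The OnOK changes above are mostly about building the JS result more explicitly. The core node-addon-api pattern (copy a native int32 vector into a Napi::Int32Array and attach metadata to a plain object) can be sketched on its own; this is a hedged illustration, not code from the package:

// Hedged sketch of the N-API result construction used in OnOK().
#include <napi.h>
#include <cstring>
#include <vector>

static Napi::Object MakeTokenizeResult(Napi::Env env,
                                       const std::vector<int32_t> &tokens,
                                       bool has_media) {
  Napi::Object ret = Napi::Object::New(env);
  // The Int32Array owns its backing store; copy the native tokens into it.
  auto js_tokens = Napi::Int32Array::New(env, tokens.size());
  std::memcpy(js_tokens.Data(), tokens.data(), tokens.size() * sizeof(int32_t));
  ret.Set("tokens", js_tokens);
  ret.Set("has_media", Napi::Boolean::New(env, has_media));
  return ret;
}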
package/src/TokenizeWorker.h
@@ -1,10 +1,19 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"
 #include <vector>
 
+struct TokenizeResult {
+  std::vector<int32_t> tokens;
+  bool has_media;
+  std::vector<std::string> bitmap_hashes;
+  std::vector<size_t> chunk_pos;
+  std::vector<size_t> chunk_pos_media;
+};
+
 class TokenizeWorker : public Napi::AsyncWorker,
                        public Napi::Promise::Deferred {
 public:
-  TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
+  TokenizeWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx,
                  std::string text, std::vector<std::string> media_paths);
 
 protected:
@@ -13,8 +22,8 @@ protected:
   void OnError(const Napi::Error &err);
 
 private:
-  LlamaSessionPtr _sess;
+  rnllama::llama_rn_context* _rn_ctx;
   std::string _text;
   std::vector<std::string> _media_paths;
   TokenizeResult _result;
-};
+};