@fugood/llama.node 1.1.11 → 1.2.0-rc.0
- package/CMakeLists.txt +5 -8
- package/lib/binding.ts +18 -1
- package/lib/index.js +2 -2
- package/lib/index.ts +2 -2
- package/package.json +20 -16
- package/src/DecodeAudioTokenWorker.cpp +23 -26
- package/src/DecodeAudioTokenWorker.h +6 -8
- package/src/DetokenizeWorker.cpp +5 -8
- package/src/DetokenizeWorker.h +6 -5
- package/src/DisposeWorker.cpp +23 -3
- package/src/DisposeWorker.h +4 -2
- package/src/EmbeddingWorker.cpp +9 -35
- package/src/EmbeddingWorker.h +3 -2
- package/src/LlamaCompletionWorker.cpp +217 -315
- package/src/LlamaCompletionWorker.h +6 -12
- package/src/LlamaContext.cpp +166 -396
- package/src/LlamaContext.h +8 -13
- package/src/LoadSessionWorker.cpp +22 -19
- package/src/LoadSessionWorker.h +3 -2
- package/src/RerankWorker.h +3 -2
- package/src/SaveSessionWorker.cpp +22 -19
- package/src/SaveSessionWorker.h +3 -2
- package/src/TokenizeWorker.cpp +38 -35
- package/src/TokenizeWorker.h +12 -3
- package/src/common.hpp +0 -458
- package/src/llama.cpp/common/arg.cpp +50 -30
- package/src/llama.cpp/common/chat.cpp +111 -1
- package/src/llama.cpp/common/chat.h +3 -0
- package/src/llama.cpp/common/common.h +1 -1
- package/src/llama.cpp/common/log.cpp +53 -2
- package/src/llama.cpp/common/log.h +10 -4
- package/src/llama.cpp/common/sampling.cpp +23 -2
- package/src/llama.cpp/common/sampling.h +3 -1
- package/src/llama.cpp/common/speculative.cpp +1 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +3 -2
- package/src/llama.cpp/ggml/include/ggml-backend.h +3 -0
- package/src/llama.cpp/ggml/include/ggml-cpu.h +0 -1
- package/src/llama.cpp/ggml/include/ggml.h +50 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +14 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +210 -96
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +0 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +11 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +3 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +4 -9
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +218 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +41 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +150 -28
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +320 -73
- package/src/llama.cpp/include/llama.h +5 -6
- package/src/llama.cpp/src/llama-adapter.cpp +33 -0
- package/src/llama.cpp/src/llama-adapter.h +3 -0
- package/src/llama.cpp/src/llama-arch.cpp +27 -4
- package/src/llama.cpp/src/llama-arch.h +2 -0
- package/src/llama.cpp/src/llama-context.cpp +62 -56
- package/src/llama.cpp/src/llama-context.h +1 -1
- package/src/llama.cpp/src/llama-graph.cpp +54 -9
- package/src/llama.cpp/src/llama-graph.h +8 -0
- package/src/llama.cpp/src/llama-hparams.cpp +37 -0
- package/src/llama.cpp/src/llama-hparams.h +9 -3
- package/src/llama.cpp/src/llama-kv-cache.cpp +1 -23
- package/src/llama.cpp/src/llama-kv-cache.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +159 -1
- package/src/llama.cpp/src/llama-model.h +0 -1
- package/src/llama.cpp/src/llama-sampling.cpp +226 -126
- package/src/anyascii.c +0 -22223
- package/src/anyascii.h +0 -42
- package/src/tts_utils.cpp +0 -371
- package/src/tts_utils.h +0 -103
package/src/LlamaContext.h
CHANGED

@@ -1,7 +1,11 @@
 #include "common.hpp"
 #include "tools/mtmd/clip.h"
 #include "tools/mtmd/mtmd.h"
-#include "
+#include "rn-llama/rn-llama.h"
+#include "rn-llama/rn-completion.h"
+#include "rn-llama/rn-tts.h"
+
+using namespace rnllama;

 class LlamaCompletionWorker;

@@ -43,7 +47,7 @@ private:
   void ReleaseMultimodal(const Napi::CallbackInfo &info);

   // TTS methods
-  tts_type getTTSType(Napi::Env env, nlohmann::json speaker = nullptr);
+  rnllama::tts_type getTTSType(Napi::Env env, nlohmann::json speaker = nullptr);
   Napi::Value InitVocoder(const Napi::CallbackInfo &info);
   void ReleaseVocoder(const Napi::CallbackInfo &info);
   Napi::Value IsVocoderEnabled(const Napi::CallbackInfo &info);
@@ -53,17 +57,8 @@ private:

   std::string _info;
   Napi::Object _meta;
-  LlamaSessionPtr _sess = nullptr;
-  common_chat_templates_ptr _templates;
-  std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;

-  //
-
-  bool _has_multimodal = false;
-
-  // Vocoder support
-  tts_type _tts_type = UNKNOWN;
-  vocoder_context _vocoder;
-  bool _has_vocoder = false;
+  // Use rn-llama context instead of direct llama.cpp types
+  llama_rn_context *_rn_ctx = nullptr;
 };
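The net effect of this header change: `LlamaContext` no longer tracks the session, chat templates, and LoRA list as separate members; that state now lives behind a single `rnllama::llama_rn_context` pointer, which the async workers below receive in their constructors. A minimal sketch of that hand-off, assuming a `LoadSession` binding method (the method body is illustrative; only the worker's constructor signature comes from this diff):

// Sketch only: forwarding the shared rn-llama context to an async worker.
// LoadSessionWorker(info, rn_ctx) matches this diff; the surrounding method
// is an assumed example of how LlamaContext would use _rn_ctx.
Napi::Value LlamaContext::LoadSession(const Napi::CallbackInfo &info) {
  auto *worker = new LoadSessionWorker(info, _rn_ctx);
  worker->Queue();          // Execute() runs off the JS thread
  return worker->Promise(); // inherited from Napi::Promise::Deferred
}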
package/src/LoadSessionWorker.cpp
CHANGED

@@ -2,31 +2,34 @@
 #include "LlamaContext.h"

 LoadSessionWorker::LoadSessionWorker(const Napi::CallbackInfo &info,
-
+                                     rnllama::llama_rn_context* rn_ctx)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _path(info[0].ToString()),
-
+      _rn_ctx(rn_ctx) {}

 void LoadSessionWorker::Execute() {
-
-
-
-
+  try {
+    if (!_rn_ctx || !_rn_ctx->ctx) {
+      SetError("Context not available");
+      return;
+    }

-
-
-
-      std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
-  if (null_token_iter != tokens.end()) {
-    tokens.resize(std::distance(tokens.begin(), null_token_iter));
-  }
+    // reserve the maximum number of tokens for capacity
+    std::vector<llama_token> tokens;
+    tokens.reserve(_rn_ctx->n_ctx);

-
-
-
+    if (!llama_state_load_file(_rn_ctx->ctx, _path.c_str(), tokens.data(),
+                               tokens.capacity(), &count)) {
+      SetError("Failed to load session");
+      return;
+    }
+
+    tokens.resize(count);
+
+    _rn_ctx->completion->embd = std::move(tokens);
+    _rn_ctx->completion->n_past = count;
+  } catch (const std::exception &e) {
+    SetError(e.what());
   }
-  tokens.resize(count);
-  _sess->set_tokens(std::move(tokens));
-  _sess->get_mutex().unlock();
 }

 void LoadSessionWorker::OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
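The rewritten load path follows the usual llama.cpp session-restore pattern: size a token buffer for the full context, let `llama_state_load_file` report how many tokens the file actually held, then shrink to that count. A self-contained sketch of the same pattern against the llama.h C API (assumes a valid `llama_context *`; sizing the vector up front keeps the write through `data()` inside the vector's bounds, which the worker's reserve()-only variant does not guarantee):

#include "llama.h"
#include <vector>

// Sketch: restore a saved session's token history. Returns an empty vector
// on failure (missing file, incompatible model, etc.).
std::vector<llama_token> restore_session(llama_context *ctx,
                                         const char *path, size_t n_ctx) {
  std::vector<llama_token> tokens(n_ctx); // full-capacity buffer
  size_t count = 0;
  if (!llama_state_load_file(ctx, path, tokens.data(), tokens.size(), &count))
    return {};
  tokens.resize(count); // keep only the tokens actually restored
  return tokens;
}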
package/src/LoadSessionWorker.h
CHANGED

@@ -1,9 +1,10 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"

 class LoadSessionWorker : public Napi::AsyncWorker,
                           public Napi::Promise::Deferred {
 public:
-  LoadSessionWorker(const Napi::CallbackInfo &info,
+  LoadSessionWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx);

 protected:
   void Execute();
@@ -12,6 +13,6 @@ protected:

 private:
   std::string _path;
-
+  rnllama::llama_rn_context* _rn_ctx;
   size_t count = 0;
 };
package/src/RerankWorker.h
CHANGED

@@ -1,4 +1,5 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"
 #include <vector>

 struct RerankResult {
@@ -8,7 +9,7 @@ struct RerankResult {
 class RerankWorker : public Napi::AsyncWorker,
                      public Napi::Promise::Deferred {
 public:
-  RerankWorker(const Napi::CallbackInfo &info,
+  RerankWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx,
               std::string query, std::vector<std::string> documents,
               common_params &params);

@@ -18,7 +19,7 @@ protected:
   void OnError(const Napi::Error &err);

 private:
-
+  rnllama::llama_rn_context* _rn_ctx;
   std::string _query;
   std::vector<std::string> _documents;
   common_params _params;
package/src/SaveSessionWorker.cpp
CHANGED

@@ -2,30 +2,33 @@
 #include "LlamaContext.h"

 SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
-
+                                     rnllama::llama_rn_context* rn_ctx)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _path(info[0].ToString()),
-
+      _rn_ctx(rn_ctx) {}

 void SaveSessionWorker::Execute() {
-
-
-
-
+  try {
+    if (!_rn_ctx || !_rn_ctx->ctx) {
+      SetError("Context not available");
+      return;
+    }

-
-
-
-
-
-
-
+    // For rn-llama, we save the context state directly
+    if (_rn_ctx->completion && !_rn_ctx->completion->embd.empty()) {
+      auto &tokens = _rn_ctx->completion->embd;
+      if (!llama_state_save_file(_rn_ctx->ctx, _path.c_str(),
+                                 tokens.data(), tokens.size())) {
+        SetError("Failed to save session");
+      }
+    } else {
+      // Save empty session if no tokens available
+      if (!llama_state_save_file(_rn_ctx->ctx, _path.c_str(), nullptr, 0)) {
+        SetError("Failed to save session");
+      }
+    }
+  } catch (const std::exception &e) {
+    SetError(e.what());
   }
-
-  if (!llama_state_save_file(_sess->context(), _path.c_str(),
-                             tokens_to_save.data(), tokens_to_save.size())) {
-    SetError("Failed to save session");
-  }
-  _sess->get_mutex().unlock();
 }

 void SaveSessionWorker::OnOK() { Resolve(AsyncWorker::Env().Undefined()); }
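Saving is the mirror image: `llama_state_save_file` persists the context's KV-cache state together with the token history that produced it, and the worker falls back to an empty token list when no completion history exists. A minimal sketch of the call, under the same assumptions as the restore sketch above:

#include "llama.h"
#include <vector>

// Sketch: persist the context state; an empty token list is a valid session.
bool persist_session(llama_context *ctx, const char *path,
                     const std::vector<llama_token> &tokens) {
  return llama_state_save_file(ctx, path,
                               tokens.empty() ? nullptr : tokens.data(),
                               tokens.size());
}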
package/src/SaveSessionWorker.h
CHANGED

@@ -1,9 +1,10 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"

 class SaveSessionWorker : public Napi::AsyncWorker,
                           public Napi::Promise::Deferred {
 public:
-  SaveSessionWorker(const Napi::CallbackInfo &info,
+  SaveSessionWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx);

 protected:
   void Execute();
@@ -12,5 +13,5 @@ protected:

 private:
   std::string _path;
-
+  rnllama::llama_rn_context* _rn_ctx;
 };
package/src/TokenizeWorker.cpp
CHANGED

@@ -2,59 +2,62 @@
 #include "LlamaContext.h"

 TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
-
+                               rnllama::llama_rn_context* rn_ctx, std::string text,
                                std::vector<std::string> media_paths)
-    : AsyncWorker(info.Env()), Deferred(info.Env()),
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _rn_ctx(rn_ctx), _text(text),
       _media_paths(media_paths) {}

 void TokenizeWorker::Execute() {
-
-
-
-
-
-
-
+  try {
+    // Use rn-llama tokenize API directly
+    auto result = _rn_ctx->tokenize(_text, _media_paths);
+
+    // Convert llama_token to int32_t
+    _result.tokens.resize(result.tokens.size());
+    for (size_t i = 0; i < result.tokens.size(); i++) {
+      _result.tokens[i] = static_cast<int32_t>(result.tokens[i]);
     }
-
-
-  _result.
-  _result.
+
+    _result.has_media = result.has_media;
+    _result.bitmap_hashes = result.bitmap_hashes;
+    _result.chunk_pos = result.chunk_pos;
+    _result.chunk_pos_media = result.chunk_pos_media;
+  } catch (const std::exception &e) {
+    SetError(e.what());
   }
 }

 void TokenizeWorker::OnOK() {
-  Napi::
-
-  auto tokens =
-
-
-
-
-  result.Set("has_media", _result.has_media);
+  Napi::Env env = Napi::AsyncWorker::Env();
+  Napi::Object ret = Napi::Object::New(env);
+  auto tokens = Napi::Int32Array::New(env, _result.tokens.size());
+  memcpy(tokens.Data(), _result.tokens.data(), _result.tokens.size() * sizeof(int32_t));
+  ret.Set("tokens", tokens);
+  ret.Set("has_media", Napi::Boolean::New(env, _result.has_media));
+
   if (_result.has_media) {
-    auto bitmap_hashes = Napi::Array::New(
-        _result.bitmap_hashes.size());
+    auto bitmap_hashes = Napi::Array::New(env, _result.bitmap_hashes.size());
     for (size_t i = 0; i < _result.bitmap_hashes.size(); i++) {
-      bitmap_hashes.Set(i, _result.bitmap_hashes[i]);
+      bitmap_hashes.Set(i, Napi::String::New(env, _result.bitmap_hashes[i]));
     }
-
-
-
+    ret.Set("bitmap_hashes", bitmap_hashes);
+
+    auto chunk_pos = Napi::Array::New(env, _result.chunk_pos.size());
     for (size_t i = 0; i < _result.chunk_pos.size(); i++) {
-      chunk_pos.Set(i, _result.chunk_pos[i]);
+      chunk_pos.Set(i, Napi::Number::New(env, static_cast<double>(_result.chunk_pos[i])));
     }
-
-
-
+    ret.Set("chunk_pos", chunk_pos);
+
+    auto chunk_pos_media = Napi::Array::New(env, _result.chunk_pos_media.size());
     for (size_t i = 0; i < _result.chunk_pos_media.size(); i++) {
-      chunk_pos_media.Set(i, _result.chunk_pos_media[i]);
+      chunk_pos_media.Set(i, Napi::Number::New(env, static_cast<double>(_result.chunk_pos_media[i])));
     }
-
+    ret.Set("chunk_pos_media", chunk_pos_media);
   }
-
+
+  Napi::Promise::Deferred::Resolve(ret);
 }

 void TokenizeWorker::OnError(const Napi::Error &err) {
   Napi::Promise::Deferred::Reject(err.Value());
-}
+}
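The rewritten `OnOK` returns the tokens as a `Napi::Int32Array` filled with a single `memcpy`, rather than boxing each token into a `Napi::Number`; for long prompts that is one typed-array allocation instead of thousands of handle creations. A small helper capturing the pattern (the helper name is invented for illustration):

#include <napi.h>
#include <cstring>
#include <vector>

// Hypothetical helper: bulk-copy a C++ int32 vector into a JS Int32Array.
static Napi::Int32Array ToInt32Array(Napi::Env env,
                                     const std::vector<int32_t> &values) {
  auto arr = Napi::Int32Array::New(env, values.size());
  std::memcpy(arr.Data(), values.data(), values.size() * sizeof(int32_t));
  return arr;
}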
package/src/TokenizeWorker.h
CHANGED

@@ -1,10 +1,19 @@
 #include "common.hpp"
+#include "rn-llama/rn-llama.h"
 #include <vector>

+struct TokenizeResult {
+  std::vector<int32_t> tokens;
+  bool has_media;
+  std::vector<std::string> bitmap_hashes;
+  std::vector<size_t> chunk_pos;
+  std::vector<size_t> chunk_pos_media;
+};
+
 class TokenizeWorker : public Napi::AsyncWorker,
                        public Napi::Promise::Deferred {
 public:
-  TokenizeWorker(const Napi::CallbackInfo &info,
+  TokenizeWorker(const Napi::CallbackInfo &info, rnllama::llama_rn_context* rn_ctx,
                  std::string text, std::vector<std::string> media_paths);

 protected:
@@ -13,8 +22,8 @@ protected:
   void OnError(const Napi::Error &err);

 private:
-
+  rnllama::llama_rn_context* _rn_ctx;
   std::string _text;
   std::vector<std::string> _media_paths;
   TokenizeResult _result;
-};
+};