@fugood/llama.node 0.4.7 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +4 -0
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/lib/binding.ts +66 -6
- package/lib/index.js +59 -17
- package/lib/index.ts +74 -23
- package/package.json +1 -1
- package/src/DecodeAudioTokenWorker.cpp +40 -0
- package/src/DecodeAudioTokenWorker.h +22 -0
- package/src/EmbeddingWorker.cpp +7 -5
- package/src/LlamaCompletionWorker.cpp +68 -54
- package/src/LlamaCompletionWorker.h +7 -8
- package/src/LlamaContext.cpp +551 -235
- package/src/LlamaContext.h +26 -4
- package/src/LoadSessionWorker.cpp +4 -2
- package/src/SaveSessionWorker.cpp +10 -6
- package/src/TokenizeWorker.cpp +23 -14
- package/src/TokenizeWorker.h +2 -2
- package/src/addons.cc +8 -11
- package/src/common.hpp +129 -126
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
- package/src/tts_utils.cpp +342 -0
- package/src/tts_utils.h +62 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
package/src/LlamaContext.h
CHANGED
|
@@ -1,15 +1,22 @@
|
|
|
1
1
|
#include "common.hpp"
|
|
2
|
-
#include "tools/mtmd/mtmd.h"
|
|
3
2
|
#include "tools/mtmd/clip.h"
|
|
3
|
+
#include "tools/mtmd/mtmd.h"
|
|
4
|
+
#include "tts_utils.h"
|
|
4
5
|
|
|
5
6
|
class LlamaCompletionWorker;
|
|
6
7
|
|
|
8
|
+
struct vocoder_context {
|
|
9
|
+
common_params params;
|
|
10
|
+
std::shared_ptr<llama_model> model;
|
|
11
|
+
std::shared_ptr<llama_context> context;
|
|
12
|
+
};
|
|
13
|
+
|
|
7
14
|
class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
|
|
8
15
|
public:
|
|
9
16
|
LlamaContext(const Napi::CallbackInfo &info);
|
|
10
17
|
~LlamaContext();
|
|
11
18
|
static void ToggleNativeLog(const Napi::CallbackInfo &info);
|
|
12
|
-
static Napi::Value ModelInfo(const Napi::CallbackInfo&
|
|
19
|
+
static Napi::Value ModelInfo(const Napi::CallbackInfo &info);
|
|
13
20
|
static void Init(Napi::Env env, Napi::Object &exports);
|
|
14
21
|
|
|
15
22
|
private:
|
|
@@ -27,20 +34,35 @@ private:
|
|
|
27
34
|
void RemoveLoraAdapters(const Napi::CallbackInfo &info);
|
|
28
35
|
Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
|
|
29
36
|
Napi::Value Release(const Napi::CallbackInfo &info);
|
|
30
|
-
|
|
37
|
+
|
|
31
38
|
// Multimodal methods
|
|
32
39
|
Napi::Value InitMultimodal(const Napi::CallbackInfo &info);
|
|
33
40
|
Napi::Value IsMultimodalEnabled(const Napi::CallbackInfo &info);
|
|
41
|
+
Napi::Value GetMultimodalSupport(const Napi::CallbackInfo &info);
|
|
34
42
|
void ReleaseMultimodal(const Napi::CallbackInfo &info);
|
|
35
43
|
|
|
44
|
+
// TTS methods
|
|
45
|
+
tts_type getTTSType(Napi::Env env, nlohmann::json speaker = nullptr);
|
|
46
|
+
Napi::Value InitVocoder(const Napi::CallbackInfo &info);
|
|
47
|
+
void ReleaseVocoder(const Napi::CallbackInfo &info);
|
|
48
|
+
Napi::Value IsVocoderEnabled(const Napi::CallbackInfo &info);
|
|
49
|
+
Napi::Value GetFormattedAudioCompletion(const Napi::CallbackInfo &info);
|
|
50
|
+
Napi::Value GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info);
|
|
51
|
+
Napi::Value DecodeAudioTokens(const Napi::CallbackInfo &info);
|
|
52
|
+
|
|
36
53
|
std::string _info;
|
|
37
54
|
Napi::Object _meta;
|
|
38
55
|
LlamaSessionPtr _sess = nullptr;
|
|
39
56
|
common_chat_templates_ptr _templates;
|
|
40
57
|
std::vector<common_adapter_lora_info> _lora;
|
|
41
58
|
LlamaCompletionWorker *_wip = nullptr;
|
|
42
|
-
|
|
59
|
+
|
|
43
60
|
// Multimodal support
|
|
44
61
|
mtmd_context *_mtmd_ctx = nullptr;
|
|
45
62
|
bool _has_multimodal = false;
|
|
63
|
+
|
|
64
|
+
// Vocoder support
|
|
65
|
+
tts_type _tts_type = UNKNOWN;
|
|
66
|
+
vocoder_context _vocoder;
|
|
67
|
+
bool _has_vocoder = false;
|
|
46
68
|
};
|
package/src/LoadSessionWorker.cpp
CHANGED
|
@@ -12,8 +12,10 @@ void LoadSessionWorker::Execute() {
|
|
|
12
12
|
std::vector<llama_token> tokens;
|
|
13
13
|
tokens.reserve(_sess->params().n_ctx);
|
|
14
14
|
|
|
15
|
-
// Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
|
|
16
|
-
|
|
15
|
+
// Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
|
|
16
|
+
// the null token
|
|
17
|
+
auto null_token_iter =
|
|
18
|
+
std::find(tokens.begin(), tokens.end(), LLAMA_TOKEN_NULL);
|
|
17
19
|
if (null_token_iter != tokens.end()) {
|
|
18
20
|
tokens.resize(std::distance(tokens.begin(), null_token_iter));
|
|
19
21
|
}
|
package/src/SaveSessionWorker.cpp
CHANGED
|
@@ -9,16 +9,20 @@ SaveSessionWorker::SaveSessionWorker(const Napi::CallbackInfo &info,
|
|
|
9
9
|
void SaveSessionWorker::Execute() {
|
|
10
10
|
_sess->get_mutex().lock();
|
|
11
11
|
auto tokens = _sess->tokens_ptr();
|
|
12
|
-
auto tokens_to_save =
|
|
12
|
+
auto tokens_to_save =
|
|
13
|
+
std::vector<llama_token>(tokens->begin(), tokens->end());
|
|
13
14
|
|
|
14
|
-
// Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
|
|
15
|
-
|
|
15
|
+
// Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of
|
|
16
|
+
// the null token
|
|
17
|
+
auto null_token_iter =
|
|
18
|
+
std::find(tokens_to_save.begin(), tokens_to_save.end(), LLAMA_TOKEN_NULL);
|
|
16
19
|
if (null_token_iter != tokens_to_save.end()) {
|
|
17
|
-
tokens_to_save.resize(
|
|
20
|
+
tokens_to_save.resize(
|
|
21
|
+
std::distance(tokens_to_save.begin(), null_token_iter));
|
|
18
22
|
}
|
|
19
23
|
|
|
20
|
-
if (!llama_state_save_file(_sess->context(), _path.c_str(),
|
|
21
|
-
tokens_to_save.size())) {
|
|
24
|
+
if (!llama_state_save_file(_sess->context(), _path.c_str(),
|
|
25
|
+
tokens_to_save.data(), tokens_to_save.size())) {
|
|
22
26
|
SetError("Failed to save session");
|
|
23
27
|
}
|
|
24
28
|
_sess->get_mutex().unlock();
|
package/src/TokenizeWorker.cpp
CHANGED
|
@@ -2,17 +2,24 @@
|
|
|
2
2
|
#include "LlamaContext.h"
|
|
3
3
|
|
|
4
4
|
TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
|
|
5
|
-
LlamaSessionPtr &sess, std::string text,
|
|
6
|
-
|
|
5
|
+
LlamaSessionPtr &sess, std::string text,
|
|
6
|
+
std::vector<std::string> media_paths)
|
|
7
|
+
: AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text),
|
|
8
|
+
_media_paths(media_paths) {}
|
|
7
9
|
|
|
8
10
|
void TokenizeWorker::Execute() {
|
|
9
11
|
auto mtmd_ctx = _sess->get_mtmd_ctx();
|
|
10
|
-
if (!
|
|
11
|
-
|
|
12
|
+
if (!_media_paths.empty()) {
|
|
13
|
+
try {
|
|
14
|
+
_result = tokenizeWithMedia(mtmd_ctx, _text, _media_paths);
|
|
15
|
+
mtmd_input_chunks_free(_result.chunks);
|
|
16
|
+
} catch (const std::exception &e) {
|
|
17
|
+
SetError(e.what());
|
|
18
|
+
}
|
|
12
19
|
} else {
|
|
13
20
|
const auto tokens = common_tokenize(_sess->context(), _text, false);
|
|
14
21
|
_result.tokens = tokens;
|
|
15
|
-
_result.
|
|
22
|
+
_result.has_media = false;
|
|
16
23
|
}
|
|
17
24
|
}
|
|
18
25
|
|
|
@@ -24,24 +31,26 @@ void TokenizeWorker::OnOK() {
|
|
|
24
31
|
memcpy(tokens.Data(), _result.tokens.data(),
|
|
25
32
|
_result.tokens.size() * sizeof(llama_token));
|
|
26
33
|
result.Set("tokens", tokens);
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
34
|
+
result.Set("has_media", _result.has_media);
|
|
35
|
+
if (_result.has_media) {
|
|
36
|
+
auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(),
|
|
37
|
+
_result.bitmap_hashes.size());
|
|
31
38
|
for (size_t i = 0; i < _result.bitmap_hashes.size(); i++) {
|
|
32
39
|
bitmap_hashes.Set(i, _result.bitmap_hashes[i]);
|
|
33
40
|
}
|
|
34
41
|
result.Set("bitmap_hashes", bitmap_hashes);
|
|
35
|
-
auto chunk_pos =
|
|
42
|
+
auto chunk_pos =
|
|
43
|
+
Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos.size());
|
|
36
44
|
for (size_t i = 0; i < _result.chunk_pos.size(); i++) {
|
|
37
45
|
chunk_pos.Set(i, _result.chunk_pos[i]);
|
|
38
46
|
}
|
|
39
47
|
result.Set("chunk_pos", chunk_pos);
|
|
40
|
-
auto
|
|
41
|
-
|
|
42
|
-
|
|
48
|
+
auto chunk_pos_media = Napi::Array::New(Napi::AsyncWorker::Env(),
|
|
49
|
+
_result.chunk_pos_media.size());
|
|
50
|
+
for (size_t i = 0; i < _result.chunk_pos_media.size(); i++) {
|
|
51
|
+
chunk_pos_media.Set(i, _result.chunk_pos_media[i]);
|
|
43
52
|
}
|
|
44
|
-
result.Set("
|
|
53
|
+
result.Set("chunk_pos_media", chunk_pos_media);
|
|
45
54
|
}
|
|
46
55
|
Napi::Promise::Deferred::Resolve(result);
|
|
47
56
|
}
|
package/src/TokenizeWorker.h
CHANGED
|
@@ -5,7 +5,7 @@ class TokenizeWorker : public Napi::AsyncWorker,
|
|
|
5
5
|
public Napi::Promise::Deferred {
|
|
6
6
|
public:
|
|
7
7
|
TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
|
|
8
|
-
std::string text, std::vector<std::string>
|
|
8
|
+
std::string text, std::vector<std::string> media_paths);
|
|
9
9
|
|
|
10
10
|
protected:
|
|
11
11
|
void Execute();
|
|
@@ -15,6 +15,6 @@ protected:
|
|
|
15
15
|
private:
|
|
16
16
|
LlamaSessionPtr _sess;
|
|
17
17
|
std::string _text;
|
|
18
|
-
std::vector<std::string>
|
|
18
|
+
std::vector<std::string> _media_paths;
|
|
19
19
|
TokenizeResult _result;
|
|
20
20
|
};
|
package/src/addons.cc
CHANGED
|
@@ -5,25 +5,22 @@
|
|
|
5
5
|
extern "C" void cleanup_logging();
|
|
6
6
|
|
|
7
7
|
// Register cleanup function on module unload
|
|
8
|
-
static Napi::Value register_cleanup(const Napi::CallbackInfo&
|
|
9
|
-
napi_add_env_cleanup_hook(
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
8
|
+
static Napi::Value register_cleanup(const Napi::CallbackInfo &info) {
|
|
9
|
+
napi_add_env_cleanup_hook(
|
|
10
|
+
info.Env(), [](void *) { cleanup_logging(); }, nullptr);
|
|
11
|
+
|
|
13
12
|
return info.Env().Undefined();
|
|
14
13
|
}
|
|
15
14
|
|
|
16
15
|
Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
|
17
16
|
LlamaContext::Init(env, exports);
|
|
18
|
-
|
|
17
|
+
|
|
19
18
|
// Register our cleanup handler for module unload
|
|
20
19
|
exports.Set("__registerCleanup", Napi::Function::New(env, register_cleanup));
|
|
21
|
-
|
|
20
|
+
|
|
22
21
|
// Also register cleanup directly on module init
|
|
23
|
-
napi_add_env_cleanup_hook(env, [](void*) {
|
|
24
|
-
|
|
25
|
-
}, nullptr);
|
|
26
|
-
|
|
22
|
+
napi_add_env_cleanup_hook(env, [](void *) { cleanup_logging(); }, nullptr);
|
|
23
|
+
|
|
27
24
|
return exports;
|
|
28
25
|
}
|
|
29
26
|
|