@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +1 -1
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +27 -26
- package/src/LlamaCompletionWorker.cpp +21 -4
- package/src/LlamaCompletionWorker.h +2 -0
- package/src/LlamaContext.cpp +3 -12
- package/src/common.hpp +6 -5
- package/src/llama.cpp/CMakeLists.txt +15 -4
- package/src/llama.cpp/common/CMakeLists.txt +15 -24
- package/src/llama.cpp/common/arg.cpp +172 -110
- package/src/llama.cpp/common/chat-parser.cpp +385 -0
- package/src/llama.cpp/common/chat-parser.h +120 -0
- package/src/llama.cpp/common/chat.cpp +726 -596
- package/src/llama.cpp/common/chat.h +74 -8
- package/src/llama.cpp/common/common.cpp +56 -38
- package/src/llama.cpp/common/common.h +9 -3
- package/src/llama.cpp/common/json-partial.cpp +256 -0
- package/src/llama.cpp/common/json-partial.h +38 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/src/llama.cpp/common/sampling.cpp +7 -8
- package/src/llama.cpp/common/speculative.cpp +6 -4
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
- package/src/llama.cpp/ggml/include/ggml.h +22 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
- package/src/llama.cpp/include/llama.h +145 -40
- package/src/llama.cpp/src/CMakeLists.txt +5 -1
- package/src/llama.cpp/src/llama-arch.cpp +99 -3
- package/src/llama.cpp/src/llama-arch.h +10 -1
- package/src/llama.cpp/src/llama-batch.cpp +728 -272
- package/src/llama.cpp/src/llama-batch.h +112 -54
- package/src/llama.cpp/src/llama-chat.cpp +19 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +525 -339
- package/src/llama.cpp/src/llama-context.h +38 -17
- package/src/llama.cpp/src/llama-cparams.cpp +4 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-grammar.cpp +12 -2
- package/src/llama.cpp/src/llama-graph.cpp +413 -353
- package/src/llama.cpp/src/llama-graph.h +112 -56
- package/src/llama.cpp/src/llama-hparams.cpp +10 -2
- package/src/llama.cpp/src/llama-hparams.h +13 -2
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
- package/src/llama.cpp/src/llama-kv-cells.h +415 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
- package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
- package/src/llama.cpp/src/llama-memory.cpp +41 -0
- package/src/llama.cpp/src/llama-memory.h +86 -5
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1137 -528
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +69 -32
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +11 -7
- package/src/llama.cpp/src/unicode.cpp +5 -0
- package/src/tts_utils.h +1 -1
- package/src/llama.cpp/common/json.hpp +0 -24766
- package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
- package/src/llama.cpp/common/minja/minja.hpp +0 -2974
- package/src/llama.cpp/common/stb_image.h +0 -7988
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
- package/src/llama.cpp/src/llama-kv-cache.h +0 -515
- /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
package/lib/binding.ts
CHANGED
@@ -22,7 +22,6 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   chat_template?: string
-  reasoning_format?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -74,6 +73,7 @@ export type CompletionResponseFormat = {
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
   jinja?: boolean
+  reasoning_format?: string
   chat_template?: string
   response_format?: CompletionResponseFormat
   tools?: object
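The net effect of this typing change is that reasoning_format is no longer a model-load option and is instead passed per completion call. A minimal TypeScript sketch of the new option shapes, assuming these types are re-exported from the package root and that ChatMessage is a { role, content } pair (both assumptions, not shown in this diff):

```ts
import type { LlamaCompletionOptions, LlamaModelOptions } from '@fugood/llama.node'

// beta.5 configured reasoning_format once at model load;
// beta.6 expects it on each completion request instead.
const modelOptions: LlamaModelOptions = {
  model: './models/example.gguf', // hypothetical path
}

const completionOptions: LlamaCompletionOptions = {
  messages: [{ role: 'user', content: 'Summarize the release.' }],
  jinja: true,
  reasoning_format: 'deepseek', // now a per-completion option
}
```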
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.0-beta.5",
+  "version": "1.0.0-beta.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-arm64": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-x64": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-arm64": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-darwin-x64": "1.0.0-beta.5",
-    "@fugood/node-llama-darwin-arm64": "1.0.0-beta.5"
+    "@fugood/node-llama-linux-x64": "1.0.0-beta.6",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.0-beta.6",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.0-beta.6",
+    "@fugood/node-llama-linux-arm64": "1.0.0-beta.6",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.0-beta.6",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.0-beta.6",
+    "@fugood/node-llama-win32-x64": "1.0.0-beta.6",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.0-beta.6",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.0-beta.6",
+    "@fugood/node-llama-win32-arm64": "1.0.0-beta.6",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.0-beta.6",
+    "@fugood/node-llama-darwin-x64": "1.0.0-beta.6",
+    "@fugood/node-llama-darwin-arm64": "1.0.0-beta.6"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED
@@ -1,18 +1,19 @@
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index
+index 7d9aaeb1..a7b68d4a 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
-@@ -
-#include "chat.h"
-#include "json-schema-to-grammar.h"
+@@ -6,9 +6,6 @@
 #include "log.h"
-
--#include "minja/minja.hpp"
+#include "regex-partial.h"
 
-
-
-
-
+-#include <minja/chat-template.hpp>
+-#include <minja/minja.hpp>
+-
+#include <cstdio>
+#include <exception>
+#include <iostream>
+@@ -121,14 +118,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
+return diffs;
 }
 
 -typedef minja::chat_template common_chat_template;
@@ -27,17 +28,17 @@ index f138c7bc..e177fe92 100644
 json messages;
 json tools;
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index
+index 9f59e6b0..9b7fe724 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
-@@ -
-#include <chrono>
+@@ -8,7 +8,16 @@
 #include <string>
 #include <vector>
-+#include "minja/chat-template.hpp"
-+#include "minja/minja.hpp"
 
 -struct common_chat_templates;
++#include <minja/chat-template.hpp>
++#include <minja/minja.hpp>
++
 +typedef minja::chat_template common_chat_template;
 +
 +struct common_chat_templates {
@@ -49,10 +50,10 @@ index d26a09c2..cb92721a 100644
 struct common_chat_tool_call {
 std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index
+index e4e71ad1..091ddda4 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -
+@@ -1101,6 +1101,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
 mparams.n_gpu_layers = params.n_gpu_layers;
 }
 
@@ -61,10 +62,10 @@ index 94f545f8..a55df8aa 100644
 mparams.split_mode = params.split_mode;
 mparams.tensor_split = params.tensor_split;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index
+index e08a59ea..d120b67d 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -
+@@ -223,6 +223,7 @@ enum common_reasoning_format {
 };
 
 struct common_params {
@@ -73,11 +74,11 @@ index 0a9dc059..996afcd8 100644
 int32_t n_ctx = 4096; // context size
 int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index
+index 71b1d67b..093cd6f9 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-@@ -
-
+@@ -104,7 +104,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+)
 
 if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
 - message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
@@ -86,10 +87,10 @@ index 9a3085be..8218cc16 100644
 check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
 if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
 diff --git a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
-index
+index 39f022f3..7ae9047e 100644
 --- a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
-@@ -
+@@ -110,7 +110,7 @@ if (Vulkan_FOUND)
 endif()
 
 # Set up toolchain for host compilation whether cross-compiling or not
@@ -98,10 +99,10 @@ index 662f1377..f9f99698 100644
 if (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN)
 set(HOST_CMAKE_TOOLCHAIN_FILE ${GGML_VULKAN_SHADERS_GEN_TOOLCHAIN})
 else()
-@@ -
+@@ -130,7 +130,7 @@ if (Vulkan_FOUND)
+
 include(ExternalProject)
 
-# Add toolchain file if cross-compiling
 - if (CMAKE_CROSSCOMPILING)
 + if (CMAKE_CROSSCOMPILING OR NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR)
 list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${HOST_CMAKE_TOOLCHAIN_FILE})
package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -25,12 +25,16 @@ size_t findStoppingStrings(const std::string &text,
 
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-    Napi::Function callback,
-
+    Napi::Function callback,
+    common_params params,
+    std::vector<std::string> stop_words,
+    int32_t chat_format,
+    std::string reasoning_format,
     const std::vector<std::string> &media_paths,
     const std::vector<llama_token> &guide_tokens)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
+      _reasoning_format(reasoning_format),
       _media_paths(media_paths), _guide_tokens(guide_tokens) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
@@ -65,7 +69,7 @@ void LlamaCompletionWorker::Execute() {
 
   // Process media if any are provided
   if (!_media_paths.empty()) {
-
+    auto *mtmd_ctx = _sess->get_mtmd_ctx();
 
     if (mtmd_ctx != nullptr) {
       // Process the media and get the tokens
@@ -234,8 +238,21 @@ void LlamaCompletionWorker::OnOK() {
   std::string content;
   if (!_stop) {
     try {
+      common_chat_syntax chat_syntax;
+      chat_syntax.format = static_cast<common_chat_format>(_chat_format);
+
+      if (_reasoning_format == "deepseek") {
+        chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+      } else if (_reasoning_format == "deepseek-legacy") {
+        chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+      } else {
+        chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+      }
       common_chat_msg message = common_chat_parse(
-          _result.text,
+          _result.text,
+          false,
+          chat_syntax
+      );
       if (!message.reasoning_content.empty()) {
         reasoning_content = message.reasoning_content;
       }
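Judging from the mapping added in LlamaCompletionWorker::OnOK above, the worker recognizes two reasoning_format string values and treats everything else as none. A small TypeScript sketch of that contract (the ReasoningFormat union name is hypothetical, not part of the package API):

```ts
// 'deepseek'        -> COMMON_REASONING_FORMAT_DEEPSEEK
// 'deepseek-legacy' -> COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY
// anything else     -> COMMON_REASONING_FORMAT_NONE
type ReasoningFormat = 'deepseek' | 'deepseek-legacy' | 'none'

const reasoning_format: ReasoningFormat = 'deepseek'
```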
package/src/LlamaCompletionWorker.h
CHANGED
@@ -20,6 +20,7 @@ public:
                         Napi::Function callback, common_params params,
                         std::vector<std::string> stop_words,
                         int32_t chat_format,
+                        std::string reasoning_format,
                         const std::vector<std::string> &media_paths = {},
                         const std::vector<llama_token> &guide_tokens = {});
 
@@ -41,6 +42,7 @@ private:
   common_params _params;
   std::vector<std::string> _stop_words;
   int32_t _chat_format;
+  std::string _reasoning_format;
   std::vector<std::string> _media_paths;
   std::vector<llama_token> _guide_tokens;
   std::function<void()> _onComplete;
package/src/LlamaContext.cpp
CHANGED
@@ -10,7 +10,7 @@
 #include "ggml.h"
 #include "gguf.h"
 #include "json-schema-to-grammar.h"
-#include
+#include <nlohmann/json.hpp>
 #include "llama-impl.h"
 
 #include <atomic>
@@ -223,14 +223,6 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   params.chat_template = get_option<std::string>(options, "chat_template", "");
 
-  std::string reasoning_format =
-      get_option<std::string>(options, "reasoning_format", "none");
-  if (reasoning_format == "deepseek") {
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-  } else {
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-  }
-
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -521,8 +513,6 @@ common_chat_params getFormattedChatWithJinja(
   if (!json_schema.empty()) {
     inputs.json_schema = json::parse(json_schema);
   }
-  inputs.extract_reasoning =
-      sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
@@ -695,6 +685,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
+  std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
 
   common_params params = _sess->params();
   auto grammar_from_params = get_option<std::string>(options, "grammar", "");
@@ -904,7 +895,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
   auto *worker =
       new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                chat_format, media_paths, guide_tokens);
+                                chat_format, reasoning_format, media_paths, guide_tokens);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
package/src/common.hpp
CHANGED
@@ -6,6 +6,7 @@
 #include "llama.h"
 #include "tools/mtmd/clip.h"
 #include "tools/mtmd/mtmd.h"
+#include "tools/mtmd/mtmd-helper.h"
 #include <memory>
 #include <mutex>
 #include <napi.h>
@@ -97,7 +98,7 @@ public:
   inline std::mutex &get_mutex() { return mutex; }
 
   // Getter for the multimodal context
-  inline
+  inline mtmd_context *get_mtmd_ctx() { return _mtmd_ctx; }
 
   // Setter for the multimodal context
   inline void set_mtmd_ctx(mtmd_context *ctx) { _mtmd_ctx = ctx; }
@@ -219,7 +220,7 @@ struct TokenizeResult {
 };
 
 static TokenizeResult
-tokenizeWithMedia(
+tokenizeWithMedia(mtmd_context *mtmd_ctx, const std::string &prompt,
                   const std::vector<std::string> &media_paths) {
   if (mtmd_ctx == nullptr) {
     throw std::runtime_error("Multimodal context is not initialized");
@@ -263,7 +264,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
   std::vector<uint8_t> media_data = base64_decode(base64_data);
 
   // Load bitmap from memory buffer using direct initialization
-  mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(),
+  mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(mtmd_ctx, media_data.data(),
                                                     media_data.size()));
   if (!bmp.ptr) {
     bitmaps.entries.clear();
@@ -300,7 +301,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
   fclose(file);
 
   // Create bitmap directly
-  mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
+  mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(mtmd_ctx, media_path.c_str()));
   if (!bmp.ptr) {
     bitmaps.entries.clear();
     throw std::runtime_error("Failed to load media");
@@ -388,7 +389,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
 
 // Process media and add them to the tokenized input
 static llama_pos
-processMediaPrompt(llama_context *ctx,
+processMediaPrompt(llama_context *ctx, mtmd_context *mtmd_ctx,
                    LlamaSessionPtr sess, const common_params &params,
                    const std::vector<std::string> &media_paths) {
   if (mtmd_ctx == nullptr) {
package/src/llama.cpp/CMakeLists.txt
CHANGED
@@ -89,6 +89,14 @@ option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
 
+if (NOT DEFINED LLAMA_BUILD_NUMBER)
+    set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
+endif()
+if (NOT DEFINED LLAMA_BUILD_COMMIT)
+    set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
+endif()
+set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+
 # override ggml options
 set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
 set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
@@ -155,10 +163,17 @@ if (LLAMA_USE_SYSTEM_GGML)
 endif()
 
 if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
+    set(GGML_BUILD_NUMBER ${LLAMA_BUILD_NUMBER})
+    set(GGML_BUILD_COMMIT ${LLAMA_BUILD_COMMIT})
     add_subdirectory(ggml)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
 
+if (MINGW)
+    # Target Windows 8 for PrefetchVirtualMemory
+    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
+endif()
+
 #
 # build the library
 #
@@ -199,10 +214,6 @@ endif()
 include(GNUInstallDirs)
 include(CMakePackageConfigHelpers)
 
-set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
-set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
-set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
-
 set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
package/src/llama.cpp/common/CMakeLists.txt
CHANGED
@@ -7,8 +7,8 @@ llama_add_compile_flags()
 # Build info header
 #
 
-if(EXISTS "${
-    set(GIT_DIR "${
+if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
+    set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
 
     # Is git submodule
     if(NOT IS_DIRECTORY "${GIT_DIR}")
@@ -18,36 +18,26 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
         if (SLASH_POS EQUAL 0)
             set(GIT_DIR "${REAL_GIT_DIR}")
         else()
-            set(GIT_DIR "${
+            set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
         endif()
     endif()
 
     if(EXISTS "${GIT_DIR}/index")
-
+        # For build-info.cpp below
+        set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${GIT_DIR}/index")
     else()
         message(WARNING "Git index not found in git repository.")
-        set(GIT_INDEX "")
     endif()
 else()
     message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
-    set(GIT_INDEX "")
 endif()
 
-
-
-
-
-    COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
-            -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-            -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
-            -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
-    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
-    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
-    VERBATIM
-)
+set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
+set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
+configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
+
 set(TARGET build_info)
-add_library(${TARGET} OBJECT
+add_library(${TARGET} OBJECT ${OUTPUT_FILE})
 if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
@@ -58,19 +48,20 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
+    chat-parser.cpp
+    chat-parser.h
     chat.cpp
     chat.h
     common.cpp
     common.h
     console.cpp
    console.h
+    json-partial.cpp
+    json-partial.h
     json-schema-to-grammar.cpp
-    json.hpp
     llguidance.cpp
     log.cpp
     log.h
-    minja/chat-template.hpp
-    minja/minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     regex-partial.cpp
@@ -143,7 +134,7 @@ if (LLAMA_LLGUIDANCE)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
-target_include_directories(${TARGET} PUBLIC .)
+target_include_directories(${TARGET} PUBLIC . ../vendor)
 target_compile_features (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
 