@fugood/llama.node 1.0.0-beta.5 → 1.0.0-beta.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +3 -1
- package/lib/index.js +2 -0
- package/lib/index.ts +3 -1
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +27 -26
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +28 -7
- package/src/LlamaCompletionWorker.h +4 -0
- package/src/LlamaContext.cpp +14 -17
- package/src/common.hpp +7 -6
- package/src/llama.cpp/CMakeLists.txt +15 -4
- package/src/llama.cpp/common/CMakeLists.txt +15 -24
- package/src/llama.cpp/common/arg.cpp +172 -110
- package/src/llama.cpp/common/chat-parser.cpp +385 -0
- package/src/llama.cpp/common/chat-parser.h +120 -0
- package/src/llama.cpp/common/chat.cpp +726 -596
- package/src/llama.cpp/common/chat.h +74 -8
- package/src/llama.cpp/common/common.cpp +56 -38
- package/src/llama.cpp/common/common.h +9 -3
- package/src/llama.cpp/common/json-partial.cpp +256 -0
- package/src/llama.cpp/common/json-partial.h +38 -0
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
- package/src/llama.cpp/common/json-schema-to-grammar.h +4 -4
- package/src/llama.cpp/common/sampling.cpp +7 -8
- package/src/llama.cpp/common/speculative.cpp +6 -4
- package/src/llama.cpp/ggml/CMakeLists.txt +48 -3
- package/src/llama.cpp/ggml/include/ggml.h +22 -3
- package/src/llama.cpp/ggml/src/CMakeLists.txt +81 -22
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +131 -49
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2162 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp +82 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +12 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +64 -88
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +282 -100
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1570 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +119 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +85 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +204 -49
- package/src/llama.cpp/include/llama.h +145 -40
- package/src/llama.cpp/src/CMakeLists.txt +5 -1
- package/src/llama.cpp/src/llama-arch.cpp +99 -3
- package/src/llama.cpp/src/llama-arch.h +10 -1
- package/src/llama.cpp/src/llama-batch.cpp +728 -272
- package/src/llama.cpp/src/llama-batch.h +112 -54
- package/src/llama.cpp/src/llama-chat.cpp +19 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +525 -339
- package/src/llama.cpp/src/llama-context.h +38 -17
- package/src/llama.cpp/src/llama-cparams.cpp +4 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-grammar.cpp +12 -2
- package/src/llama.cpp/src/llama-graph.cpp +413 -353
- package/src/llama.cpp/src/llama-graph.h +112 -56
- package/src/llama.cpp/src/llama-hparams.cpp +10 -2
- package/src/llama.cpp/src/llama-hparams.h +13 -2
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +279 -0
- package/src/llama.cpp/src/llama-kv-cache-unified-iswa.h +128 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.cpp +1815 -0
- package/src/llama.cpp/src/llama-kv-cache-unified.h +303 -0
- package/src/llama.cpp/src/llama-kv-cells.h +415 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +246 -0
- package/src/llama.cpp/src/llama-memory-hybrid.h +138 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +1112 -0
- package/src/llama.cpp/src/llama-memory-recurrent.h +183 -0
- package/src/llama.cpp/src/llama-memory.cpp +41 -0
- package/src/llama.cpp/src/llama-memory.h +86 -5
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +42 -17
- package/src/llama.cpp/src/llama-model-saver.cpp +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1137 -528
- package/src/llama.cpp/src/llama-model.h +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +2 -1
- package/src/llama.cpp/src/llama-sampling.cpp +2 -2
- package/src/llama.cpp/src/llama-vocab.cpp +69 -32
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +11 -7
- package/src/llama.cpp/src/unicode.cpp +5 -0
- package/src/tts_utils.h +1 -1
- package/src/llama.cpp/common/json.hpp +0 -24766
- package/src/llama.cpp/common/minja/chat-template.hpp +0 -541
- package/src/llama.cpp/common/minja/minja.hpp +0 -2974
- package/src/llama.cpp/common/stb_image.h +0 -7988
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13326
- package/src/llama.cpp/src/llama-kv-cache.cpp +0 -2827
- package/src/llama.cpp/src/llama-kv-cache.h +0 -515
- /package/src/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
- /package/src/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
package/lib/binding.ts
CHANGED
@@ -22,7 +22,6 @@ export type ChatMessage = {
 export type LlamaModelOptions = {
   model: string
   chat_template?: string
-  reasoning_format?: string
   embedding?: boolean
   embd_normalize?: number
   pooling_type?: 'none' | 'mean' | 'cls' | 'last' | 'rank'
@@ -74,11 +73,14 @@ export type CompletionResponseFormat = {
 export type LlamaCompletionOptions = {
   messages?: ChatMessage[]
   jinja?: boolean
+  reasoning_format?: string
   chat_template?: string
   response_format?: CompletionResponseFormat
   tools?: object
   parallel_tool_calls?: boolean
   tool_choice?: string
+  enable_thinking?: boolean
+  thinking_forced_open?: boolean
   prompt?: string
   temperature?: number
   top_k?: number
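Net effect of the binding.ts change above: reasoning_format moves from the model-load options to the per-request completion options, and two new completion flags (enable_thinking, thinking_forced_open) are introduced. A minimal sketch of the new option shape, assuming the types are imported from lib/binding.ts and using a hypothetical model path:

    import type { LlamaModelOptions, LlamaCompletionOptions } from './binding'

    // reasoning_format is no longer accepted here (removed from LlamaModelOptions)
    const modelOptions: LlamaModelOptions = {
      model: './model.gguf', // hypothetical path
    }

    // ...it is now set per completion call, next to the new thinking flags
    const completionOptions: LlamaCompletionOptions = {
      prompt: 'Hello',
      jinja: true,
      reasoning_format: 'deepseek',  // moved from LlamaModelOptions
      enable_thinking: true,         // added in this diff
      thinking_forced_open: false,   // added in this diff
    }

In practice thinking_forced_open is normally produced by getFormattedChat (see the LlamaContext.cpp section below) rather than set by hand.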
package/lib/index.js
CHANGED
@@ -131,6 +131,7 @@ class LlamaContextWrapper {
         };
     }
     getFormattedChat(messages, template, params) {
+        var _a;
         const { messages: chat, has_media, media_paths, } = this._formatMediaChat(messages);
         const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
         let tmpl;
@@ -143,6 +144,7 @@ class LlamaContextWrapper {
             tools: params === null || params === void 0 ? void 0 : params.tools,
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
+            enable_thinking: (_a = params === null || params === void 0 ? void 0 : params.enable_thinking) !== null && _a !== void 0 ? _a : true,
         });
         if (!useJinja) {
             return {
package/lib/index.ts
CHANGED
@@ -158,7 +158,8 @@ class LlamaContextWrapper {
       response_format?: CompletionResponseFormat
       tools?: object
       parallel_tool_calls?: object
-      tool_choice?: string
+      tool_choice?: string,
+      enable_thinking?: boolean,
     },
   ): FormattedChatResult {
     const {
@@ -178,6 +179,7 @@ class LlamaContextWrapper {
       tools: params?.tools,
       parallel_tool_calls: params?.parallel_tool_calls,
       tool_choice: params?.tool_choice,
+      enable_thinking: params?.enable_thinking ?? true,
     })

     if (!useJinja) {
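Like the compiled lib/index.js change earlier, the TypeScript wrapper now forwards enable_thinking to the native formatter with a default of true (params?.enable_thinking ?? true), so callers only pass it to opt out. A hedged sketch against the getFormattedChat(messages, template, params) signature shown here, with the wrapper instance left abstract:

    // Hypothetical wrapper instance; how it is constructed is outside this diff.
    declare const ctx: {
      getFormattedChat(
        messages: unknown[],
        template?: string,
        params?: { jinja?: boolean; enable_thinking?: boolean },
      ): unknown
    }

    const messages = [{ role: 'user', content: 'Hi' }]

    // enable_thinking omitted -> defaults to true on the way to the native side
    ctx.getFormattedChat(messages, undefined, { jinja: true })

    // explicit opt-out
    ctx.getFormattedChat(messages, undefined, { jinja: true, enable_thinking: false })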
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0.0-beta.5",
+  "version": "1.0.0-beta.7",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -70,19 +70,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-x64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-x64-cuda": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-arm64": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-linux-arm64-cuda": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-x64": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-x64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-x64-cuda": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-arm64": "1.0.0-beta.5",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0.0-beta.5",
-    "@fugood/node-llama-darwin-x64": "1.0.0-beta.5",
-    "@fugood/node-llama-darwin-arm64": "1.0.0-beta.5"
+    "@fugood/node-llama-linux-x64": "1.0.0-beta.7",
+    "@fugood/node-llama-linux-x64-vulkan": "1.0.0-beta.7",
+    "@fugood/node-llama-linux-x64-cuda": "1.0.0-beta.7",
+    "@fugood/node-llama-linux-arm64": "1.0.0-beta.7",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.0.0-beta.7",
+    "@fugood/node-llama-linux-arm64-cuda": "1.0.0-beta.7",
+    "@fugood/node-llama-win32-x64": "1.0.0-beta.7",
+    "@fugood/node-llama-win32-x64-vulkan": "1.0.0-beta.7",
+    "@fugood/node-llama-win32-x64-cuda": "1.0.0-beta.7",
+    "@fugood/node-llama-win32-arm64": "1.0.0-beta.7",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.0.0-beta.7",
+    "@fugood/node-llama-darwin-x64": "1.0.0-beta.7",
+    "@fugood/node-llama-darwin-arm64": "1.0.0-beta.7"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED
@@ -1,18 +1,19 @@
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index
+index 7d9aaeb1..a7b68d4a 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
-@@ -
-#include "chat.h"
-#include "json-schema-to-grammar.h"
+@@ -6,9 +6,6 @@
 #include "log.h"
-
--#include "minja/minja.hpp"
+#include "regex-partial.h"
 
-
-
-
-
+-#include <minja/chat-template.hpp>
+-#include <minja/minja.hpp>
+-
+#include <cstdio>
+#include <exception>
+#include <iostream>
+@@ -121,14 +118,6 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
+return diffs;
 }
 
 -typedef minja::chat_template common_chat_template;
@@ -27,17 +28,17 @@ index f138c7bc..e177fe92 100644
 json messages;
 json tools;
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index
+index 9f59e6b0..9b7fe724 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
-@@ -
-#include <chrono>
+@@ -8,7 +8,16 @@
 #include <string>
 #include <vector>
-+#include "minja/chat-template.hpp"
-+#include "minja/minja.hpp"
 
 -struct common_chat_templates;
++#include <minja/chat-template.hpp>
++#include <minja/minja.hpp>
++
 +typedef minja::chat_template common_chat_template;
 +
 +struct common_chat_templates {
@@ -49,10 +50,10 @@ index d26a09c2..cb92721a 100644
 struct common_chat_tool_call {
 std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index
+index e4e71ad1..091ddda4 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -
+@@ -1101,6 +1101,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
 mparams.n_gpu_layers = params.n_gpu_layers;
 }
 
@@ -61,10 +62,10 @@ index 94f545f8..a55df8aa 100644
 mparams.split_mode = params.split_mode;
 mparams.tensor_split = params.tensor_split;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index
+index e08a59ea..d120b67d 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -
+@@ -223,6 +223,7 @@ enum common_reasoning_format {
 };
 
 struct common_params {
@@ -73,11 +74,11 @@ index 0a9dc059..996afcd8 100644
 int32_t n_ctx = 4096; // context size
 int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index
+index 71b1d67b..093cd6f9 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-@@ -
-
+@@ -104,7 +104,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+)
 
 if (MSVC AND NOT CMAKE_C_COMPILER_ID STREQUAL "Clang")
 - message(FATAL_ERROR "MSVC is not supported for ARM, use clang")
@@ -86,10 +87,10 @@ index 9a3085be..8218cc16 100644
 check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
 if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
 diff --git a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
-index
+index 39f022f3..7ae9047e 100644
 --- a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
-@@ -
+@@ -110,7 +110,7 @@ if (Vulkan_FOUND)
 endif()
 
 # Set up toolchain for host compilation whether cross-compiling or not
@@ -98,10 +99,10 @@ index 662f1377..f9f99698 100644
 if (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN)
 set(HOST_CMAKE_TOOLCHAIN_FILE ${GGML_VULKAN_SHADERS_GEN_TOOLCHAIN})
 else()
-@@ -
+@@ -130,7 +130,7 @@ if (Vulkan_FOUND)
+
 include(ExternalProject)
 
-# Add toolchain file if cross-compiling
 - if (CMAKE_CROSSCOMPILING)
 + if (CMAKE_CROSSCOMPILING OR NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL CMAKE_SYSTEM_PROCESSOR)
 list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${HOST_CMAKE_TOOLCHAIN_FILE})
package/src/EmbeddingWorker.cpp
CHANGED
@@ -8,7 +8,7 @@ EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
     _params(params) {}
 
 void EmbeddingWorker::Execute() {
-
+  llama_memory_clear(llama_get_memory(_sess->context()), true);
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   auto vocab = llama_model_get_vocab(_sess->model());
package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -25,12 +25,18 @@ size_t findStoppingStrings(const std::string &text,
 
 LlamaCompletionWorker::LlamaCompletionWorker(
     const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-    Napi::Function callback,
-
+    Napi::Function callback,
+    common_params params,
+    std::vector<std::string> stop_words,
+    int32_t chat_format,
+    bool thinking_forced_open,
+    std::string reasoning_format,
     const std::vector<std::string> &media_paths,
     const std::vector<llama_token> &guide_tokens)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
+      _thinking_forced_open(thinking_forced_open),
+      _reasoning_format(reasoning_format),
       _media_paths(media_paths), _guide_tokens(guide_tokens) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
@@ -65,7 +71,7 @@ void LlamaCompletionWorker::Execute() {
 
   // Process media if any are provided
   if (!_media_paths.empty()) {
-
+    auto *mtmd_ctx = _sess->get_mtmd_ctx();
 
     if (mtmd_ctx != nullptr) {
       // Process the media and get the tokens
@@ -109,7 +115,7 @@ void LlamaCompletionWorker::Execute() {
       --n_cur;
     }
     n_input -= n_cur;
-
+    llama_memory_seq_rm(llama_get_memory(ctx), 0, n_cur, -1);
   }
   // Set the tokens
   _sess->set_tokens(std::move(prompt_tokens));
@@ -131,8 +137,9 @@ void LlamaCompletionWorker::Execute() {
       const int n_left = n_cur - n_keep - 1;
       const int n_discard = n_left / 2;
 
-
-
+      auto mem = llama_get_memory(ctx);
+      llama_memory_seq_rm(mem, 0, n_keep + 1, n_keep + n_discard + 1);
+      llama_memory_seq_add(mem, 0, n_keep + 1 + n_discard, n_cur, -n_discard);
 
       // shift the tokens
       embd->insert(embd->begin() + n_keep + 1,
@@ -234,8 +241,22 @@ void LlamaCompletionWorker::OnOK() {
   std::string content;
   if (!_stop) {
     try {
+      common_chat_syntax chat_syntax;
+      chat_syntax.format = static_cast<common_chat_format>(_chat_format);
+      chat_syntax.thinking_forced_open = _thinking_forced_open;
+
+      if (_reasoning_format == "deepseek") {
+        chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+      } else if (_reasoning_format == "deepseek-legacy") {
+        chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+      } else {
+        chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+      }
       common_chat_msg message = common_chat_parse(
-          _result.text,
+          _result.text,
+          false,
+          chat_syntax
+      );
       if (!message.reasoning_content.empty()) {
         reasoning_content = message.reasoning_content;
       }
package/src/LlamaCompletionWorker.h
CHANGED
@@ -20,6 +20,8 @@ public:
       Napi::Function callback, common_params params,
       std::vector<std::string> stop_words,
       int32_t chat_format,
+      bool thinking_forced_open,
+      std::string reasoning_format,
       const std::vector<std::string> &media_paths = {},
       const std::vector<llama_token> &guide_tokens = {});
 
@@ -41,6 +43,8 @@ private:
   common_params _params;
   std::vector<std::string> _stop_words;
   int32_t _chat_format;
+  bool _thinking_forced_open;
+  std::string _reasoning_format;
   std::vector<std::string> _media_paths;
   std::vector<llama_token> _guide_tokens;
   std::function<void()> _onComplete;
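The LlamaCompletionWorker changes above map the new reasoning_format string ('deepseek', 'deepseek-legacy', anything else meaning none) onto common_chat_syntax.reasoning_format and pass thinking_forced_open into common_chat_parse, which separates the visible content from the reasoning channel. A hedged sketch of consuming that separation from the JS side; the exact completion result shape is not part of this diff, so the field names below mirror the parsed common_chat_msg rather than a documented API:

    // Hypothetical result shape; reasoning_content mirrors the field read in OnOK() above.
    type CompletionResult = {
      text: string
      content?: string
      reasoning_content?: string
    }

    function splitThinking(result: CompletionResult) {
      // With reasoning_format 'deepseek' or 'deepseek-legacy' the worker fills
      // reasoning_content; with anything else the reasoning stays inline in the text.
      return {
        answer: result.content ?? result.text,
        thinking: result.reasoning_content ?? '',
      }
    }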
package/src/LlamaContext.cpp
CHANGED
@@ -10,7 +10,7 @@
 #include "ggml.h"
 #include "gguf.h"
 #include "json-schema-to-grammar.h"
-#include
+#include <nlohmann/json.hpp>
 #include "llama-impl.h"
 
 #include <atomic>
@@ -223,14 +223,6 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   params.chat_template = get_option<std::string>(options, "chat_template", "");
 
-  std::string reasoning_format =
-      get_option<std::string>(options, "reasoning_format", "none");
-  if (reasoning_format == "deepseek") {
-    params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
-  } else {
-    params.reasoning_format = COMMON_REASONING_FORMAT_NONE;
-  }
-
   params.n_ctx = get_option<int32_t>(options, "n_ctx", 512);
   params.n_batch = get_option<int32_t>(options, "n_batch", 2048);
   params.n_ubatch = get_option<int32_t>(options, "n_ubatch", 512);
@@ -507,7 +499,9 @@ common_chat_params getFormattedChatWithJinja(
     const common_chat_templates_ptr &templates, const std::string &messages,
     const std::string &chat_template, const std::string &json_schema,
     const std::string &tools, const bool &parallel_tool_calls,
-    const std::string &tool_choice
+    const std::string &tool_choice,
+    const bool &enable_thinking
+) {
   common_chat_templates_inputs inputs;
   inputs.messages = common_chat_msgs_parse_oaicompat(json::parse(messages));
   auto useTools = !tools.empty();
@@ -521,8 +515,7 @@ common_chat_params getFormattedChatWithJinja(
   if (!json_schema.empty()) {
     inputs.json_schema = json::parse(json_schema);
   }
-  inputs.
-      sess->params().reasoning_format != COMMON_REASONING_FORMAT_NONE;
+  inputs.enable_thinking = enable_thinking;
 
   // If chat_template is provided, create new one and use it (probably slow)
   if (!chat_template.empty()) {
@@ -596,12 +589,11 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto parallel_tool_calls =
       get_option<bool>(params, "parallel_tool_calls", false);
   auto tool_choice = get_option<std::string>(params, "tool_choice", "");
+  auto enable_thinking = get_option<bool>(params, "enable_thinking", false);
 
   auto chatParams = getFormattedChatWithJinja(
       _sess, _templates, messages, chat_template, json_schema_str, tools_str,
-      parallel_tool_calls, tool_choice);
-
-  console_log(env, std::string("format: ") + std::to_string(chatParams.format));
+      parallel_tool_calls, tool_choice, enable_thinking);
 
   Napi::Object result = Napi::Object::New(env);
   result.Set("prompt", chatParams.prompt);
@@ -622,6 +614,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
     grammar_triggers.Set(i, triggerObj);
   }
   result.Set("grammar_triggers", grammar_triggers);
+  result.Set("thinking_forced_open", chatParams.thinking_forced_open);
   // preserved_tokens: string[]
   Napi::Array preserved_tokens = Napi::Array::New(env);
   for (size_t i = 0; i < chatParams.preserved_tokens.size(); i++) {
@@ -695,6 +688,8 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
+  bool thinking_forced_open = get_option<bool>(options, "thinking_forced_open", false);
+  std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
 
   common_params params = _sess->params();
   auto grammar_from_params = get_option<std::string>(options, "grammar", "");
@@ -802,14 +797,16 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
         get_option<bool>(options, "parallel_tool_calls", false);
     auto tool_choice =
         get_option<std::string>(options, "tool_choice", "none");
+    auto enable_thinking = get_option<bool>(options, "enable_thinking", true);
 
     auto chatParams = getFormattedChatWithJinja(
         _sess, _templates, json_stringify(messages), chat_template,
-        json_schema_str, tools_str, parallel_tool_calls, tool_choice);
+        json_schema_str, tools_str, parallel_tool_calls, tool_choice, enable_thinking);
 
     params.prompt = chatParams.prompt;
 
     chat_format = chatParams.format;
+    thinking_forced_open = chatParams.thinking_forced_open;
 
     for (const auto &token : chatParams.preserved_tokens) {
       auto ids =
@@ -904,7 +901,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
   auto *worker =
       new LlamaCompletionWorker(info, _sess, callback, params, stop_words,
-                                chat_format, media_paths, guide_tokens);
+                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
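In LlamaContext.cpp the thinking state now makes a round trip: GetFormattedChat reads enable_thinking, forwards it to the Jinja formatter, and returns the template's thinking_forced_open flag; Completion reads enable_thinking, thinking_forced_open, and reasoning_format and hands the latter two to the worker. A hedged sketch of driving that hand-off manually from JS (method names follow lib/index.js and the result field follows the result.Set("thinking_forced_open", ...) call above; the typings are assumptions, not the package's published API):

    // Hypothetical minimal typings for the two calls involved in the hand-off.
    declare const ctx: {
      getFormattedChat(
        messages: unknown[],
        template?: string,
        params?: object,
      ): { prompt: string; thinking_forced_open?: boolean }
      completion(options: object): Promise<unknown>
    }

    async function run() {
      const messages = [{ role: 'user', content: 'Hi' }]
      const formatted = ctx.getFormattedChat(messages, undefined, {
        jinja: true,
        enable_thinking: true,
      })

      await ctx.completion({
        prompt: formatted.prompt,
        reasoning_format: 'deepseek',
        // Tell the parser that the template already opened a thinking block:
        thinking_forced_open: formatted.thinking_forced_open ?? false,
      })
    }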
package/src/common.hpp
CHANGED
@@ -6,6 +6,7 @@
 #include "llama.h"
 #include "tools/mtmd/clip.h"
 #include "tools/mtmd/mtmd.h"
+#include "tools/mtmd/mtmd-helper.h"
 #include <memory>
 #include <mutex>
 #include <napi.h>
@@ -97,7 +98,7 @@ public:
   inline std::mutex &get_mutex() { return mutex; }
 
   // Getter for the multimodal context
-  inline
+  inline mtmd_context *get_mtmd_ctx() { return _mtmd_ctx; }
 
   // Setter for the multimodal context
   inline void set_mtmd_ctx(mtmd_context *ctx) { _mtmd_ctx = ctx; }
@@ -219,7 +220,7 @@ struct TokenizeResult {
 };
 
 static TokenizeResult
-tokenizeWithMedia(
+tokenizeWithMedia(mtmd_context *mtmd_ctx, const std::string &prompt,
                   const std::vector<std::string> &media_paths) {
   if (mtmd_ctx == nullptr) {
     throw std::runtime_error("Multimodal context is not initialized");
@@ -263,7 +264,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
       std::vector<uint8_t> media_data = base64_decode(base64_data);
 
       // Load bitmap from memory buffer using direct initialization
-      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(media_data.data(),
+      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(mtmd_ctx, media_data.data(),
                                                         media_data.size()));
       if (!bmp.ptr) {
         bitmaps.entries.clear();
@@ -300,7 +301,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
       fclose(file);
 
       // Create bitmap directly
-      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(media_path.c_str()));
+      mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(mtmd_ctx, media_path.c_str()));
       if (!bmp.ptr) {
         bitmaps.entries.clear();
         throw std::runtime_error("Failed to load media");
@@ -388,7 +389,7 @@ tokenizeWithMedia(const mtmd_context *mtmd_ctx, const std::string &prompt,
 
 // Process media and add them to the tokenized input
 static llama_pos
-processMediaPrompt(llama_context *ctx,
+processMediaPrompt(llama_context *ctx, mtmd_context *mtmd_ctx,
                    LlamaSessionPtr sess, const common_params &params,
                    const std::vector<std::string> &media_paths) {
   if (mtmd_ctx == nullptr) {
@@ -460,7 +461,7 @@ processMediaPrompt(llama_context *ctx, const mtmd_context *mtmd_ctx,
   }
 
   // Clear all KV cache entries after position n_past
-
+  llama_memory_seq_rm(llama_get_memory(ctx), 0, n_past, -1);
 
   size_t num_chunks = mtmd_input_chunks_size(chunks);
 
package/src/llama.cpp/CMakeLists.txt
CHANGED
@@ -89,6 +89,14 @@ option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
 
+if (NOT DEFINED LLAMA_BUILD_NUMBER)
+    set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
+endif()
+if (NOT DEFINED LLAMA_BUILD_COMMIT)
+    set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
+endif()
+set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+
 # override ggml options
 set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
 set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
@@ -155,10 +163,17 @@ if (LLAMA_USE_SYSTEM_GGML)
 endif()
 
 if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
+    set(GGML_BUILD_NUMBER ${LLAMA_BUILD_NUMBER})
+    set(GGML_BUILD_COMMIT ${LLAMA_BUILD_COMMIT})
     add_subdirectory(ggml)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
 
+if (MINGW)
+    # Target Windows 8 for PrefetchVirtualMemory
+    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
+endif()
+
 #
 # build the library
 #
@@ -199,10 +214,6 @@ endif()
 include(GNUInstallDirs)
 include(CMakePackageConfigHelpers)
 
-set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
-set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
-set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
-
 set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
package/src/llama.cpp/common/CMakeLists.txt
CHANGED
@@ -7,8 +7,8 @@ llama_add_compile_flags()
 # Build info header
 #
 
-if(EXISTS "${
-    set(GIT_DIR "${
+if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
+    set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
 
     # Is git submodule
     if(NOT IS_DIRECTORY "${GIT_DIR}")
@@ -18,36 +18,26 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
         if (SLASH_POS EQUAL 0)
             set(GIT_DIR "${REAL_GIT_DIR}")
         else()
-            set(GIT_DIR "${
+            set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
         endif()
     endif()
 
     if(EXISTS "${GIT_DIR}/index")
-
+        # For build-info.cpp below
+        set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${GIT_DIR}/index")
     else()
         message(WARNING "Git index not found in git repository.")
-        set(GIT_INDEX "")
     endif()
 else()
     message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
-    set(GIT_INDEX "")
 endif()
 
-
-
-
-
-    COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
-            -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-            -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
-            -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
-    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
-    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
-    VERBATIM
-)
+set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
+set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
+configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
+
 set(TARGET build_info)
-add_library(${TARGET} OBJECT
+add_library(${TARGET} OBJECT ${OUTPUT_FILE})
 if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
@@ -58,19 +48,20 @@ add_library(${TARGET} STATIC
     arg.cpp
     arg.h
     base64.hpp
+    chat-parser.cpp
+    chat-parser.h
     chat.cpp
     chat.h
     common.cpp
     common.h
     console.cpp
     console.h
+    json-partial.cpp
+    json-partial.h
     json-schema-to-grammar.cpp
-    json.hpp
     llguidance.cpp
     log.cpp
     log.h
-    minja/chat-template.hpp
-    minja/minja.hpp
     ngram-cache.cpp
     ngram-cache.h
     regex-partial.cpp
@@ -143,7 +134,7 @@ if (LLAMA_LLGUIDANCE)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
-target_include_directories(${TARGET} PUBLIC .)
+target_include_directories(${TARGET} PUBLIC . ../vendor)
 target_compile_features   (${TARGET} PUBLIC cxx_std_17)
 target_link_libraries     (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)