@fugood/llama.node 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.js +2 -2
- package/lib/binding.ts +47 -8
- package/lib/index.js +21 -1
- package/lib/index.ts +31 -1
- package/package.json +12 -3
- package/src/LlamaCompletionWorker.cpp +33 -6
- package/src/LlamaCompletionWorker.h +3 -1
- package/src/LlamaContext.cpp +336 -28
- package/src/LlamaContext.h +2 -0
- package/src/common.hpp +19 -2
- package/src/llama.cpp/.github/workflows/build.yml +289 -107
- package/src/llama.cpp/.github/workflows/close-issue.yml +1 -1
- package/src/llama.cpp/.github/workflows/docker.yml +2 -1
- package/src/llama.cpp/.github/workflows/server.yml +25 -2
- package/src/llama.cpp/CMakeLists.txt +10 -19
- package/src/llama.cpp/cmake/build-info.cmake +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +32 -0
- package/src/llama.cpp/common/arg.cpp +66 -16
- package/src/llama.cpp/common/chat-template.hpp +515 -0
- package/src/llama.cpp/common/chat.cpp +966 -0
- package/src/llama.cpp/common/chat.hpp +52 -0
- package/src/llama.cpp/common/common.cpp +159 -36
- package/src/llama.cpp/common/common.h +56 -14
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +46 -66
- package/src/llama.cpp/common/json-schema-to-grammar.h +15 -1
- package/src/llama.cpp/common/llguidance.cpp +270 -0
- package/src/llama.cpp/common/log.cpp +1 -10
- package/src/llama.cpp/common/log.h +10 -0
- package/src/llama.cpp/common/minja.hpp +2868 -0
- package/src/llama.cpp/common/sampling.cpp +22 -1
- package/src/llama.cpp/common/sampling.h +3 -0
- package/src/llama.cpp/docs/build.md +54 -9
- package/src/llama.cpp/examples/export-lora/export-lora.cpp +12 -2
- package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +1 -1
- package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +59 -0
- package/src/llama.cpp/examples/llava/clip.cpp +133 -14
- package/src/llama.cpp/examples/llava/clip.h +2 -0
- package/src/llama.cpp/examples/llava/llava.cpp +22 -8
- package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +9 -1
- package/src/llama.cpp/examples/main/main.cpp +26 -25
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.cpp +136 -137
- package/src/llama.cpp/examples/run/linenoise.cpp/linenoise.h +18 -4
- package/src/llama.cpp/examples/run/run.cpp +224 -69
- package/src/llama.cpp/examples/server/server.cpp +252 -81
- package/src/llama.cpp/examples/server/utils.hpp +73 -21
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +6 -4
- package/src/llama.cpp/examples/simple-cmake-pkg/CMakeLists.txt +11 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +78 -1
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/CMakeLists.txt +21 -4
- package/src/llama.cpp/ggml/src/ggml-alloc.c +1 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +91 -78
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +7 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +46 -0
- package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +16 -1
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +28 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +5 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +33 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +1 -5
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +323 -121
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +13 -3
- package/src/llama.cpp/ggml/src/ggml.c +23 -13
- package/src/llama.cpp/include/llama.h +14 -1
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp +112 -0
- package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out +46 -0
- package/src/llama.cpp/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/src/llama-arch.cpp +7 -2
- package/src/llama.cpp/src/llama-arch.h +3 -1
- package/src/llama.cpp/src/llama-chat.cpp +11 -2
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-grammar.cpp +86 -6
- package/src/llama.cpp/src/llama-grammar.h +22 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +76 -6
- package/src/llama.cpp/src/llama-sampling.cpp +47 -4
- package/src/llama.cpp/src/llama-vocab.cpp +10 -4
- package/src/llama.cpp/src/llama.cpp +181 -123
- package/src/llama.cpp/tests/CMakeLists.txt +4 -0
- package/src/llama.cpp/tests/test-backend-ops.cpp +158 -57
- package/src/llama.cpp/tests/test-chat-template.cpp +154 -31
- package/src/llama.cpp/tests/test-chat.cpp +607 -0
- package/src/llama.cpp/tests/test-grammar-integration.cpp +2 -2
- package/src/llama.cpp/tests/test-grammar-llguidance.cpp +1140 -0
- package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +0 -32
package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

@@ -17,13 +17,13 @@
 #include <cstring>
 #include <cstdlib>
 #include <cassert>
+#include <algorithm>
 #include <sys/stat.h>
 #include <sys/types.h>

 #ifdef _WIN32
 #include <windows.h>
 #include <direct.h> // For _mkdir on Windows
-#include <algorithm> // For std::replace on w64devkit
 #else
 #include <unistd.h>
 #include <sys/wait.h>
@@ -55,6 +55,12 @@ const std::vector<std::string> type_names = {
     "q4_k",
     "q5_k",
     "q6_k",
+    "iq2_xxs",
+    "iq2_xs",
+    "iq2_s",
+    "iq3_xxs",
+    "iq3_s",
+    "iq4_xs",
     "iq4_nl"
 };

@@ -316,8 +322,11 @@ void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool f16acc) {
     // For aligned matmul loads
     std::string load_vec_a = (coopmat2 || tname == "f32" || tname == "f16") ? load_vec : "2";

-
-
+    // don't generate f32 variants for coopmat2
+    if (!coopmat2) {
+        string_to_spv(shader_name + "_" + tname + "_f32", source_name, merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}}), fp16, coopmat, coopmat2, f16acc);
+        string_to_spv(shader_name + "_" + tname + "_f32_aligned", source_name, merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a}, {"LOAD_VEC_B", load_vec}, {"B_TYPE", aligned_b_type_f32}, {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}, {"ALIGNED", "1"}}), fp16, coopmat, coopmat2, f16acc);
+    }

     if (tname != "f16" && tname != "f32") {
         string_to_spv(shader_name + "_" + tname + "_f16", source_name, merge_maps(base_dict, {{data_a_key, "1"}, {"LOAD_VEC_A", load_vec_a_unaligned}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float"}, {"B_IS_FLOAT", "1"}}), fp16, coopmat, coopmat2, f16acc);
@@ -499,6 +508,7 @@ void write_output_files() {
     fprintf(hdr, "#include <cstdint>\n\n");
     fprintf(src, "#include \"%s\"\n\n", basename(target_hpp).c_str());

+    std::sort(shader_fnames.begin(), shader_fnames.end());
     for (const auto& pair : shader_fnames) {
         const std::string& name = pair.first;
 #ifdef _WIN32
package/src/llama.cpp/ggml/src/ggml.c

@@ -128,6 +128,10 @@ static void ggml_print_backtrace_symbols(void) {
 #endif

 static void ggml_print_backtrace(void) {
+    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
+    if (GGML_NO_BACKTRACE) {
+        return;
+    }
     char attach[32];
     snprintf(attach, sizeof(attach), "attach %d", getpid());
     int pid = fork();
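The new GGML_NO_BACKTRACE check gives host applications a way to suppress ggml's fork-and-attach backtrace on crashes, which matters for embedders like this Node addon that manage their own error reporting. A minimal sketch of opting out from the host process (POSIX setenv shown; Windows hosts would use _putenv_s instead):

#include <cstdlib>

int main() {
    // Any value works, even an empty string: the code above only checks
    // that the variable is present in the environment.
    setenv("GGML_NO_BACKTRACE", "1", /* overwrite = */ 1);
    // ... load the model and run inference as usual ...
    return 0;
}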
@@ -5339,7 +5343,7 @@ static void ggml_compute_backward(
         } break;
         case GGML_OP_MUL: {
             if (src0_needs_grads) {
-                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx,
+                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
             }
             if (src1_needs_grads) {
                 struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
@@ -5431,21 +5435,25 @@ static void ggml_compute_backward(
             // src1.shape [n,p,qq,rr]

             if (src0_needs_grads) {
-
+                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
+                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
+                struct ggml_tensor * tmp =
                     ggml_out_prod(ctx, // [n,m,qq,rr]
                         src1,          // [n,p,qq,rr]
                         grad);         // [m,p,qq,rr]
-
-
-
-
-
-
-
-
-
+                if (!ggml_are_same_shape(tmp, src0)) {
+                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
+                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
+                    GGML_ASSERT(tmp->ne[3] == 1);
+
+                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
+                    const size_t nb2 = tmp->nb[2] * nr2;
+                    const size_t nb3 = tmp->nb[2];
+
+                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
+                    tmp = ggml_repeat_back(ctx, tmp, src0);
                 }
-                ggml_add_or_set(ctx, cgraph, isrc0,
+                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
             }
             if (src1_needs_grads) {
                 ggml_add_or_set(ctx, cgraph, isrc1,
@@ -5514,7 +5522,9 @@ static void ggml_compute_backward(
             if (src0_needs_grads) {
                 GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
                 GGML_ASSERT(ggml_is_contiguous(grad));
-
+                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
+                ggml_add_or_set(ctx, cgraph, isrc0,
+                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
             }
         } break;
         case GGML_OP_RESHAPE: {
package/src/llama.cpp/include/llama.h

@@ -510,7 +510,8 @@ extern "C" {
     LLAMA_API uint64_t llama_model_size(const struct llama_model * model);

     // Get the default chat template. Returns nullptr if not available
-
+    // If name is NULL, returns the default chat template
+    LLAMA_API const char * llama_model_chat_template(const struct llama_model * model, const char * name);

     // Returns the total number of parameters in the model
     LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);
@@ -1198,6 +1199,18 @@ extern "C" {
             const char * grammar_str,
             const char * grammar_root);

+    /// @details Lazy grammar sampler, introduced in https://github.com/ggerganov/llama.cpp/pull/9639
+    /// @param trigger_words A list of words that will trigger the grammar sampler. This may be updated to a loose regex syntax (w/ ^) in a near future.
+    /// @param trigger_tokens A list of tokens that will trigger the grammar sampler.
+    LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy(
+            const struct llama_vocab * vocab,
+            const char * grammar_str,
+            const char * grammar_root,
+            const char ** trigger_words,
+            size_t num_trigger_words,
+            const llama_token * trigger_tokens,
+            size_t num_trigger_tokens);
+
     /// NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
     LLAMA_API struct llama_sampler * llama_sampler_init_penalties(
             int32_t penalty_last_n, // last n tokens to penalize (0 = disable penalty, -1 = context size)
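Together, the two llama.h additions let an embedder read a named chat template from the GGUF metadata and constrain sampling with a grammar that stays dormant until a trigger appears. A sketch of how a caller might use them (the "tool_use" template name and the toy grammar are illustrative, and llama_model_get_vocab is assumed from this revision of llama.h):

// Named chat template lookup, falling back to the default template.
const char * tmpl = llama_model_chat_template(model, "tool_use");
if (tmpl == NULL) {
    tmpl = llama_model_chat_template(model, NULL);
}

// Lazy grammar: sampling is unconstrained until the model emits "<tool_call>",
// after which the grammar takes over.
const char * triggers[] = { "<tool_call>" };
struct llama_sampler * smpl = llama_sampler_init_grammar_lazy(
        llama_model_get_vocab(model),
        "root ::= \"{\" [^}]* \"}\"",  // toy GBNF grammar for illustration
        "root",
        triggers, 1,
        /* trigger_tokens = */ NULL, 0);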
package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.inp (new file; whitespace-only test inputs appear as bare + lines)

@@ -0,0 +1,112 @@
+ied 4 ½ months
+__ggml_vocab_test__
+Führer
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+__ggml_vocab_test__
+
+
+__ggml_vocab_test__
+
+
+
+__ggml_vocab_test__
+
+
+
+
+__ggml_vocab_test__
+
+
+__ggml_vocab_test__
+Hello world
+__ggml_vocab_test__
+Hello world
+__ggml_vocab_test__
+Hello World
+__ggml_vocab_test__
+Hello World
+__ggml_vocab_test__
+Hello World!
+__ggml_vocab_test__
+Hello, world!
+__ggml_vocab_test__
+Hello, world!
+__ggml_vocab_test__
+this is 🦙.cpp
+__ggml_vocab_test__
+w048 7tuijk dsdfhu
+__ggml_vocab_test__
+нещо на Български
+__ggml_vocab_test__
+កាន់តែពិសេសអាចខលចេញ
+__ggml_vocab_test__
+🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+__ggml_vocab_test__
+Hello
+Hello
+__ggml_vocab_test__
+(
+__ggml_vocab_test__
+
+=
+__ggml_vocab_test__
+' era
+__ggml_vocab_test__
+Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
+__ggml_vocab_test__
+!!!!!!
+__ggml_vocab_test__
+3
+__ggml_vocab_test__
+33
+__ggml_vocab_test__
+333
+__ggml_vocab_test__
+3333
+__ggml_vocab_test__
+33333
+__ggml_vocab_test__
+333333
+__ggml_vocab_test__
+3333333
+__ggml_vocab_test__
+33333333
+__ggml_vocab_test__
+333333333
+__ggml_vocab_test__
+Cửa Việt
+__ggml_vocab_test__
+discards
+__ggml_vocab_test__
+
+
+
+
+
+
+
+
+
+
+
+🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
+__ggml_vocab_test__
package/src/llama.cpp/models/ggml-vocab-deepseek-r1-qwen.gguf.out (new file; one line of expected token IDs per test case above)

@@ -0,0 +1,46 @@
+1122 220 19 220 26062 3951
+37 50753 261
+
+220
+256
+262
+197
+198
+271
+1406
+1572
+9707 1879
+21927 1879
+9707 4337
+21927 4337
+21927 4337 0
+9707 11 1879 0
+21927 11 1879 0
+419 374 11162 99 247 13 10821
+86 15 19 23 220 22 83 1963 41808 11472 2940 16739
+78762 14144 1456 13073 63471 33594 3038 133178 79012
+146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 147805 148301 147270 44258 223 146848
+145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 320 3243 42365 429 702 1181 1828 3950 8
+9707
+21927
+220 21927
+256 21927
+262 21927
+262 21927 198 262 21927
+320
+198 284
+6 11385
+9707 11 379 64848 0 2585 525 498 26525 223 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216
+17085 2928
+18
+18 18
+18 18 18
+18 18 18 18
+18 18 18 18 18
+18 18 18 18 18 18
+18 18 18 18 18 18 18
+18 18 18 18 18 18 18 18
+18 18 18 18 18 18 18 18 18
+34 90063 128324
+2560 2347
+198 4710 14731 65497 7847 1572 2303 78672 10947 145836 320 8252 8 26525 114 378 235 149921 30543 320 35673 99066 97534 8 25521 227 11162 99 247 149955 220 18 220 18 18 220 18 18 18 220 18 18 18 18 220 18 18 18 18 18 220 18 18 18 18 18 18 220 18 18 18 18 18 18 18 220 18 18 18 18 18 18 18 18 220 18 13 18 220 18 496 18 220 18 1112 18 220 146394 97529 241 44258 233 146568 44258 224 147603 20879 115 146280 44258 223 146280 147272 97529 227 144534 937 104100 18493 22377 99257 16 18 16 19 16 20 16 35727 21216 55460 53237 18658 14144 1456 13073 63471 33594 3038 133178 79012 3355 4605 4605 13874 13874 73594 3014 3014 28149 17085 2928 26610 7646 358 3003 1012 364 83 813 566 594 1052 11 364 787 498 2704 30 364 44 537 2704 358 3278 1281 432 11 364 35 498 1075 1045 15243 30 1205 6 42612 264 63866 43
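These two new files form a tokenizer regression fixture for the DeepSeek-R1 Qwen vocabulary: the .inp file holds test strings separated by __ggml_vocab_test__ marker lines, and the .out file holds one line of expected token IDs per test case. A rough sketch of how such a pair can be parsed (hypothetical helpers, not the actual test-tokenizer code):

#include <fstream>
#include <sstream>
#include <string>
#include <vector>

// Split the .inp fixture on its separator lines; each element is one test input.
static std::vector<std::string> read_inputs(const std::string & path) {
    std::ifstream f(path);
    std::vector<std::string> tests;
    std::string line, cur;
    bool have_line = false;
    while (std::getline(f, line)) {
        if (line == "__ggml_vocab_test__") {
            tests.push_back(cur);
            cur.clear();
            have_line = false;
        } else {
            if (have_line) cur += '\n'; // multi-line inputs keep their newlines
            cur += line;
            have_line = true;
        }
    }
    return tests;
}

// Each line of the .out fixture is the expected token ID sequence for one input.
static std::vector<std::vector<int>> read_expected(const std::string & path) {
    std::ifstream f(path);
    std::vector<std::vector<int>> expected;
    std::string line;
    while (std::getline(f, line)) {
        std::istringstream iss(line);
        expected.emplace_back();
        for (int tok; iss >> tok; ) expected.back().push_back(tok);
    }
    return expected;
}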
package/src/llama.cpp/src/CMakeLists.txt

@@ -29,7 +29,7 @@ add_library(llama
             unicode-data.cpp
             )

-target_include_directories(llama PUBLIC . ../include)
+target_include_directories(llama PUBLIC . ../include ../common)
 target_compile_features   (llama PUBLIC cxx_std_17) # don't bump

 target_link_libraries(llama PUBLIC ggml)
package/src/llama.cpp/src/llama-arch.cpp

@@ -179,6 +179,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_TOKENIZER_HF_JSON,          "tokenizer.huggingface.json" },
    { LLM_KV_TOKENIZER_RWKV,             "tokenizer.rwkv.world" },
    { LLM_KV_TOKENIZER_CHAT_TEMPLATE,    "tokenizer.chat_template" },
+   { LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,  "tokenizer.chat_template.%s" },
    { LLM_KV_TOKENIZER_FIM_PRE_ID,       "tokenizer.ggml.fim_pre_token_id" },
    { LLM_KV_TOKENIZER_FIM_SUF_ID,       "tokenizer.ggml.fim_suf_token_id" },
    { LLM_KV_TOKENIZER_FIM_MID_ID,       "tokenizer.ggml.fim_mid_token_id" },
@@ -1023,6 +1024,9 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
            { LLM_TENSOR_OUTPUT,    "output" },
            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
            { LLM_TENSOR_ATTN_QKV,  "blk.%d.attn_qkv" },
+           { LLM_TENSOR_ATTN_Q,    "blk.%d.attn_q" },
+           { LLM_TENSOR_ATTN_K,    "blk.%d.attn_k" },
+           { LLM_TENSOR_ATTN_V,    "blk.%d.attn_v" },
            { LLM_TENSOR_ATTN_OUT,  "blk.%d.attn_output" },
            { LLM_TENSOR_FFN_NORM,  "blk.%d.ffn_norm" },
            { LLM_TENSOR_FFN_UP,    "blk.%d.ffn_up" },
@@ -1443,10 +1447,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
 };

-LLM_KV::LLM_KV(llm_arch arch) : arch(arch) {}
+LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}

 std::string LLM_KV::operator()(llm_kv kv) const {
-    return ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
+    return suffix ? ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch), suffix)
+                  : ::format(LLM_KV_NAMES.at(kv), LLM_ARCH_NAMES.at(arch));
 }

 std::string LLM_TN_IMPL::str() const {
package/src/llama.cpp/src/llama-arch.h

@@ -177,6 +177,7 @@ enum llm_kv {
     LLM_KV_TOKENIZER_HF_JSON,
     LLM_KV_TOKENIZER_RWKV,
     LLM_KV_TOKENIZER_CHAT_TEMPLATE,
+    LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
     LLM_KV_TOKENIZER_FIM_PRE_ID,
     LLM_KV_TOKENIZER_FIM_SUF_ID,
     LLM_KV_TOKENIZER_FIM_MID_ID,
@@ -335,9 +336,10 @@ enum llm_tensor_layer {
 };

 struct LLM_KV {
-    LLM_KV(llm_arch arch);
+    LLM_KV(llm_arch arch, const char * suffix = nullptr);

     llm_arch arch;
+    const char * suffix;

     std::string operator()(llm_kv kv) const;
 };
package/src/llama.cpp/src/llama-chat.cpp

@@ -51,6 +51,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3 },
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGML_3 },
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGML_4 },
+   { "glmedge",           LLM_CHAT_TEMPLATE_GLMEDGE },
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM },
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3 },
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD },
@@ -115,7 +116,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
         return LLM_CHAT_TEMPLATE_PHI_3;
     } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
-        return LLM_CHAT_TEMPLATE_FALCON_3;
+        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
     } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
         return LLM_CHAT_TEMPLATE_ZEPHYR;
     } else if (tmpl_contains("bos_token + message['role']")) {
@@ -152,7 +153,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
         return LLM_CHAT_TEMPLATE_MINICPM;
     } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
         return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
-    } else if (tmpl_contains(LU8("
+    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
         return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
     } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
         // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
@@ -440,6 +441,14 @@ int32_t llm_chat_apply_template(
         if (add_ass) {
             ss << "<|assistant|>";
         }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
+        for (auto message : chat) {
+            std::string role(message->role);
+            ss << "<|" << role << "|>" << "\n" << message->content;
+        }
+        if (add_ass) {
+            ss << "<|assistant|>";
+        }
     } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
         // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
         for (auto message : chat) {
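Following the branch added above, the new "glmedge" template wraps each turn as <|role|> plus a newline and the content, then appends <|assistant|> when a generation prompt is requested. A sketch via the public C API (llama_chat_apply_template's exact signature is assumed from this llama.cpp revision; "glmedge" resolves through the LLM_CHAT_TEMPLATES name map):

llama_chat_message chat[] = {
    { "user",      "Hi"           },
    { "assistant", "Hello there." },
};
char buf[512];
int32_t n = llama_chat_apply_template("glmedge", chat, 2, /* add_ass = */ true,
                                      buf, sizeof(buf));
// Per the template code above, buf now holds:
// <|user|>\nHi<|assistant|>\nHello there.<|assistant|>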
package/src/llama.cpp/src/llama-grammar.cpp

@@ -560,7 +560,7 @@ bool llama_grammar_parser::parse(const char * src) {
             }
         }
     } catch (const std::exception & err) {
-        fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
+        fprintf(stderr, "%s: error parsing grammar: %s\n\n%s\n", __func__, err.what(), src);
         rules.clear();
         return false;
     }
@@ -960,10 +960,28 @@ struct llama_grammar * llama_grammar_init_impl(
     // Important: vec_rules has to be moved here, not copied, because stacks contains
     // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
     // then the pointers would be invalidated when the local vec_rules goes out of scope.
-    return new llama_grammar {
+    return new llama_grammar {
+        vocab,
+        std::move(vec_rules),
+        std::move(stacks),
+        /* .partial_utf8 = */     {},
+        /* .lazy =*/              false,
+        /* .awaiting_trigger = */ false,
+        /* .trigger_buffer = */   "",
+        /* .trigger_tokens = */   {},
+        /* .trigger_words = */    {},
+    };
 }

-struct llama_grammar * llama_grammar_init_impl(
+struct llama_grammar * llama_grammar_init_impl(
+        const struct llama_vocab * vocab,
+        const char * grammar_str,
+        const char * grammar_root,
+        bool lazy,
+        const char ** trigger_words,
+        size_t num_trigger_words,
+        const llama_token * trigger_tokens,
+        size_t num_trigger_tokens) {
     llama_grammar_parser parser;

     // if there is a grammar, parse it
@@ -1035,10 +1053,31 @@ struct llama_grammar * llama_grammar_init_impl(const struct llama_vocab * vocab,
         }
     } while (true);

+    std::vector<llama_token> vec_trigger_tokens;
+    std::vector<std::string> vec_trigger_words;
+    for (size_t i = 0; i < num_trigger_tokens; i++) {
+        GGML_ASSERT(trigger_tokens != nullptr);
+        vec_trigger_tokens.push_back(trigger_tokens[i]);
+    }
+    for (size_t i = 0; i < num_trigger_words; i++) {
+        GGML_ASSERT(trigger_words != nullptr);
+        vec_trigger_words.push_back(trigger_words[i]);
+    }
+
     // Important: vec_rules has to be moved here, not copied, because stacks contains
     // pointers to elements of vec_rules. If vec_rules were copied into llama_grammar
     // then the pointers would be invalidated when the local vec_rules goes out of scope.
-    return new llama_grammar {
+    return new llama_grammar {
+        vocab,
+        std::move(vec_rules),
+        std::move(stacks),
+        /* .partial_utf8 = */     {},
+        /* .lazy = */             lazy,
+        /* .awaiting_trigger = */ lazy,
+        /* .trigger_buffer = */   "",
+        std::move(vec_trigger_tokens),
+        std::move(vec_trigger_words),
+    };
 }

 void llama_grammar_free_impl(struct llama_grammar * grammar) {
@@ -1055,6 +1094,11 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
         grammar.rules,
         grammar.stacks,
         grammar.partial_utf8,
+        grammar.lazy,
+        grammar.awaiting_trigger,
+        grammar.trigger_buffer,
+        grammar.trigger_tokens,
+        grammar.trigger_words,
     };

     // redirect elements in stacks to point to new rules
@@ -1076,6 +1120,10 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
 void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
     GGML_ASSERT(grammar.vocab != nullptr);

+    if (grammar.awaiting_trigger) {
+        return;
+    }
+
     bool allow_eog = false;
     for (const auto & stack : grammar.stacks) {
         if (stack.empty()) {
@@ -1115,6 +1163,34 @@ void llama_grammar_apply_impl(const struct llama_grammar & grammar, llama_token_data_array * cur_p) {
 void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
     GGML_ASSERT(grammar.vocab != nullptr);

+    const auto & piece = grammar.vocab->token_to_piece(token);
+
+    if (grammar.awaiting_trigger) {
+        if (std::find(grammar.trigger_tokens.begin(), grammar.trigger_tokens.end(), token) != grammar.trigger_tokens.end()) {
+            grammar.awaiting_trigger = false;
+            grammar.trigger_buffer.clear();
+            llama_grammar_accept_str(grammar, piece);
+            LLAMA_LOG_DEBUG("Grammar triggered on token %u (`%s`)", token, piece.c_str());
+            return;
+        } else {
+            // TODO: consider a smarter incremental substring search algorithm (store last position to search from).
+            grammar.trigger_buffer += piece;
+            for (const auto & word : grammar.trigger_words) {
+                auto pos = grammar.trigger_buffer.find(word);
+                if (pos != std::string::npos) {
+                    grammar.awaiting_trigger = false;
+                    auto constrained_str = grammar.trigger_buffer.substr(pos);
+                    grammar.trigger_buffer.clear();
+                    llama_grammar_accept_str(grammar, constrained_str);
+                    LLAMA_LOG_DEBUG("Grammar triggered on word `%s`", word.c_str());
+                    return;
+                }
+            }
+            LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`) (buffer: `%s`)\n", token, piece.c_str(), grammar.trigger_buffer.c_str());
+            return;
+        }
+    }
+
     if (grammar.vocab->is_eog(token)) {
         for (const auto & stack : grammar.stacks) {
             if (stack.empty()) {
@@ -1124,8 +1200,10 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token) {
         GGML_ABORT("fatal error");
     }

-
+    llama_grammar_accept_str(grammar, piece);
+}

+void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
     // Note terminating 0 in decoded string
     const auto decoded = decode_utf8(piece, grammar.partial_utf8);
     const auto & code_points = decoded.first;
@@ -1135,5 +1213,7 @@ void llama_grammar_accept_str(struct llama_grammar & grammar, const std::string & piece) {
     }

     grammar.partial_utf8 = decoded.second;
-
+    if (grammar.stacks.empty()) {
+        throw std::runtime_error("Unexpected empty grammar stack after accepting piece: " + piece);
+    }
 }
package/src/llama.cpp/src/llama-grammar.h

@@ -114,6 +114,15 @@ struct llama_grammar {

     // buffer for partially generated UTF-8 sequence from accepted tokens
     llama_partial_utf8 partial_utf8;
+
+    // lazy grammars wait for trigger words or tokens before constraining the sampling.
+    // we still ahve trigger_tokens for non-lazy grammars to force printing of special trigger tokens.
+    // (useful e.g. for tool_choice=required)
+    bool                     lazy = false;
+    bool                     awaiting_trigger = false; // Initialized to true for lazy grammars only
+    std::string              trigger_buffer;           // Output buffered by lazy grammar. Will be cleared once trigger is found.
+    std::vector<llama_token> trigger_tokens;           // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special).
+    std::vector<std::string> trigger_words;
 };

 //
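The fields above drive the lazy-trigger logic in llama_grammar_accept_impl: while awaiting_trigger is set, each decoded piece is appended to trigger_buffer and scanned for any trigger word, and once one is found the buffer from the trigger onward is replayed into the grammar. A standalone sketch of that buffering behaviour (plain C++, not the llama.cpp types):

#include <iostream>
#include <string>
#include <vector>

// Mirrors the trigger-word search shown above: returns the constrained
// suffix once a trigger word appears in the accumulated output, else "".
static std::string feed(std::string & buffer, const std::string & piece,
                        const std::vector<std::string> & trigger_words) {
    buffer += piece; // pieces accumulate until a trigger word shows up
    for (const auto & word : trigger_words) {
        auto pos = buffer.find(word);
        if (pos != std::string::npos) {
            std::string constrained = buffer.substr(pos); // grammar sees the trigger onward
            buffer.clear();
            return constrained;
        }
    }
    return "";
}

int main() {
    std::string buffer;
    const std::vector<std::string> triggers = { "<tool_call>" };
    for (const std::string piece : { "Sure, ", "let me check. <tool_", "call>{\"name\"" }) {
        const std::string hit = feed(buffer, piece, triggers);
        if (!hit.empty()) {
            std::cout << "grammar takes over at: " << hit << "\n"; // <tool_call>{"name"
        }
    }
    return 0;
}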
@@ -127,7 +136,15 @@ struct llama_grammar * llama_grammar_init_impl(
         size_t n_rules,
         size_t start_rule_index);

-struct llama_grammar * llama_grammar_init_impl(
+struct llama_grammar * llama_grammar_init_impl(
+        const struct llama_vocab * vocab,
+        const char * grammar_str,
+        const char * grammar_root,
+        bool lazy,
+        const char ** trigger_words,
+        size_t num_trigger_words,
+        const llama_token * trigger_tokens,
+        size_t num_trigger_tokens);

 void llama_grammar_free_impl(struct llama_grammar * grammar);

@@ -141,3 +158,7 @@ void llama_grammar_apply_impl(
 void llama_grammar_accept_impl(
         struct llama_grammar & grammar,
         llama_token token);
+
+void llama_grammar_accept_str(
+        struct llama_grammar & grammar,
+        const std::string & piece);
package/src/llama.cpp/src/llama-model-loader.cpp

@@ -819,7 +819,7 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps) {
     for (const auto & file : files) {
         auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
         auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
-        std::unique_ptr<llama_mmap> mapping
+        std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
         mmaps_used.emplace_back(mapping->size(), 0);
         if (mlock_mmaps) {
             std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());
|