@fugood/llama.node 0.3.11 → 0.3.13
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +1 -0
- package/lib/index.js +26 -20
- package/lib/index.ts +32 -28
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +14 -0
- package/src/LlamaContext.cpp +13 -4
- package/src/llama.cpp/.github/workflows/build.yml +35 -3
- package/src/llama.cpp/.github/workflows/docker.yml +2 -0
- package/src/llama.cpp/.github/workflows/labeler.yml +1 -1
- package/src/llama.cpp/common/CMakeLists.txt +20 -3
- package/src/llama.cpp/common/arg.cpp +180 -3
- package/src/llama.cpp/common/chat-template.hpp +21 -7
- package/src/llama.cpp/common/chat.cpp +220 -101
- package/src/llama.cpp/common/chat.hpp +3 -0
- package/src/llama.cpp/common/common.h +15 -7
- package/src/llama.cpp/common/llguidance.cpp +3 -3
- package/src/llama.cpp/common/log.cpp +1 -0
- package/src/llama.cpp/common/log.h +2 -1
- package/src/llama.cpp/common/minja.hpp +24 -9
- package/src/llama.cpp/common/sampling.cpp +52 -46
- package/src/llama.cpp/common/speculative.h +1 -1
- package/src/llama.cpp/docs/build.md +2 -2
- package/src/llama.cpp/examples/imatrix/imatrix.cpp +2 -1
- package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +6 -5
- package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/perplexity/perplexity.cpp +1 -0
- package/src/llama.cpp/examples/run/run.cpp +5 -12
- package/src/llama.cpp/examples/server/CMakeLists.txt +1 -1
- package/src/llama.cpp/examples/server/httplib.h +381 -292
- package/src/llama.cpp/examples/server/server.cpp +58 -47
- package/src/llama.cpp/examples/server/utils.hpp +7 -5
- package/src/llama.cpp/ggml/include/ggml-cpu.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-metal.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-vulkan.h +0 -2
- package/src/llama.cpp/ggml/include/ggml.h +1 -1
- package/src/llama.cpp/ggml/src/ggml-common.h +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +6 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +852 -268
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +200 -107
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +2 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +2 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +26 -4
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +6 -7
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +812 -569
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +25 -1
- package/src/llama.cpp/ggml/src/ggml.c +1 -1
- package/src/llama.cpp/include/llama.h +14 -10
- package/src/llama.cpp/src/llama-grammar.cpp +1 -1
- package/src/llama.cpp/src/llama-grammar.h +1 -1
- package/src/llama.cpp/src/llama-impl.h +6 -6
- package/src/llama.cpp/src/llama-kv-cache.h +1 -1
- package/src/llama.cpp/src/llama-mmap.h +1 -0
- package/src/llama.cpp/src/llama-model.cpp +1 -1
- package/src/llama.cpp/src/llama-sampling.cpp +131 -57
- package/src/llama.cpp/src/llama.cpp +7 -5
- package/src/llama.cpp/src/unicode.cpp +9 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +5 -5
- package/src/llama.cpp/tests/test-chat.cpp +237 -69
- package/src/llama.cpp/tests/test-gguf.cpp +4 -4
- package/src/llama.cpp/tests/test-sampling.cpp +15 -0
package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

@@ -55,6 +55,8 @@ const std::vector<std::string> type_names = {
     "q4_k",
     "q5_k",
     "q6_k",
+    "iq1_s",
+    "iq1_m",
     "iq2_xxs",
     "iq2_xs",
     "iq2_s",
@@ -182,6 +184,13 @@ std::string to_uppercase(const std::string& input) {
     return result;
 }
 
+bool string_starts_with(const std::string& str, const std::string& prefix) {
+    if (prefix.size() > str.size()) {
+        return false;
+    }
+    return std::equal(prefix.begin(), prefix.end(), str.begin());
+}
+
 bool string_ends_with(const std::string& str, const std::string& suffix) {
     if (suffix.size() > str.size()) {
         return false;
@@ -387,7 +396,7 @@ void process_shaders() {
     for (const auto& tname : type_names) {
         // mul mat vec
         std::string data_a_key = "DATA_A_" + to_uppercase(tname);
-        std::string shader = (string_ends_with(tname, "_k")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";
+        std::string shader = (string_ends_with(tname, "_k") || string_starts_with(tname, "iq1_")) ? "mul_mat_vec_" + tname + ".comp" : "mul_mat_vec.comp";
 
         string_to_spv("mul_mat_vec_" + tname + "_f32_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float"}, {"B_TYPE_VEC2", "vec2"}, {"B_TYPE_VEC4", "vec4"}, {"D_TYPE", "float"}}));
         string_to_spv("mul_mat_vec_" + tname + "_f16_f32", shader, merge_maps(base_dict, {{data_a_key, "1"}, {"B_TYPE", "float16_t"}, {"B_TYPE_VEC2", "f16vec2"}, {"B_TYPE_VEC4", "f16vec4"}, {"D_TYPE", "float"}}));
@@ -434,6 +443,8 @@ void process_shaders() {
     string_to_spv("add_f32", "add.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
     string_to_spv("add_f16_f32_f16", "add.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float"}, {"D_TYPE", "float16_t"}, {"FLOAT_TYPE", "float"}});
 
+    string_to_spv("sub_f32", "sub.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
+
     string_to_spv("acc_f32", "acc.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
 
     string_to_spv("split_k_reduce", "mul_mat_split_k_reduce.comp", {});
@@ -443,6 +454,7 @@ void process_shaders() {
     string_to_spv("div_f32", "div.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
 
     string_to_spv("repeat_f32", "repeat.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
+    string_to_spv("repeat_back_f32", "repeat_back.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
 
     string_to_spv("scale_f32", "scale.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}});
 
@@ -482,9 +494,19 @@ void process_shaders() {
     string_to_spv("rope_neox_f16", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
     string_to_spv("rope_neox_f16_rte", "rope_neox.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
 
+    string_to_spv("rope_multi_f32", "rope_multi.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
+    string_to_spv("rope_multi_f16", "rope_multi.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+    string_to_spv("rope_multi_f16_rte", "rope_multi.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
+
+    string_to_spv("rope_vision_f32", "rope_vision.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
+    string_to_spv("rope_vision_f16", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
+    string_to_spv("rope_vision_f16_rte", "rope_vision.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", "1"}});
+
     string_to_spv("argsort_f32", "argsort.comp", {{"A_TYPE", "float"}});
 
+    string_to_spv("argmax_f32", "argmax.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "int"}}));
     string_to_spv("sum_rows_f32", "sum_rows.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
+    string_to_spv("count_equal_i32", "count_equal.comp", merge_maps(base_dict, {{"A_TYPE", "int"}, {"B_TYPE", "int"}, {"D_TYPE", "int"}}));
 
     string_to_spv("im2col_f32", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
     string_to_spv("im2col_f32_f16", "im2col.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float16_t"}}));
@@ -496,6 +518,8 @@ void process_shaders() {
 
     string_to_spv("rwkv_wkv6_f32", "wkv6.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
 
+    string_to_spv("opt_step_adamw_f32", "opt_step_adamw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
+
     for (auto &c : compiles) {
         c.wait();
     }
package/src/llama.cpp/ggml/src/ggml.c

@@ -1379,7 +1379,7 @@ bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tenso
         (t0->nb[3] == t1->nb[3]);
 }
 
-// check if t1 can be represented as a
+// check if t1 can be represented as a repetition of t0
 bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
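The restored comment states ggml_can_repeat's contract. As a hedged paraphrase (my reading of ggml's repeat semantics, not code from this diff): t1 is a repetition of t0 when each of the four dimensions of t1 is a whole multiple of the corresponding dimension of t0.

// Sketch of that condition over bare shape arrays (assumption, illustration only):
#include <cstdint>

static bool can_repeat_shape(const int64_t t0_ne[4], const int64_t t1_ne[4]) {
    for (int i = 0; i < 4; i++) {
        if (t0_ne[i] == 0 || t1_ne[i] % t0_ne[i] != 0) {
            return false; // this dimension of t1 is not a whole multiple of t0's
        }
    }
    return true;
}
// e.g. a {4,1,1,1} bias can repeat across a {4,32,1,1} activation -> true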
package/src/llama.cpp/include/llama.h

@@ -213,7 +213,7 @@ extern "C" {
         LLAMA_SPLIT_MODE_ROW = 2, // split layers and KV across GPUs, use tensor parallelism if supported
     };
 
-    // TODO: simplify (https://github.com/
+    // TODO: simplify (https://github.com/ggml-org/llama.cpp/pull/9294#pullrequestreview-2286561979)
     typedef struct llama_token_data {
         llama_token id; // token id
         float logit;    // log-odds of the token
@@ -307,7 +307,7 @@ extern "C" {
     };
 
     // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
-    // https://github.com/
+    // https://github.com/ggml-org/llama.cpp/pull/7544
     struct llama_context_params {
         uint32_t n_ctx;   // text context, 0 = from model
         uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode
@@ -320,7 +320,7 @@ extern "C" {
         enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
         enum llama_attention_type attention_type; // attention type to use for embeddings
 
-        // ref: https://github.com/
+        // ref: https://github.com/ggml-org/llama.cpp/pull/2054
         float rope_freq_base;  // RoPE base frequency, 0 = from model
         float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model
         float yarn_ext_factor; // YaRN extrapolation mix factor, negative = from model
@@ -385,7 +385,7 @@ extern "C" {
     struct llama_adapter_lora;
 
     // Helpers for getting default parameters
-    // TODO: update API to start accepting pointers to params structs (https://github.com/
+    // TODO: update API to start accepting pointers to params structs (https://github.com/ggml-org/llama.cpp/discussions/9172)
     LLAMA_API struct llama_model_params          llama_model_default_params(void);
     LLAMA_API struct llama_context_params        llama_context_default_params(void);
     LLAMA_API struct llama_sampler_chain_params  llama_sampler_chain_default_params(void);
@@ -1040,7 +1040,7 @@ extern "C" {
 
     /// Apply chat template. Inspired by hf apply_chat_template() on python.
     /// Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model"
-    /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/
+    /// NOTE: This function does not use a jinja parser. It only support a pre-defined list of template. See more: https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
    /// @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead.
     /// @param chat Pointer to a list of multiple llama_chat_message
     /// @param n_msg Number of llama_chat_message in this chat
@@ -1114,11 +1114,12 @@ extern "C" {
     };
 
     struct llama_sampler {
-        struct llama_sampler_i
-        llama_sampler_context_t
+        const struct llama_sampler_i * iface;
+        llama_sampler_context_t        ctx;
     };
 
     // mirror of llama_sampler_i:
+    LLAMA_API struct llama_sampler * llama_sampler_init  (const struct llama_sampler_i * iface, llama_sampler_context_t ctx);
     LLAMA_API const char *           llama_sampler_name  (const struct llama_sampler * smpl);
     LLAMA_API void                   llama_sampler_accept(      struct llama_sampler * smpl, llama_token token);
     LLAMA_API void                   llama_sampler_apply (      struct llama_sampler * smpl, llama_token_data_array * cur_p);
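Exposing llama_sampler_init together with a const llama_sampler_i pointer is what makes user-defined samplers possible without touching library internals. A minimal sketch (my illustration against this header, not code from the package; the field order of llama_sampler_i is taken from the initializers visible later in this diff):

#include <cmath>
#include "llama.h"

static const char * even_only_name(const struct llama_sampler * /*smpl*/) {
    return "even-only";
}

// toy constraint: mask every token with an odd id
static void even_only_apply(struct llama_sampler * /*smpl*/, llama_token_data_array * cur_p) {
    for (size_t i = 0; i < cur_p->size; ++i) {
        if (cur_p->data[i].id % 2 != 0) {
            cur_p->data[i].logit = -INFINITY;
        }
    }
}

static struct llama_sampler_i even_only_i = {
    /* .name   = */ even_only_name,
    /* .accept = */ nullptr,
    /* .apply  = */ even_only_apply,
    /* .reset  = */ nullptr,
    /* .clone  = */ nullptr,
    /* .free   = */ nullptr, // no ctx to release
};

struct llama_sampler * make_even_only() {
    return llama_sampler_init(&even_only_i, /* ctx */ nullptr); // no per-sampler state
}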
@@ -1148,7 +1149,7 @@ extern "C" {
     /// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
     /// NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
     DEPRECATED(LLAMA_API struct llama_sampler * llama_sampler_init_softmax (void),
-        "will be removed in the future (see https://github.com/
+        "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");
 
     /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
     LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);
@@ -1156,7 +1157,7 @@ extern "C" {
     /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
     LLAMA_API struct llama_sampler * llama_sampler_init_top_p (float p, size_t min_keep);
 
-    /// @details Minimum P sampling as described in https://github.com/
+    /// @details Minimum P sampling as described in https://github.com/ggml-org/llama.cpp/pull/3841
     LLAMA_API struct llama_sampler * llama_sampler_init_min_p (float p, size_t min_keep);
 
     /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
@@ -1171,6 +1172,9 @@ extern "C" {
     /// @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
     LLAMA_API struct llama_sampler * llama_sampler_init_xtc (float p, float t, size_t min_keep, uint32_t seed);
 
+    /// @details Top n sigma sampling as described in academic paper "Top-nσ: Not All Logits Are You Need" https://arxiv.org/pdf/2411.07641
+    LLAMA_API struct llama_sampler * llama_sampler_init_top_n_sigma(float n);
+
     /// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
     /// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
     /// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
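The top-n-sigma declaration added above, in equations (restating the apply implementation that appears later in this diff, in llama-sampling.cpp): for logits z_1..z_N,

\[
\mu = \frac{1}{N}\sum_{j=1}^{N} z_j, \qquad
\sigma = \sqrt{\frac{1}{N}\sum_{j=1}^{N} \left(z_j - \mu\right)^2}, \qquad
z_i' = \begin{cases} z_i & \text{if } z_i \ge \max_j z_j - n\,\sigma \\ -\infty & \text{otherwise,} \end{cases}
\]

followed by a softmax over the surviving logits.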
@@ -1199,7 +1203,7 @@ extern "C" {
             const char * grammar_str,
             const char * grammar_root);
 
-    /// @details Lazy grammar sampler, introduced in https://github.com/
+    /// @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
     /// @param trigger_words A list of words that will trigger the grammar sampler. This may be updated to a loose regex syntax (w/ ^) in a near future.
     /// @param trigger_tokens A list of tokens that will trigger the grammar sampler.
     LLAMA_API struct llama_sampler * llama_sampler_init_grammar_lazy(
package/src/llama.cpp/src/llama-grammar.cpp

@@ -1186,7 +1186,7 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
                 return;
             }
         }
-        LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)
+        LLAMA_LOG_DEBUG("Grammar still awaiting trigger after token %d (`%s`)\n", token, piece.c_str());
         return;
     }
 }
package/src/llama.cpp/src/llama-grammar.h

@@ -116,7 +116,7 @@ struct llama_grammar {
     llama_partial_utf8 partial_utf8;
 
     // lazy grammars wait for trigger words or tokens before constraining the sampling.
-    // we still
+    // we still have trigger_tokens for non-lazy grammars to force printing of special trigger tokens.
     // (useful e.g. for tool_choice=required)
     bool lazy             = false;
     bool awaiting_trigger = false; // Initialized to true for lazy grammars only
package/src/llama.cpp/src/llama-impl.h

@@ -6,13 +6,13 @@
 #include <vector>
 
 #ifdef __GNUC__
-#
-#define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    if defined(__MINGW32__) && !defined(__clang__)
+#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#    else
+#        define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#    endif
 #else
-#define LLAMA_ATTRIBUTE_FORMAT(...)
-#endif
-#else
-#define LLAMA_ATTRIBUTE_FORMAT(...)
+#    define LLAMA_ATTRIBUTE_FORMAT(...)
 #endif
 
 //
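Context for the restructured guard: format attributes let GCC and Clang type-check printf-style call sites; on MinGW the gnu_printf archetype requests GNU rather than MSVCRT format semantics, and the added !defined(__clang__) presumably sidesteps clang's handling of that archetype. A hypothetical logger (illustration only, not the library's code) showing what the attribute buys:

#include <cstdarg>
#include <cstdio>

__attribute__((format(printf, 1, 2))) // argument 1 is the format string, varargs start at 2
static void log_printf(const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}

// log_printf("%s took %d ms\n", "decode", 12);  // OK
// log_printf("%s took %d ms\n", 12, "decode");  // caught at compile time with -Wformat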
package/src/llama.cpp/src/llama-kv-cache.h

@@ -37,7 +37,7 @@ struct llama_kv_cache {
     bool can_shift = false;
 
     // Note: The value of head isn't only used to optimize searching
-    // for a free KV slot.
+    // for a free KV slot. llama_decode_impl also uses it, so it
     // cannot be freely changed after a slot has been allocated.
     uint32_t head = 0;
     uint32_t size = 0;
package/src/llama.cpp/src/llama-model.cpp

@@ -1275,7 +1275,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
     const bool use_mmap_buffer = true;
 
-    LLAMA_LOG_INFO("%s: loading model tensors, this can take a while... (mmap = %s)\n", __func__,
+    LLAMA_LOG_INFO("%s: loading model tensors, this can take a while... (mmap = %s)\n", __func__, ml.use_mmap ? "true" : "false");
 
     // build a list of buffer types for the CPU and GPU devices
     pimpl->cpu_buft_list = make_cpu_buft_list(devices);
package/src/llama.cpp/src/llama-sampling.cpp

@@ -316,6 +316,13 @@ static uint32_t get_rng_seed(uint32_t seed) {
 
 // llama_sampler API
 
+struct llama_sampler * llama_sampler_init(const struct llama_sampler_i * iface, llama_sampler_context_t ctx) {
+    return new llama_sampler {
+        /* .iface = */ iface,
+        /* .ctx = */ ctx,
+    };
+}
+
 const char * llama_sampler_name(const struct llama_sampler * smpl) {
     if (!smpl->iface) {
         return "(null)";
@@ -347,10 +354,10 @@ struct llama_sampler * llama_sampler_clone(const struct llama_sampler * smpl) {
     }
 
     if (smpl->ctx == nullptr) {
-        return
+        return llama_sampler_init(
             /* .iface = */ smpl->iface,
-            /* .ctx = */ nullptr
-
+            /* .ctx = */ nullptr
+        );
     }
 
     GGML_ABORT("the sampler does not support cloning");
@@ -472,15 +479,15 @@ static struct llama_sampler_i llama_sampler_chain_i = {
 };
 
 struct llama_sampler * llama_sampler_chain_init(struct llama_sampler_chain_params params) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_chain_i,
         /* .ctx = */ new llama_sampler_chain {
             /* .params = */ params,
             /* .samplers = */ {},
             /* .t_sample_us = */ 0,
             /* .n_sample = */ 0,
-        }
-
+        }
+    );
 }
 
 void llama_sampler_chain_add(struct llama_sampler * chain, struct llama_sampler * smpl) {
@@ -546,10 +553,10 @@ static struct llama_sampler_i llama_sampler_greedy_i = {
 };
 
 struct llama_sampler * llama_sampler_init_greedy() {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_greedy_i,
-        /* .ctx = */ nullptr
-
+        /* .ctx = */ nullptr
+    );
 }
 
 // dist
@@ -608,14 +615,14 @@ static struct llama_sampler_i llama_sampler_dist_i = {
 
 struct llama_sampler * llama_sampler_init_dist(uint32_t seed) {
     auto seed_cur = get_rng_seed(seed);
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_dist_i,
         /* .ctx = */ new llama_sampler_dist {
             /* .seed = */ seed,
             /* .seed_cur = */ seed_cur,
             /* .rng = */ std::mt19937(seed_cur),
-        }
-
+        }
+    );
 }
 
 // softmax
@@ -638,10 +645,10 @@ static struct llama_sampler_i llama_sampler_softmax_i = {
 };
 
 struct llama_sampler * llama_sampler_init_softmax() {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_softmax_i,
-        /* .ctx = */ nullptr
-
+        /* .ctx = */ nullptr
+    );
 }
 
 // top-k
@@ -678,12 +685,12 @@ static struct llama_sampler_i llama_sampler_top_k_i = {
 };
 
 struct llama_sampler * llama_sampler_init_top_k(int32_t k) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_top_k_i,
         /* .ctx = */ new llama_sampler_top_k {
             /* .k = */ k,
-        }
-
+        }
+    );
 }
 
 // top-p
@@ -744,13 +751,13 @@ static struct llama_sampler_i llama_sampler_top_p_i = {
 };
 
 struct llama_sampler * llama_sampler_init_top_p(float p, size_t min_keep) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_top_p_i,
         /* .ctx = */ new llama_sampler_top_p {
             /* .p = */ p,
             /* .min_keep = */ min_keep,
-        }
-
+        }
+    );
 }
 
 // min-p
@@ -840,13 +847,13 @@ static struct llama_sampler_i llama_sampler_min_p_i = {
 };
 
 struct llama_sampler * llama_sampler_init_min_p(float p, size_t min_keep) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_min_p_i,
         /* .ctx = */ new llama_sampler_min_p {
             /* .p = */ p,
             /* .min_keep = */ min_keep,
-        }
-
+        }
+    );
 }
 
 // typical
@@ -939,13 +946,13 @@ static struct llama_sampler_i llama_sampler_typical_i = {
 };
 
 struct llama_sampler * llama_sampler_init_typical(float p, size_t min_keep) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_typical_i,
         /* .ctx = */ new llama_sampler_typical {
             /* .p = */ p,
             /* .min_keep = */ min_keep,
-        }
-
+        }
+    );
 }
 
 // temp
@@ -983,12 +990,12 @@ static struct llama_sampler_i llama_sampler_temp_i = {
 };
 
 struct llama_sampler * llama_sampler_init_temp(float temp) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_temp_i,
         /* .ctx = */ new llama_sampler_temp {
            /*.temp = */ temp,
-        }
-
+        }
+    );
 }
 
 // temp-ext
@@ -1093,14 +1100,14 @@ static struct llama_sampler_i llama_sampler_temp_ext_i = {
 };
 
 struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, float exponent) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_temp_ext_i,
         /* .ctx = */ new llama_sampler_temp_ext {
             /* .temp = */ temp,
             /* .delta = */ delta,
             /* .exponent = */ exponent,
-        }
-
+        }
+    );
 }
 
 // xtc
@@ -1185,7 +1192,7 @@ static struct llama_sampler_i llama_sampler_xtc_i = {
 
 struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep, uint32_t seed) {
     auto seed_cur = get_rng_seed(seed);
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_xtc_i,
         /* .ctx = */ new llama_sampler_xtc {
             /* .probability = */ p,
@@ -1194,8 +1201,8 @@ struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep,
             /* .seed = */ seed,
             /* .seed_cur = */ seed_cur,
             /* .rng = */ std::mt19937(seed_cur),
-        }
-
+        }
+    );
 }
 
 // mirostat
@@ -1292,7 +1299,7 @@ static struct llama_sampler_i llama_sampler_mirostat_i = {
 
 struct llama_sampler * llama_sampler_init_mirostat(int32_t n_vocab, uint32_t seed, float tau, float eta, int32_t m) {
     auto seed_cur = get_rng_seed(seed);
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_mirostat_i,
         /* .ctx = */ new llama_sampler_mirostat {
             /* .n_vocab = */ n_vocab,
@@ -1303,8 +1310,8 @@ struct llama_sampler * llama_sampler_init_mirostat(int32_t n_vocab, uint32_t see
             /* .m = */ m,
             /* .mu = */ 2.0f*tau,
             /* .rng = */ std::mt19937(seed_cur),
-        }
-
+        }
+    );
 }
 
 // mirostat v2
@@ -1391,7 +1398,7 @@ static struct llama_sampler_i llama_sampler_mirostat_v2_i = {
 
 struct llama_sampler * llama_sampler_init_mirostat_v2(uint32_t seed, float tau, float eta) {
     auto seed_cur = get_rng_seed(seed);
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_mirostat_v2_i,
         /* .ctx = */ new llama_sampler_mirostat_v2 {
             /* .seed = */ seed,
@@ -1400,8 +1407,8 @@ struct llama_sampler * llama_sampler_init_mirostat_v2(uint32_t seed, float tau,
             /* .eta = */ eta,
             /* .mu = */ 2.0f*tau,
             /* .rng = */ std::mt19937(seed_cur),
-        }
-
+        }
+    );
 }
 
 // grammar
@@ -1528,10 +1535,10 @@ static struct llama_sampler * llama_sampler_init_grammar_impl(
         };
     }
 
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_grammar_i,
-        /* .ctx = */ ctx
-
+        /* .ctx = */ ctx
+    );
 }
 
 struct llama_sampler * llama_sampler_init_grammar(
@@ -1678,7 +1685,7 @@ struct llama_sampler * llama_sampler_init_penalties(
         float penalty_present) {
     penalty_last_n = std::max(penalty_last_n, 0);
 
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_penalties_i,
         /* .ctx = */ new llama_sampler_penalties {
             /* .penalty_last_n = */ penalty_last_n,
@@ -1687,8 +1694,75 @@ struct llama_sampler * llama_sampler_init_penalties(
             /* .penalty_present = */ penalty_present,
             /* .prev = */ ring_buffer<llama_token>(penalty_last_n),
             /* .token_count = */ {},
-        }
-
+        }
+    );
+}
+
+// top-n-sigma
+
+struct llama_sampler_top_n_sigma {
+    const float n;
+};
+
+static const char * llama_sampler_top_n_sigma_name(const struct llama_sampler * /*smpl*/) {
+    return "top-n-sigma";
+}
+
+static void llama_sampler_top_n_sigma_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
+    const auto * ctx = (llama_sampler_top_n_sigma *) smpl->ctx;
+
+    // find max logit and calculate mean
+    float max = cur_p->data[0].logit;
+    float logits_sum = 0;
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        if (cur_p->data[i].logit > max) {
+            max = cur_p->data[i].logit;
+        }
+        logits_sum += cur_p->data[i].logit;
+    }
+    float mean = logits_sum/cur_p->size;
+
+    // calculate standard deviation
+    float acc = 0;
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        acc += pow(cur_p->data[i].logit - mean, 2);
+    }
+    float std = sqrt(acc/cur_p->size);
+
+    //apply mask
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        if (cur_p->data[i].logit < max - (ctx->n * std)) {
+            cur_p->data[i].logit = -INFINITY;
+        }
+    }
+    llama_sampler_softmax_impl(cur_p);
+}
+
+static struct llama_sampler * llama_sampler_top_n_sigma_clone(const struct llama_sampler * smpl) {
+    const auto * ctx = (const llama_sampler_top_n_sigma *) smpl->ctx;
+    return llama_sampler_init_top_n_sigma(ctx->n);
+}
+
+static void llama_sampler_top_n_sigma_free(struct llama_sampler * smpl) {
+    delete (llama_sampler_top_n_sigma *) smpl->ctx;
+}
+
+static struct llama_sampler_i llama_sampler_top_n_sigma_i = {
+    /* .name = */ llama_sampler_top_n_sigma_name,
+    /* .accept = */ nullptr,
+    /* .apply = */ llama_sampler_top_n_sigma_apply,
+    /* .reset = */ nullptr,
+    /* .clone = */ llama_sampler_top_n_sigma_clone,
+    /* .free = */ llama_sampler_top_n_sigma_free,
+};
+
+struct llama_sampler * llama_sampler_init_top_n_sigma(float n) {
+    return llama_sampler_init(
+        /* .iface = */ &llama_sampler_top_n_sigma_i,
+        /* .ctx = */ new llama_sampler_top_n_sigma {
+            /* .n = */ n,
+        }
+    );
 }
 
 // DRY
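A quick standalone check of the masking rule in the apply function above (illustration only; it uses the same population-variance convention as the code): for logits {2, 1, 0, -1}, mean = 0.5 and sigma = sqrt(1.25) ≈ 1.118, so with n = 1 the cutoff is 2 - 1.118 ≈ 0.882 and only the logits 2 and 1 survive.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
    const std::vector<float> z = {2.0f, 1.0f, 0.0f, -1.0f};
    float max = z[0], sum = 0.0f;
    for (float v : z) { max = std::max(max, v); sum += v; }
    const float mean = sum / z.size();             // 0.5
    float acc = 0.0f;
    for (float v : z) { acc += (v - mean) * (v - mean); }
    const float sigma = std::sqrt(acc / z.size()); // ~1.118
    const float n = 1.0f;
    for (float v : z) {
        std::printf("%5.2f %s\n", v, v < max - n * sigma ? "masked" : "kept");
    }
    // 2.00 kept / 1.00 kept / 0.00 masked / -1.00 masked
}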
@@ -2041,7 +2115,7 @@ struct llama_sampler * llama_sampler_init_dry(const struct llama_vocab * vocab,
         }
     }
 
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_dry_i,
         /* .ctx = */ new llama_sampler_dry {
             /* .total_context_size = */ context_size,
@@ -2053,8 +2127,8 @@ struct llama_sampler * llama_sampler_init_dry(const struct llama_vocab * vocab,
             /* .dry_repeat_count = */ dry_enabled ? std::vector<int>(effective_dry_penalty_last_n, 0) : std::vector<int>{},
             /* .dry_max_token_repeat = */ {},
             /* .last_tokens = */ dry_enabled ? ring_buffer<llama_token>(effective_dry_penalty_last_n) : ring_buffer<llama_token>(0),
-        }
-
+        }
+    );
 }
 
 // wrapper for test-sampling.cpp
@@ -2155,14 +2229,14 @@ struct llama_sampler * llama_sampler_init_logit_bias(
         int32_t n_vocab,
         int32_t n_logit_bias,
         const llama_logit_bias * logit_bias) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_logit_bias_i,
         /* .ctx = */ new llama_sampler_logit_bias {
             /* .n_vocab = */ n_vocab,
             /* .logit_bias = */ std::vector<llama_logit_bias>(logit_bias, logit_bias + n_logit_bias),
             /* .to_search = */ {},
-        }
-
+        }
+    );
 }
 
 // infill
@@ -2377,14 +2451,14 @@ static struct llama_sampler_i llama_sampler_infill_i = {
 };
 
 struct llama_sampler * llama_sampler_init_infill(const struct llama_vocab * vocab) {
-    return
+    return llama_sampler_init(
         /* .iface = */ &llama_sampler_infill_i,
         /* .ctx = */ new llama_sampler_infill {
             /* .vocab = */ vocab,
             /* .buf0 = */ std::vector<char>(512),
             /* .buf1 = */ std::vector<char>(512),
-        }
-
+        }
+    );
 }
 
 // utils
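End-to-end, the refactored constructors and the new sampler compose as usual through the chain API. A sketch against the llama.h declarations in this diff (model/context setup and error handling omitted; parameter choices are arbitrary):

#include "llama.h"

// top-n-sigma filter first, then sample from whatever survives
static struct llama_sampler * make_chain(uint32_t seed) {
    struct llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
    llama_sampler_chain_add(chain, llama_sampler_init_top_n_sigma(1.0f)); // keep logits within 1 sigma of the max
    llama_sampler_chain_add(chain, llama_sampler_init_dist(seed));        // final draw
    return chain; // caller releases with llama_sampler_free(chain)
}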
|