@fugood/llama.node 0.4.7 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +20 -6
- package/lib/index.js +41 -17
- package/lib/index.ts +50 -23
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +9 -9
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +37 -18
- package/src/LlamaContext.h +1 -0
- package/src/TokenizeWorker.cpp +16 -12
- package/src/TokenizeWorker.h +2 -2
- package/src/common.hpp +54 -50
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
package/src/llama.cpp/tools/server/utils.hpp

@@ -536,6 +536,7 @@ static bool server_sent_event(httplib::DataSink & sink, const char * event, cons
 // OAI utils
 //
 
+// used by /completions endpoint
 static json oaicompat_completion_params_parse(const json & body) {
     json llama_params;
 
@@ -580,13 +581,19 @@ static json oaicompat_completion_params_parse(const json & body) {
     return llama_params;
 }
 
-static json oaicompat_completion_params_parse(
+struct oaicompat_parser_options {
+    bool use_jinja;
+    bool prefill_assistant;
+    common_reasoning_format reasoning_format;
+    common_chat_templates * tmpls;
+    bool allow_image;
+    bool allow_audio;
+};
+
+// used by /chat/completions endpoint
+static json oaicompat_chat_params_parse(
     const json & body, /* openai api json semantics */
-    bool use_jinja,
-    bool prefill_assistant,
-    common_reasoning_format reasoning_format,
-    const struct common_chat_templates * tmpls,
-    bool allow_non_text,
+    const oaicompat_parser_options & opt,
     std::vector<raw_buffer> & out_files)
 {
     json llama_params;
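The hunk above folds the former loose arguments (use_jinja, prefill_assistant, reasoning_format, tmpls, allow_non_text) into a single oaicompat_parser_options object, which is what lets the later hunks distinguish allow_image from allow_audio. A minimal standalone sketch of the same parameter-object pattern, with simplified types that are not the package's actual API:

#include <iostream>
#include <stdexcept>
#include <string>

// Simplified stand-in for oaicompat_parser_options (illustrative only; the real
// struct also carries common_reasoning_format and common_chat_templates *).
struct parser_options {
    bool use_jinja         = false;
    bool prefill_assistant = true;
    bool allow_image       = false;
    bool allow_audio       = false;
};

// Old style: parse(body, use_jinja, prefill_assistant, ..., allow_non_text, ...).
// New style: parse(body, opt) - adding a capability flag no longer touches every signature.
static void check_content_type(const std::string & type, const parser_options & opt) {
    if (type == "image_url" && !opt.allow_image) {
        throw std::runtime_error("image input is not supported");
    }
    if (type == "input_audio" && !opt.allow_audio) {
        throw std::runtime_error("audio input is not supported");
    }
}

int main() {
    parser_options opt;
    opt.allow_image = true; // e.g. a vision-capable mmproj was loaded
    try {
        check_content_type("input_audio", opt); // audio not enabled -> rejected
    } catch (const std::exception & e) {
        std::cout << "rejected: " << e.what() << "\n";
    }
    return 0;
}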
@@ -598,11 +605,11 @@ static json oaicompat_completion_params_parse(
         if (stream) {
             throw std::runtime_error("Cannot use tools with stream");
         }
-        if (!use_jinja) {
+        if (!opt.use_jinja) {
             throw std::runtime_error("tools param requires --jinja flag");
         }
     }
-    if (!use_jinja) {
+    if (!opt.use_jinja) {
         if (body.contains("tool_choice") && !body.at("tool_choice").is_null()) {
             throw std::runtime_error("Unsupported param: tool_choice");
         }
@@ -667,12 +674,12 @@ static json oaicompat_completion_params_parse(
 
             for (auto & p : content) {
                 std::string type = json_value(p, "type", std::string());
-                json image_url = json_value(p, "image_url", json::object());
                 if (type == "image_url") {
-                    if (!allow_non_text) {
-                        throw std::runtime_error("image input is not supported
+                    if (!opt.allow_image) {
+                        throw std::runtime_error("image input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
                     }
 
+                    json image_url = json_value(p, "image_url", json::object());
                     std::string url = json_value(image_url, "url", std::string());
                     if (string_starts_with(url, "http")) {
                         // download remote image
@@ -710,8 +717,31 @@ static json oaicompat_completion_params_parse(
 
                     // replace this chunk with a marker
                     p["type"] = "text";
-                    p["text"] =
+                    p["text"] = mtmd_default_marker();
                     p.erase("image_url");
+
+                } else if (type == "input_audio") {
+                    if (!opt.allow_audio) {
+                        throw std::runtime_error("audio input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
+                    }
+
+                    json input_audio = json_value(p, "input_audio", json::object());
+                    std::string data = json_value(input_audio, "data", std::string());
+                    std::string format = json_value(input_audio, "format", std::string());
+                    // while we also support flac, we don't allow it here so we matches the OAI spec
+                    if (format != "wav" && format != "mp3") {
+                        throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'");
+                    }
+                    auto decoded_data = base64_decode(data); // expected to be base64 encoded
+                    out_files.push_back(decoded_data);
+
+                    // replace this chunk with a marker
+                    p["type"] = "text";
+                    p["text"] = mtmd_default_marker();
+                    p.erase("input_audio");
+
+                } else if (type != "text") {
+                    throw std::runtime_error("unsupported content[].type");
                 }
             }
         }
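With the parsing added above, a /chat/completions request can mix text, image_url, and input_audio content parts, with audio supplied as base64-encoded wav or mp3. A hedged sketch of such a request body, built with nlohmann::json; the model name, URL, and base64 payload are placeholders:

#include <nlohmann/json.hpp>
#include <iostream>

using json = nlohmann::ordered_json;

int main() {
    // Hypothetical /v1/chat/completions body with one image part and one audio part.
    // "data" must be base64-encoded audio; only "wav" and "mp3" formats are accepted.
    json body = {
        {"model", "any-model"},                           // placeholder
        {"messages", json::array({
            {
                {"role", "user"},
                {"content", json::array({
                    {{"type", "text"}, {"text", "What do you see and hear?"}},
                    {{"type", "image_url"}, {"image_url", {{"url", "https://example.com/cat.jpg"}}}},
                    {{"type", "input_audio"}, {"input_audio", {
                        {"data", "UklGRv...base64..."},    // placeholder base64 WAV bytes
                        {"format", "wav"}
                    }}}
                })}
            }
        })}
    };
    std::cout << body.dump(2) << std::endl;
    return 0;
}

Per the code above, each accepted media part is base64-decoded into out_files and replaced in the message text by mtmd_default_marker(); any other format or unknown content[].type is rejected.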
@@ -723,9 +753,9 @@ static json oaicompat_completion_params_parse(
     inputs.json_schema = json_schema.is_null() ? "" : json_schema.dump();
     inputs.grammar = grammar;
     inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
-    inputs.use_jinja = use_jinja;
+    inputs.use_jinja = opt.use_jinja;
     inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
-    inputs.extract_reasoning = reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    inputs.extract_reasoning = opt.reasoning_format != COMMON_REASONING_FORMAT_NONE;
     inputs.add_generation_prompt = json_value(body, "add_generation_prompt", true);
     if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && body.contains("grammar")) {
         throw std::runtime_error("Cannot use custom grammar constraints with tools.");
@@ -733,7 +763,7 @@ static json oaicompat_completion_params_parse(
 
     // if the assistant message appears at the end of list, we do not add end-of-turn token
     // for ex. this can be useful to modify the reasoning process in reasoning models
-    bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" && prefill_assistant;
+    bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" && opt.prefill_assistant;
     common_chat_msg last_message;
     if (prefill_assistant_message) {
         last_message = inputs.messages.back();
@@ -749,7 +779,7 @@ static json oaicompat_completion_params_parse(
     }
 
     // Apply chat template to the list of messages
-    auto chat_params = common_chat_templates_apply(tmpls, inputs);
+    auto chat_params = common_chat_templates_apply(opt.tmpls, inputs);
 
     /* Append assistant prefilled message */
     if (prefill_assistant_message) {
@@ -1040,7 +1070,7 @@ struct server_tokens {
 private: // disallow accessing these members directly, risking out-of-sync
 
     // map a **start** position in tokens to the image chunk
-    std::unordered_map<llama_pos, mtmd::input_chunk_ptr>
+    std::unordered_map<llama_pos, mtmd::input_chunk_ptr> map_pos_to_media;
 
     // list of tokens
     // it can include LLAMA_TOKEN_NULL, which is used to indicate a token that is not a text token
@@ -1051,7 +1081,7 @@ private: // disallow accessing these members directly, risking out-of-sync
     // for ex. with input of 5 text tokens and 2 images:
     // [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
     // pos 0 1 2 3 4 5 6 7 8 9
-    //
+    // map_pos_to_media will contain: {5, img0}, {8, img1}
 
 public:
     server_tokens() = default;
@@ -1090,15 +1120,15 @@ public:
         }
         oss << "\n";
         oss << "image pos: ";
-        for (const auto & it :
+        for (const auto & it : map_pos_to_media) {
             oss << it.first << ", ";
         }
         return oss.str();
     }
 
     const mtmd::input_chunk_ptr & find_chunk(llama_pos pos) const {
-        auto it =
-        if (it !=
+        auto it = map_pos_to_media.find(pos);
+        if (it != map_pos_to_media.end()) {
             return it->second;
         } else {
             throw std::runtime_error("Chunk not found");
@@ -1115,16 +1145,15 @@ public:
     // will create a copy of the chunk if it contains non-text data
     void push_back(const mtmd_input_chunk * chunk) {
         auto type = mtmd_input_chunk_get_type(chunk);
-        if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+        if (type == MTMD_INPUT_CHUNK_TYPE_IMAGE || type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
             GGML_ASSERT(has_mtmd);
-
-            const int n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
+            const int n_pos = mtmd_input_chunk_get_n_pos(chunk);
             llama_pos start_pos = tokens.size();
             for (int i = 0; i < n_pos; ++i) {
                 tokens.emplace_back(LLAMA_TOKEN_NULL);
             }
             mtmd::input_chunk_ptr new_chunk(mtmd_input_chunk_copy(chunk));
-
+            map_pos_to_media[start_pos] = std::move(new_chunk);
         } else if (type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
             size_t n_tokens;
             auto text_tokens = mtmd_input_chunk_get_tokens_text(chunk, &n_tokens);
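push_back above reserves one LLAMA_TOKEN_NULL placeholder per position a media chunk occupies and records the chunk under its start position, which is exactly the {5, img0}, {8, img1} layout described in the earlier comment. A self-contained sketch of that bookkeeping with plain STL types (an int stands in for a token, a std::string for mtmd::input_chunk_ptr; illustrative only, not the package's types):

#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-in: -1 plays the role of LLAMA_TOKEN_NULL.
struct toy_server_tokens {
    std::vector<int> tokens;                           // -1 marks a non-text position
    std::unordered_map<int, std::string> pos_to_media; // start position -> media chunk id

    void push_text(int tok) { tokens.push_back(tok); }

    void push_media(const std::string & id, int n_pos) {
        int start_pos = (int) tokens.size();
        for (int i = 0; i < n_pos; ++i) {
            tokens.push_back(-1); // one placeholder per position the chunk occupies
        }
        pos_to_media[start_pos] = id;
    }
};

int main() {
    toy_server_tokens st;
    for (int t = 0; t < 5; ++t) st.push_text(100 + t); // 5 text tokens: pos 0..4
    st.push_media("img0", 3);                          // occupies pos 5..7
    st.push_media("img1", 2);                          // occupies pos 8..9

    for (const auto & it : st.pos_to_media) {
        std::printf("media '%s' starts at pos %d\n", it.second.c_str(), it.first);
    }
    // expected entries: {5, img0} and {8, img1}, matching the layout comment above
    return 0;
}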
@@ -1169,6 +1198,9 @@ public:
     void keep_first(size_t n) {
         GGML_ASSERT(n <= tokens.size());
         if (has_mtmd) {
+            if (n == tokens.size()) {
+                return; // nothing to do
+            }
             // we throw an error if we try to remove a token in the middle of an image
             // for ex. with input of 5 text tokens and 2 images:
             // [0] [1] [2] [3] [4] [img0] [img0] [img0] [img1] [img1]
@@ -1183,10 +1215,10 @@ public:
                 }
             }
             // remove all image chunks that are not used anymore
-            for (auto it =
+            for (auto it = map_pos_to_media.begin(); it != map_pos_to_media.end(); ) {
                 llama_pos pos = it->first;
                 if (pos >= (llama_pos)n) {
-                    it =
+                    it = map_pos_to_media.erase(it);
                 } else {
                     ++it;
                 }
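The cleanup loop above uses the standard erase-while-iterating idiom for std::unordered_map; a tiny standalone illustration of the same pattern, dropping entries at or beyond a cutoff position:

#include <cstdio>
#include <string>
#include <unordered_map>

int main() {
    std::unordered_map<int, std::string> pos_to_media = {
        {5, "img0"}, {8, "img1"}, {12, "aud0"}
    };
    const int n = 9; // keep_first(9): everything starting at pos >= 9 must go

    for (auto it = pos_to_media.begin(); it != pos_to_media.end(); ) {
        if (it->first >= n) {
            it = pos_to_media.erase(it); // erase returns the next valid iterator
        } else {
            ++it;
        }
    }

    for (const auto & it : pos_to_media) {
        std::printf("kept %s at pos %d\n", it.second.c_str(), it.first);
    }
    return 0;
}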
@@ -1217,14 +1249,12 @@ public:
             const auto & a_chunk = find_chunk(i);
             const auto & b_chunk = b.find_chunk(i);
             GGML_ASSERT(a_chunk && b_chunk);
-
-
-
-
-            size_t a_pos = mtmd_image_tokens_get_n_pos(a_img);
-            size_t b_pos = mtmd_image_tokens_get_n_pos(b_img);
+            std::string ai_id = mtmd_input_chunk_get_id(a_chunk.get());
+            std::string bi_id = mtmd_input_chunk_get_id(b_chunk.get());
+            size_t a_pos = mtmd_input_chunk_get_n_pos(a_chunk.get());
+            size_t b_pos = mtmd_input_chunk_get_n_pos(b_chunk.get());
             if (ai_id == bi_id && a_pos == b_pos) {
-                GGML_ASSERT(a_pos > 0 && "Invalid
+                GGML_ASSERT(a_pos > 0 && "Invalid media chunk"); // should never happen
                 i += a_pos - 1; // will be +1 by the for loop
                 continue;
             } else {
@@ -1250,8 +1280,7 @@ public:
             if (t == LLAMA_TOKEN_NULL) {
                 try {
                     const auto & chunk = find_chunk(i);
-
-                    size_t n_pos = mtmd_image_tokens_get_n_pos(img_tokens);
+                    size_t n_pos = mtmd_input_chunk_get_n_pos(chunk.get());
                     i += n_pos - 1; // will be +1 by the for loop
                 } catch (const std::exception & e) {
                     return false;
@@ -1270,22 +1299,21 @@ public:
             llama_pos n_past,
             int32_t seq_id,
             llama_pos & n_pos_out) {
-        auto
-
-
-
-        SRV_INF("%s\n", "processing image...");
+        auto & chunk = find_chunk(n_past);
+        const char * name = mtmd_input_chunk_get_type(chunk.get()) == MTMD_INPUT_CHUNK_TYPE_IMAGE
+            ? "image" : "audio";
+        SRV_INF("processing %s...\n", name);
         int32_t n_batch = llama_n_batch(ctx);
         int64_t t0 = ggml_time_ms();
         llama_pos new_n_past = n_past;
         int32_t result = mtmd_helper_eval_chunk_single(mctx, ctx,
-
+            chunk.get(),
             n_past,
             seq_id,
             n_batch,
             true, // logits last
             &new_n_past);
-        SRV_INF("
+        SRV_INF("%s processed in %" PRId64 " ms\n", name, ggml_time_ms() - t0);
         if (result != 0) {
             LOG_ERR("mtmd_helper_eval failed with status %d", result);
             n_pos_out = n_past;
package/src/llama.cpp/tools/tts/tts.cpp

@@ -579,6 +579,8 @@ int main(int argc, char ** argv) {
 
     params.model = params.vocoder.model;
     params.embedding = true;
+    params.ctx_shift = false; // silence warning
+    params.n_ubatch = params.n_batch;
 
     common_init_result llama_init_cts = common_init_from_params(params);
 
@@ -1020,8 +1022,8 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
     }
     GGML_ASSERT(batch.n_tokens == n_codes);
 
-    if (
-        LOG_ERR("%s:
+    if (llama_encode(ctx_cts, batch) != 0) {
+        LOG_ERR("%s: llama_encode() failed\n", __func__);
         return 1;
     }
 