@fugood/llama.node 1.2.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +33 -11
- package/src/llama.cpp/CMakeLists.txt +1 -0
- package/src/llama.cpp/common/CMakeLists.txt +46 -2
- package/src/llama.cpp/common/arg.cpp +423 -186
- package/src/llama.cpp/common/arg.h +0 -1
- package/src/llama.cpp/common/chat-parser.cpp +154 -13
- package/src/llama.cpp/common/chat-parser.h +3 -0
- package/src/llama.cpp/common/chat.cpp +217 -6
- package/src/llama.cpp/common/chat.h +5 -3
- package/src/llama.cpp/common/common.cpp +23 -6
- package/src/llama.cpp/common/common.h +6 -4
- package/src/llama.cpp/common/http.h +73 -0
- package/src/llama.cpp/common/sampling.cpp +1 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +7 -6
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -1
- package/src/llama.cpp/ggml/include/ggml-rpc.h +8 -9
- package/src/llama.cpp/ggml/include/ggml-zdnn.h +3 -0
- package/src/llama.cpp/ggml/include/ggml.h +22 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +12 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +100 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +18 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +209 -96
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +32 -44
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +107 -83
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +27 -19
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +103 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +6 -5
- package/src/llama.cpp/include/llama.h +23 -11
- package/src/llama.cpp/src/llama-arch.cpp +93 -0
- package/src/llama.cpp/src/llama-arch.h +22 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -1
- package/src/llama.cpp/src/llama-context.cpp +157 -0
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-graph.cpp +57 -22
- package/src/llama.cpp/src/llama-graph.h +10 -1
- package/src/llama.cpp/src/llama-hparams.h +17 -2
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +10 -2
- package/src/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +10 -5
- package/src/llama.cpp/src/llama-kv-cache.h +2 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +19 -9
- package/src/llama.cpp/src/llama-memory-hybrid.h +2 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +19 -3
- package/src/llama.cpp/src/llama-memory-recurrent.h +3 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/src/llama.cpp/src/llama-model.cpp +582 -45
- package/src/llama.cpp/src/llama-model.h +23 -1
- package/src/llama.cpp/src/llama-sampling.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.h +41 -40
- package/src/llama.cpp/src/unicode.h +43 -0
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
#include <cstdarg>
|
|
25
25
|
#include <filesystem>
|
|
26
26
|
#include <fstream>
|
|
27
|
+
#include <future>
|
|
27
28
|
#include <list>
|
|
28
29
|
#include <regex>
|
|
29
30
|
#include <set>
|
|
@@ -31,12 +32,31 @@
|
|
|
31
32
|
#include <thread>
|
|
32
33
|
#include <vector>
|
|
33
34
|
|
|
34
|
-
//#define LLAMA_USE_CURL
|
|
35
|
-
|
|
36
35
|
#if defined(LLAMA_USE_CURL)
|
|
37
36
|
#include <curl/curl.h>
|
|
38
37
|
#include <curl/easy.h>
|
|
39
|
-
#
|
|
38
|
+
#else
|
|
39
|
+
#include "http.h"
|
|
40
|
+
#endif
|
|
41
|
+
|
|
42
|
+
#ifdef __linux__
|
|
43
|
+
#include <linux/limits.h>
|
|
44
|
+
#elif defined(_WIN32)
|
|
45
|
+
# if !defined(PATH_MAX)
|
|
46
|
+
# define PATH_MAX MAX_PATH
|
|
47
|
+
# endif
|
|
48
|
+
#elif defined(_AIX)
|
|
49
|
+
#include <sys/limits.h>
|
|
50
|
+
#else
|
|
51
|
+
#include <sys/syslimits.h>
|
|
52
|
+
#endif
|
|
53
|
+
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
54
|
+
|
|
55
|
+
// isatty
|
|
56
|
+
#if defined(_WIN32)
|
|
57
|
+
#include <io.h>
|
|
58
|
+
#else
|
|
59
|
+
#include <unistd.h>
|
|
40
60
|
#endif
|
|
41
61
|
|
|
42
62
|
using json = nlohmann::ordered_json;
|
|
@@ -85,6 +105,14 @@ static void write_file(const std::string & fname, const std::string & content) {
|
|
|
85
105
|
}
|
|
86
106
|
}
|
|
87
107
|
|
|
108
|
+
static bool is_output_a_tty() {
|
|
109
|
+
#if defined(_WIN32)
|
|
110
|
+
return _isatty(_fileno(stdout));
|
|
111
|
+
#else
|
|
112
|
+
return isatty(1);
|
|
113
|
+
#endif
|
|
114
|
+
}
|
|
115
|
+
|
|
88
116
|
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
|
89
117
|
this->examples = std::move(examples);
|
|
90
118
|
return *this;
|
|
@@ -202,24 +230,54 @@ struct common_hf_file_res {
|
|
|
202
230
|
std::string mmprojFile;
|
|
203
231
|
};
|
|
204
232
|
|
|
205
|
-
|
|
233
|
+
static void write_etag(const std::string & path, const std::string & etag) {
|
|
234
|
+
const std::string etag_path = path + ".etag";
|
|
235
|
+
write_file(etag_path, etag);
|
|
236
|
+
LOG_DBG("%s: file etag saved: %s\n", __func__, etag_path.c_str());
|
|
237
|
+
}
|
|
206
238
|
|
|
207
|
-
|
|
208
|
-
|
|
239
|
+
static std::string read_etag(const std::string & path) {
|
|
240
|
+
std::string none;
|
|
241
|
+
const std::string etag_path = path + ".etag";
|
|
242
|
+
|
|
243
|
+
if (std::filesystem::exists(etag_path)) {
|
|
244
|
+
std::ifstream etag_in(etag_path);
|
|
245
|
+
if (!etag_in) {
|
|
246
|
+
LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
|
|
247
|
+
return none;
|
|
248
|
+
}
|
|
249
|
+
std::string etag;
|
|
250
|
+
std::getline(etag_in, etag);
|
|
251
|
+
return etag;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// no etag file, but maybe there is an old .json
|
|
255
|
+
// remove this code later
|
|
256
|
+
const std::string metadata_path = path + ".json";
|
|
257
|
+
|
|
258
|
+
if (std::filesystem::exists(metadata_path)) {
|
|
259
|
+
std::ifstream metadata_in(metadata_path);
|
|
260
|
+
try {
|
|
261
|
+
nlohmann::json metadata_json;
|
|
262
|
+
metadata_in >> metadata_json;
|
|
263
|
+
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
|
|
264
|
+
metadata_json.dump().c_str());
|
|
265
|
+
if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) {
|
|
266
|
+
std::string etag = metadata_json.at("etag");
|
|
267
|
+
write_etag(path, etag);
|
|
268
|
+
if (!std::filesystem::remove(metadata_path)) {
|
|
269
|
+
LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str());
|
|
270
|
+
}
|
|
271
|
+
return etag;
|
|
272
|
+
}
|
|
273
|
+
} catch (const nlohmann::json::exception & e) {
|
|
274
|
+
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
return none;
|
|
209
278
|
}
|
|
210
279
|
|
|
211
|
-
#ifdef
|
|
212
|
-
#include <linux/limits.h>
|
|
213
|
-
#elif defined(_WIN32)
|
|
214
|
-
# if !defined(PATH_MAX)
|
|
215
|
-
# define PATH_MAX MAX_PATH
|
|
216
|
-
# endif
|
|
217
|
-
#elif defined(_AIX)
|
|
218
|
-
#include <sys/limits.h>
|
|
219
|
-
#else
|
|
220
|
-
#include <sys/syslimits.h>
|
|
221
|
-
#endif
|
|
222
|
-
#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
280
|
+
#ifdef LLAMA_USE_CURL
|
|
223
281
|
|
|
224
282
|
//
|
|
225
283
|
// CURL utils
|
|
@@ -368,49 +426,19 @@ static bool common_download_head(CURL * curl,
|
|
|
368
426
|
}
|
|
369
427
|
|
|
370
428
|
// download one single file from remote URL to local path
|
|
371
|
-
static bool
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
bool offline) {
|
|
375
|
-
// If the file exists, check its JSON metadata companion file.
|
|
376
|
-
std::string metadata_path = path + ".json";
|
|
429
|
+
static bool common_download_file_single_online(const std::string & url,
|
|
430
|
+
const std::string & path,
|
|
431
|
+
const std::string & bearer_token) {
|
|
377
432
|
static const int max_attempts = 3;
|
|
378
433
|
static const int retry_delay_seconds = 2;
|
|
379
434
|
for (int i = 0; i < max_attempts; ++i) {
|
|
380
|
-
|
|
381
|
-
std::string etag;
|
|
382
|
-
std::string last_modified;
|
|
435
|
+
std::string etag;
|
|
383
436
|
|
|
384
437
|
// Check if the file already exists locally
|
|
385
438
|
const auto file_exists = std::filesystem::exists(path);
|
|
386
439
|
if (file_exists) {
|
|
387
|
-
|
|
388
|
-
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
389
|
-
return true; // skip verification/downloading
|
|
390
|
-
}
|
|
391
|
-
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
|
|
392
|
-
std::ifstream metadata_in(metadata_path);
|
|
393
|
-
if (metadata_in.good()) {
|
|
394
|
-
try {
|
|
395
|
-
metadata_in >> metadata;
|
|
396
|
-
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
|
|
397
|
-
metadata.dump().c_str());
|
|
398
|
-
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
399
|
-
etag = metadata.at("etag");
|
|
400
|
-
}
|
|
401
|
-
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
402
|
-
last_modified = metadata.at("lastModified");
|
|
403
|
-
}
|
|
404
|
-
} catch (const nlohmann::json::exception & e) {
|
|
405
|
-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
406
|
-
}
|
|
407
|
-
}
|
|
408
|
-
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
440
|
+
etag = read_etag(path);
|
|
409
441
|
} else {
|
|
410
|
-
if (offline) {
|
|
411
|
-
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
412
|
-
return false;
|
|
413
|
-
}
|
|
414
442
|
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
415
443
|
}
|
|
416
444
|
|
|
@@ -447,11 +475,6 @@ static bool common_download_file_single(const std::string & url,
|
|
|
447
475
|
headers.etag.c_str());
|
|
448
476
|
should_download = true;
|
|
449
477
|
should_download_from_scratch = true;
|
|
450
|
-
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
|
451
|
-
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
|
|
452
|
-
last_modified.c_str(), headers.last_modified.c_str());
|
|
453
|
-
should_download = true;
|
|
454
|
-
should_download_from_scratch = true;
|
|
455
478
|
}
|
|
456
479
|
}
|
|
457
480
|
|
|
@@ -482,15 +505,9 @@ static bool common_download_file_single(const std::string & url,
|
|
|
482
505
|
}
|
|
483
506
|
}
|
|
484
507
|
}
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
{ "url", url },
|
|
489
|
-
{ "etag", headers.etag },
|
|
490
|
-
{ "lastModified", headers.last_modified }
|
|
491
|
-
});
|
|
492
|
-
write_file(metadata_path, metadata.dump(4));
|
|
493
|
-
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
508
|
+
if (head_request_ok) {
|
|
509
|
+
write_etag(path, headers.etag);
|
|
510
|
+
}
|
|
494
511
|
|
|
495
512
|
// start the download
|
|
496
513
|
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
|
|
@@ -530,6 +547,306 @@ static bool common_download_file_single(const std::string & url,
|
|
|
530
547
|
return true;
|
|
531
548
|
}
|
|
532
549
|
|
|
550
|
+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
|
551
|
+
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
552
|
+
curl_slist_ptr http_headers;
|
|
553
|
+
std::vector<char> res_buffer;
|
|
554
|
+
|
|
555
|
+
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
556
|
+
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
557
|
+
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
558
|
+
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
|
|
559
|
+
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
560
|
+
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
561
|
+
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
562
|
+
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
|
|
563
|
+
return size * nmemb;
|
|
564
|
+
};
|
|
565
|
+
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
566
|
+
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
|
|
567
|
+
#if defined(_WIN32)
|
|
568
|
+
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
569
|
+
#endif
|
|
570
|
+
if (params.timeout > 0) {
|
|
571
|
+
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
|
|
572
|
+
}
|
|
573
|
+
if (params.max_size > 0) {
|
|
574
|
+
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
|
|
575
|
+
}
|
|
576
|
+
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
577
|
+
for (const auto & header : params.headers) {
|
|
578
|
+
http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
|
|
579
|
+
}
|
|
580
|
+
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
581
|
+
|
|
582
|
+
CURLcode res = curl_easy_perform(curl.get());
|
|
583
|
+
|
|
584
|
+
if (res != CURLE_OK) {
|
|
585
|
+
std::string error_msg = curl_easy_strerror(res);
|
|
586
|
+
throw std::runtime_error("error: cannot make GET request: " + error_msg);
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
long res_code;
|
|
590
|
+
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
|
591
|
+
|
|
592
|
+
return { res_code, std::move(res_buffer) };
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
#else
|
|
596
|
+
|
|
597
|
+
static void print_progress(size_t current, size_t total) {
|
|
598
|
+
if (!is_output_a_tty()) {
|
|
599
|
+
return;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
if (!total) {
|
|
603
|
+
return;
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
size_t width = 50;
|
|
607
|
+
size_t pct = (100 * current) / total;
|
|
608
|
+
size_t pos = (width * current) / total;
|
|
609
|
+
|
|
610
|
+
std::cout << "["
|
|
611
|
+
<< std::string(pos, '=')
|
|
612
|
+
<< (pos < width ? ">" : "")
|
|
613
|
+
<< std::string(width - pos, ' ')
|
|
614
|
+
<< "] " << std::setw(3) << pct << "% ("
|
|
615
|
+
<< current / (1024 * 1024) << " MB / "
|
|
616
|
+
<< total / (1024 * 1024) << " MB)\r";
|
|
617
|
+
std::cout.flush();
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
static bool common_pull_file(httplib::Client & cli,
|
|
621
|
+
const std::string & resolve_path,
|
|
622
|
+
const std::string & path_tmp,
|
|
623
|
+
bool supports_ranges,
|
|
624
|
+
size_t existing_size,
|
|
625
|
+
size_t & total_size) {
|
|
626
|
+
std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app);
|
|
627
|
+
if (!ofs.is_open()) {
|
|
628
|
+
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str());
|
|
629
|
+
return false;
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
httplib::Headers headers;
|
|
633
|
+
if (supports_ranges && existing_size > 0) {
|
|
634
|
+
headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
std::atomic<size_t> downloaded{existing_size};
|
|
638
|
+
|
|
639
|
+
auto res = cli.Get(resolve_path, headers,
|
|
640
|
+
[&](const httplib::Response &response) {
|
|
641
|
+
if (existing_size > 0 && response.status != 206) {
|
|
642
|
+
LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
|
|
643
|
+
return false;
|
|
644
|
+
}
|
|
645
|
+
if (existing_size == 0 && response.status != 200) {
|
|
646
|
+
LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
|
|
647
|
+
return false;
|
|
648
|
+
}
|
|
649
|
+
if (total_size == 0 && response.has_header("Content-Length")) {
|
|
650
|
+
try {
|
|
651
|
+
size_t content_length = std::stoull(response.get_header_value("Content-Length"));
|
|
652
|
+
total_size = existing_size + content_length;
|
|
653
|
+
} catch (const std::exception &e) {
|
|
654
|
+
LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
|
|
655
|
+
}
|
|
656
|
+
}
|
|
657
|
+
return true;
|
|
658
|
+
},
|
|
659
|
+
[&](const char *data, size_t len) {
|
|
660
|
+
ofs.write(data, len);
|
|
661
|
+
if (!ofs) {
|
|
662
|
+
LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
|
|
663
|
+
return false;
|
|
664
|
+
}
|
|
665
|
+
downloaded += len;
|
|
666
|
+
print_progress(downloaded, total_size);
|
|
667
|
+
return true;
|
|
668
|
+
},
|
|
669
|
+
nullptr
|
|
670
|
+
);
|
|
671
|
+
|
|
672
|
+
std::cout << "\n";
|
|
673
|
+
|
|
674
|
+
if (!res) {
|
|
675
|
+
LOG_ERR("%s: error during download. Status: %d\n", __func__, res ? res->status : -1);
|
|
676
|
+
return false;
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
return true;
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
// download one single file from remote URL to local path
|
|
683
|
+
static bool common_download_file_single_online(const std::string & url,
|
|
684
|
+
const std::string & path,
|
|
685
|
+
const std::string & bearer_token) {
|
|
686
|
+
static const int max_attempts = 3;
|
|
687
|
+
static const int retry_delay_seconds = 2;
|
|
688
|
+
|
|
689
|
+
auto [cli, parts] = common_http_client(url);
|
|
690
|
+
|
|
691
|
+
httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
|
|
692
|
+
if (!bearer_token.empty()) {
|
|
693
|
+
default_headers.insert({"Authorization", "Bearer " + bearer_token});
|
|
694
|
+
}
|
|
695
|
+
cli.set_default_headers(default_headers);
|
|
696
|
+
|
|
697
|
+
const bool file_exists = std::filesystem::exists(path);
|
|
698
|
+
|
|
699
|
+
std::string last_etag;
|
|
700
|
+
if (file_exists) {
|
|
701
|
+
last_etag = read_etag(path);
|
|
702
|
+
} else {
|
|
703
|
+
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
for (int i = 0; i < max_attempts; ++i) {
|
|
707
|
+
auto head = cli.Head(parts.path);
|
|
708
|
+
bool head_ok = head && head->status >= 200 && head->status < 300;
|
|
709
|
+
if (!head_ok) {
|
|
710
|
+
LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
|
|
711
|
+
if (file_exists) {
|
|
712
|
+
LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
|
|
713
|
+
return true;
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
std::string etag;
|
|
718
|
+
if (head_ok && head->has_header("ETag")) {
|
|
719
|
+
etag = head->get_header_value("ETag");
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
size_t total_size = 0;
|
|
723
|
+
if (head_ok && head->has_header("Content-Length")) {
|
|
724
|
+
try {
|
|
725
|
+
total_size = std::stoull(head->get_header_value("Content-Length"));
|
|
726
|
+
} catch (const std::exception& e) {
|
|
727
|
+
LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what());
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
bool supports_ranges = false;
|
|
732
|
+
if (head_ok && head->has_header("Accept-Ranges")) {
|
|
733
|
+
supports_ranges = head->get_header_value("Accept-Ranges") != "none";
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
bool should_download_from_scratch = false;
|
|
737
|
+
if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
|
|
738
|
+
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
|
|
739
|
+
last_etag.c_str(), etag.c_str());
|
|
740
|
+
should_download_from_scratch = true;
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
if (file_exists) {
|
|
744
|
+
if (!should_download_from_scratch) {
|
|
745
|
+
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
746
|
+
return true;
|
|
747
|
+
}
|
|
748
|
+
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
749
|
+
if (remove(path.c_str()) != 0) {
|
|
750
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
751
|
+
return false;
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
const std::string path_temporary = path + ".downloadInProgress";
|
|
756
|
+
size_t existing_size = 0;
|
|
757
|
+
|
|
758
|
+
if (std::filesystem::exists(path_temporary)) {
|
|
759
|
+
if (supports_ranges && !should_download_from_scratch) {
|
|
760
|
+
existing_size = std::filesystem::file_size(path_temporary);
|
|
761
|
+
} else if (remove(path_temporary.c_str()) != 0) {
|
|
762
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
763
|
+
return false;
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
// start the download
|
|
768
|
+
LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
|
|
769
|
+
__func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
|
|
770
|
+
const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
|
|
771
|
+
if (!was_pull_successful) {
|
|
772
|
+
if (i + 1 < max_attempts) {
|
|
773
|
+
const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
|
|
774
|
+
LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
|
|
775
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
|
776
|
+
} else {
|
|
777
|
+
LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
|
|
778
|
+
}
|
|
779
|
+
continue;
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
783
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
784
|
+
return false;
|
|
785
|
+
}
|
|
786
|
+
if (!etag.empty()) {
|
|
787
|
+
write_etag(path, etag);
|
|
788
|
+
}
|
|
789
|
+
break;
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
return true;
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
|
|
796
|
+
const common_remote_params & params) {
|
|
797
|
+
auto [cli, parts] = common_http_client(url);
|
|
798
|
+
|
|
799
|
+
httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
|
|
800
|
+
for (const auto & header : params.headers) {
|
|
801
|
+
size_t pos = header.find(':');
|
|
802
|
+
if (pos != std::string::npos) {
|
|
803
|
+
headers.emplace(header.substr(0, pos), header.substr(pos + 1));
|
|
804
|
+
} else {
|
|
805
|
+
headers.emplace(header, "");
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
if (params.timeout > 0) {
|
|
810
|
+
cli.set_read_timeout(params.timeout, 0);
|
|
811
|
+
cli.set_write_timeout(params.timeout, 0);
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
std::vector<char> buf;
|
|
815
|
+
auto res = cli.Get(parts.path, headers,
|
|
816
|
+
[&](const char *data, size_t len) {
|
|
817
|
+
buf.insert(buf.end(), data, data + len);
|
|
818
|
+
return params.max_size == 0 ||
|
|
819
|
+
buf.size() <= static_cast<size_t>(params.max_size);
|
|
820
|
+
},
|
|
821
|
+
nullptr
|
|
822
|
+
);
|
|
823
|
+
|
|
824
|
+
if (!res) {
|
|
825
|
+
throw std::runtime_error("error: cannot make GET request");
|
|
826
|
+
}
|
|
827
|
+
|
|
828
|
+
return { res->status, std::move(buf) };
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
#endif // LLAMA_USE_CURL
|
|
832
|
+
|
|
833
|
+
static bool common_download_file_single(const std::string & url,
|
|
834
|
+
const std::string & path,
|
|
835
|
+
const std::string & bearer_token,
|
|
836
|
+
bool offline) {
|
|
837
|
+
if (!offline) {
|
|
838
|
+
return common_download_file_single_online(url, path, bearer_token);
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
if (!std::filesystem::exists(path)) {
|
|
842
|
+
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
843
|
+
return false;
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
847
|
+
return true;
|
|
848
|
+
}
|
|
849
|
+
|
|
533
850
|
// download multiple files from remote URLs to local paths
|
|
534
851
|
// the input is a vector of pairs <url, path>
|
|
535
852
|
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) {
|
|
@@ -588,7 +905,7 @@ static bool common_download_model(
|
|
|
588
905
|
|
|
589
906
|
if (n_split > 1) {
|
|
590
907
|
char split_prefix[PATH_MAX] = {0};
|
|
591
|
-
char split_url_prefix[
|
|
908
|
+
char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0};
|
|
592
909
|
|
|
593
910
|
// Verify the first split file format
|
|
594
911
|
// and extract split URL and PATH prefixes
|
|
@@ -609,7 +926,7 @@ static bool common_download_model(
|
|
|
609
926
|
char split_path[PATH_MAX] = {0};
|
|
610
927
|
llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split);
|
|
611
928
|
|
|
612
|
-
char split_url[
|
|
929
|
+
char split_url[LLAMA_MAX_URL_LENGTH] = {0};
|
|
613
930
|
llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split);
|
|
614
931
|
|
|
615
932
|
if (std::string(split_path) == model.path) {
|
|
@@ -626,50 +943,6 @@ static bool common_download_model(
|
|
|
626
943
|
return true;
|
|
627
944
|
}
|
|
628
945
|
|
|
629
|
-
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
|
630
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
631
|
-
curl_slist_ptr http_headers;
|
|
632
|
-
std::vector<char> res_buffer;
|
|
633
|
-
|
|
634
|
-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
635
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
636
|
-
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
637
|
-
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
638
|
-
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
639
|
-
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
640
|
-
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
|
|
641
|
-
return size * nmemb;
|
|
642
|
-
};
|
|
643
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
644
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
|
|
645
|
-
#if defined(_WIN32)
|
|
646
|
-
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
647
|
-
#endif
|
|
648
|
-
if (params.timeout > 0) {
|
|
649
|
-
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
|
|
650
|
-
}
|
|
651
|
-
if (params.max_size > 0) {
|
|
652
|
-
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
|
|
653
|
-
}
|
|
654
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
655
|
-
for (const auto & header : params.headers) {
|
|
656
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
|
|
657
|
-
}
|
|
658
|
-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
659
|
-
|
|
660
|
-
CURLcode res = curl_easy_perform(curl.get());
|
|
661
|
-
|
|
662
|
-
if (res != CURLE_OK) {
|
|
663
|
-
std::string error_msg = curl_easy_strerror(res);
|
|
664
|
-
throw std::runtime_error("error: cannot make GET request: " + error_msg);
|
|
665
|
-
}
|
|
666
|
-
|
|
667
|
-
long res_code;
|
|
668
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
|
669
|
-
|
|
670
|
-
return { res_code, std::move(res_buffer) };
|
|
671
|
-
}
|
|
672
|
-
|
|
673
946
|
/**
|
|
674
947
|
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
|
|
675
948
|
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
|
|
@@ -736,21 +1009,17 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
|
|
736
1009
|
std::string mmprojFile;
|
|
737
1010
|
|
|
738
1011
|
if (res_code == 200 || res_code == 304) {
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
ggufFile = match[1].str();
|
|
1012
|
+
try {
|
|
1013
|
+
auto j = json::parse(res_str);
|
|
1014
|
+
|
|
1015
|
+
if (j.contains("ggufFile") && j["ggufFile"].contains("rfilename")) {
|
|
1016
|
+
ggufFile = j["ggufFile"]["rfilename"].get<std::string>();
|
|
745
1017
|
}
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
{
|
|
749
|
-
std::regex pattern("\"mmprojFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\"");
|
|
750
|
-
std::smatch match;
|
|
751
|
-
if (std::regex_search(res_str, match, pattern)) {
|
|
752
|
-
mmprojFile = match[1].str();
|
|
1018
|
+
if (j.contains("mmprojFile") && j["mmprojFile"].contains("rfilename")) {
|
|
1019
|
+
mmprojFile = j["mmprojFile"]["rfilename"].get<std::string>();
|
|
753
1020
|
}
|
|
1021
|
+
} catch (const std::exception & e) {
|
|
1022
|
+
throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what());
|
|
754
1023
|
}
|
|
755
1024
|
if (!use_cache) {
|
|
756
1025
|
// if not using cached response, update the cache file
|
|
@@ -770,45 +1039,6 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
|
|
770
1039
|
return { hf_repo, ggufFile, mmprojFile };
|
|
771
1040
|
}
|
|
772
1041
|
|
|
773
|
-
#else
|
|
774
|
-
|
|
775
|
-
bool common_has_curl() {
|
|
776
|
-
return false;
|
|
777
|
-
}
|
|
778
|
-
|
|
779
|
-
static bool common_download_file_single(const std::string &, const std::string &, const std::string &, bool) {
|
|
780
|
-
LOG_ERR("error: built without CURL, cannot download model from internet\n");
|
|
781
|
-
return false;
|
|
782
|
-
}
|
|
783
|
-
|
|
784
|
-
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool) {
|
|
785
|
-
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
|
|
786
|
-
return false;
|
|
787
|
-
}
|
|
788
|
-
|
|
789
|
-
static bool common_download_model(
|
|
790
|
-
const common_params_model &,
|
|
791
|
-
const std::string &,
|
|
792
|
-
bool) {
|
|
793
|
-
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
|
|
794
|
-
return false;
|
|
795
|
-
}
|
|
796
|
-
|
|
797
|
-
static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
|
|
798
|
-
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
|
|
799
|
-
return {};
|
|
800
|
-
}
|
|
801
|
-
|
|
802
|
-
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params &) {
|
|
803
|
-
if (!url.empty()) {
|
|
804
|
-
throw std::runtime_error("error: built without CURL, cannot download model from the internet");
|
|
805
|
-
}
|
|
806
|
-
|
|
807
|
-
return {};
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
#endif // LLAMA_USE_CURL
|
|
811
|
-
|
|
812
1042
|
//
|
|
813
1043
|
// Docker registry functions
|
|
814
1044
|
//
|
|
@@ -1068,8 +1298,6 @@ static std::string get_all_kv_cache_types() {
|
|
|
1068
1298
|
//
|
|
1069
1299
|
|
|
1070
1300
|
static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) {
|
|
1071
|
-
std::string arg;
|
|
1072
|
-
const std::string arg_prefix = "--";
|
|
1073
1301
|
common_params & params = ctx_arg.params;
|
|
1074
1302
|
|
|
1075
1303
|
std::unordered_map<std::string, common_arg *> arg_to_options;
|
|
@@ -1387,18 +1615,14 @@ static void add_rpc_devices(const std::string & servers) {
|
|
|
1387
1615
|
if (!rpc_reg) {
|
|
1388
1616
|
throw std::invalid_argument("failed to find RPC backend");
|
|
1389
1617
|
}
|
|
1390
|
-
typedef
|
|
1391
|
-
|
|
1392
|
-
if (!
|
|
1393
|
-
throw std::invalid_argument("failed to find RPC
|
|
1618
|
+
typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint);
|
|
1619
|
+
ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server");
|
|
1620
|
+
if (!ggml_backend_rpc_add_server_fn) {
|
|
1621
|
+
throw std::invalid_argument("failed to find RPC add server function");
|
|
1394
1622
|
}
|
|
1395
1623
|
for (const auto & server : rpc_servers) {
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
ggml_backend_device_register(dev);
|
|
1399
|
-
} else {
|
|
1400
|
-
throw std::invalid_argument("failed to register RPC device");
|
|
1401
|
-
}
|
|
1624
|
+
auto reg = ggml_backend_rpc_add_server_fn(server.c_str());
|
|
1625
|
+
ggml_backend_register(reg);
|
|
1402
1626
|
}
|
|
1403
1627
|
}
|
|
1404
1628
|
|
|
@@ -1704,13 +1928,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1704
1928
|
}
|
|
1705
1929
|
).set_env("LLAMA_ARG_SWA_FULL"));
|
|
1706
1930
|
add_opt(common_arg(
|
|
1707
|
-
{"--swa-checkpoints"}, "N",
|
|
1708
|
-
string_format("max number of
|
|
1709
|
-
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.
|
|
1931
|
+
{"--ctx-checkpoints", "--swa-checkpoints"}, "N",
|
|
1932
|
+
string_format("max number of context checkpoints to create per slot (default: %d)\n"
|
|
1933
|
+
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.n_ctx_checkpoints),
|
|
1710
1934
|
[](common_params & params, int value) {
|
|
1711
|
-
params.
|
|
1935
|
+
params.n_ctx_checkpoints = value;
|
|
1712
1936
|
}
|
|
1713
|
-
).set_env("
|
|
1937
|
+
).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER}));
|
|
1938
|
+
add_opt(common_arg(
|
|
1939
|
+
{"--cache-ram", "-cram"}, "N",
|
|
1940
|
+
string_format("set the maximum cache size in MiB (default: %d, -1 - no limit, 0 - disable)\n"
|
|
1941
|
+
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)", params.cache_ram_mib),
|
|
1942
|
+
[](common_params & params, int value) {
|
|
1943
|
+
params.cache_ram_mib = value;
|
|
1944
|
+
}
|
|
1945
|
+
).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER}));
|
|
1714
1946
|
add_opt(common_arg(
|
|
1715
1947
|
{"--kv-unified", "-kvu"},
|
|
1716
1948
|
string_format("use single unified KV buffer for the KV cache of all sequences (default: %s)\n"
|
|
@@ -2360,6 +2592,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
2360
2592
|
params.no_extra_bufts = true;
|
|
2361
2593
|
}
|
|
2362
2594
|
).set_env("LLAMA_ARG_NO_REPACK"));
|
|
2595
|
+
add_opt(common_arg(
|
|
2596
|
+
{"--no-host"},
|
|
2597
|
+
"bypass host buffer allowing extra buffers to be used",
|
|
2598
|
+
[](common_params & params) {
|
|
2599
|
+
params.no_host = true;
|
|
2600
|
+
}
|
|
2601
|
+
).set_env("LLAMA_ARG_NO_HOST"));
|
|
2363
2602
|
add_opt(common_arg(
|
|
2364
2603
|
{"-ctk", "--cache-type-k"}, "TYPE",
|
|
2365
2604
|
string_format(
|
|
@@ -3201,7 +3440,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3201
3440
|
{"--reasoning-format"}, "FORMAT",
|
|
3202
3441
|
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
|
|
3203
3442
|
"- none: leaves thoughts unparsed in `message.content`\n"
|
|
3204
|
-
"- deepseek: puts thoughts in `message.reasoning_content
|
|
3443
|
+
"- deepseek: puts thoughts in `message.reasoning_content`\n"
|
|
3444
|
+
"- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`\n"
|
|
3205
3445
|
"(default: auto)",
|
|
3206
3446
|
[](common_params & params, const std::string & value) {
|
|
3207
3447
|
params.reasoning_format = common_reasoning_format_from_name(value);
|
|
@@ -3628,7 +3868,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3628
3868
|
[](common_params & params) {
|
|
3629
3869
|
params.model.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
|
|
3630
3870
|
params.model.hf_file = "bge-small-en-v1.5-q8_0.gguf";
|
|
3631
|
-
params.pooling_type = LLAMA_POOLING_TYPE_NONE;
|
|
3632
3871
|
params.embd_normalize = 2;
|
|
3633
3872
|
params.n_ctx = 512;
|
|
3634
3873
|
params.verbose_prompt = true;
|
|
@@ -3642,7 +3881,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3642
3881
|
[](common_params & params) {
|
|
3643
3882
|
params.model.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
|
|
3644
3883
|
params.model.hf_file = "e5-small-v2-q8_0.gguf";
|
|
3645
|
-
params.pooling_type = LLAMA_POOLING_TYPE_NONE;
|
|
3646
3884
|
params.embd_normalize = 2;
|
|
3647
3885
|
params.n_ctx = 512;
|
|
3648
3886
|
params.verbose_prompt = true;
|
|
@@ -3656,7 +3894,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3656
3894
|
[](common_params & params) {
|
|
3657
3895
|
params.model.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
|
|
3658
3896
|
params.model.hf_file = "gte-small-q8_0.gguf";
|
|
3659
|
-
params.pooling_type = LLAMA_POOLING_TYPE_NONE;
|
|
3660
3897
|
params.embd_normalize = 2;
|
|
3661
3898
|
params.n_ctx = 512;
|
|
3662
3899
|
params.verbose_prompt = true;
|