@fugood/llama.node 1.2.3 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +33 -11
- package/src/llama.cpp/CMakeLists.txt +1 -0
- package/src/llama.cpp/common/CMakeLists.txt +46 -2
- package/src/llama.cpp/common/arg.cpp +322 -70
- package/src/llama.cpp/common/arg.h +0 -1
- package/src/llama.cpp/common/chat-parser.cpp +154 -13
- package/src/llama.cpp/common/chat-parser.h +3 -0
- package/src/llama.cpp/common/chat.cpp +217 -6
- package/src/llama.cpp/common/chat.h +5 -3
- package/src/llama.cpp/common/common.cpp +22 -6
- package/src/llama.cpp/common/common.h +6 -4
- package/src/llama.cpp/common/http.h +73 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +7 -6
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +8 -9
- package/src/llama.cpp/ggml/include/ggml.h +22 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +12 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +100 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +209 -96
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +32 -44
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +107 -83
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +17 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +103 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +6 -5
- package/src/llama.cpp/include/llama.h +8 -0
- package/src/llama.cpp/src/llama-arch.cpp +93 -0
- package/src/llama.cpp/src/llama-arch.h +22 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -1
- package/src/llama.cpp/src/llama-context.cpp +6 -0
- package/src/llama.cpp/src/llama-graph.cpp +57 -22
- package/src/llama.cpp/src/llama-graph.h +10 -1
- package/src/llama.cpp/src/llama-hparams.h +17 -2
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +2 -2
- package/src/llama.cpp/src/llama-kv-cache.cpp +2 -5
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +11 -9
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +11 -3
- package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/src/llama.cpp/src/llama-model.cpp +568 -41
- package/src/llama.cpp/src/llama-model.h +18 -0
- package/src/llama.cpp/src/llama-sampling.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.h +41 -40
- package/src/llama.cpp/src/unicode.h +43 -0
|
@@ -32,11 +32,11 @@
|
|
|
32
32
|
#include <thread>
|
|
33
33
|
#include <vector>
|
|
34
34
|
|
|
35
|
-
//#define LLAMA_USE_CURL
|
|
36
|
-
|
|
37
35
|
#if defined(LLAMA_USE_CURL)
|
|
38
36
|
#include <curl/curl.h>
|
|
39
37
|
#include <curl/easy.h>
|
|
38
|
+
#else
|
|
39
|
+
#include "http.h"
|
|
40
40
|
#endif
|
|
41
41
|
|
|
42
42
|
#ifdef __linux__
|
|
@@ -52,6 +52,13 @@
|
|
|
52
52
|
#endif
|
|
53
53
|
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
54
54
|
|
|
55
|
+
// isatty
|
|
56
|
+
#if defined(_WIN32)
|
|
57
|
+
#include <io.h>
|
|
58
|
+
#else
|
|
59
|
+
#include <unistd.h>
|
|
60
|
+
#endif
|
|
61
|
+
|
|
55
62
|
using json = nlohmann::ordered_json;
|
|
56
63
|
|
|
57
64
|
std::initializer_list<enum llama_example> mmproj_examples = {
|
|
@@ -98,6 +105,14 @@ static void write_file(const std::string & fname, const std::string & content) {
|
|
|
98
105
|
}
|
|
99
106
|
}
|
|
100
107
|
|
|
108
|
+
// Reports whether the process' standard output is attached to a terminal.
//
// Returns true when stdout is a terminal/character device, false otherwise
// (for example when the output is redirected to a file or a pipe).
static bool is_output_a_tty() {
#if defined(_WIN32)
    // _isatty() yields a non-zero int for character devices
    return _isatty(_fileno(stdout)) != 0;
#else
    // isatty() yields 1 for a terminal and 0 otherwise
    return isatty(STDOUT_FILENO) != 0;
#endif
}
|
|
115
|
+
|
|
101
116
|
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
|
102
117
|
this->examples = std::move(examples);
|
|
103
118
|
return *this;
|
|
@@ -215,12 +230,55 @@ struct common_hf_file_res {
|
|
|
215
230
|
std::string mmprojFile;
|
|
216
231
|
};
|
|
217
232
|
|
|
218
|
-
|
|
233
|
+
// Stores `etag` in the sidecar file "<path>.etag" (written through write_file())
// and emits a debug log line naming the sidecar file.
static void write_etag(const std::string & path, const std::string & etag) {
    std::string sidecar(path);
    sidecar += ".etag";

    write_file(sidecar, etag);

    LOG_DBG("%s: file etag saved: %s\n", __func__, sidecar.c_str());
}
|
|
219
238
|
|
|
220
|
-
|
|
221
|
-
|
|
239
|
+
static std::string read_etag(const std::string & path) {
|
|
240
|
+
std::string none;
|
|
241
|
+
const std::string etag_path = path + ".etag";
|
|
242
|
+
|
|
243
|
+
if (std::filesystem::exists(etag_path)) {
|
|
244
|
+
std::ifstream etag_in(etag_path);
|
|
245
|
+
if (!etag_in) {
|
|
246
|
+
LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
|
|
247
|
+
return none;
|
|
248
|
+
}
|
|
249
|
+
std::string etag;
|
|
250
|
+
std::getline(etag_in, etag);
|
|
251
|
+
return etag;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// no etag file, but maybe there is an old .json
|
|
255
|
+
// remove this code later
|
|
256
|
+
const std::string metadata_path = path + ".json";
|
|
257
|
+
|
|
258
|
+
if (std::filesystem::exists(metadata_path)) {
|
|
259
|
+
std::ifstream metadata_in(metadata_path);
|
|
260
|
+
try {
|
|
261
|
+
nlohmann::json metadata_json;
|
|
262
|
+
metadata_in >> metadata_json;
|
|
263
|
+
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
|
|
264
|
+
metadata_json.dump().c_str());
|
|
265
|
+
if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) {
|
|
266
|
+
std::string etag = metadata_json.at("etag");
|
|
267
|
+
write_etag(path, etag);
|
|
268
|
+
if (!std::filesystem::remove(metadata_path)) {
|
|
269
|
+
LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str());
|
|
270
|
+
}
|
|
271
|
+
return etag;
|
|
272
|
+
}
|
|
273
|
+
} catch (const nlohmann::json::exception & e) {
|
|
274
|
+
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
return none;
|
|
222
278
|
}
|
|
223
279
|
|
|
280
|
+
#ifdef LLAMA_USE_CURL
|
|
281
|
+
|
|
224
282
|
//
|
|
225
283
|
// CURL utils
|
|
226
284
|
//
|
|
@@ -371,36 +429,15 @@ static bool common_download_head(CURL * curl,
|
|
|
371
429
|
static bool common_download_file_single_online(const std::string & url,
|
|
372
430
|
const std::string & path,
|
|
373
431
|
const std::string & bearer_token) {
|
|
374
|
-
// If the file exists, check its JSON metadata companion file.
|
|
375
|
-
std::string metadata_path = path + ".json";
|
|
376
432
|
static const int max_attempts = 3;
|
|
377
433
|
static const int retry_delay_seconds = 2;
|
|
378
434
|
for (int i = 0; i < max_attempts; ++i) {
|
|
379
|
-
|
|
380
|
-
std::string etag;
|
|
381
|
-
std::string last_modified;
|
|
435
|
+
std::string etag;
|
|
382
436
|
|
|
383
437
|
// Check if the file already exists locally
|
|
384
438
|
const auto file_exists = std::filesystem::exists(path);
|
|
385
439
|
if (file_exists) {
|
|
386
|
-
|
|
387
|
-
std::ifstream metadata_in(metadata_path);
|
|
388
|
-
if (metadata_in.good()) {
|
|
389
|
-
try {
|
|
390
|
-
metadata_in >> metadata;
|
|
391
|
-
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
|
|
392
|
-
metadata.dump().c_str());
|
|
393
|
-
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
394
|
-
etag = metadata.at("etag");
|
|
395
|
-
}
|
|
396
|
-
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
397
|
-
last_modified = metadata.at("lastModified");
|
|
398
|
-
}
|
|
399
|
-
} catch (const nlohmann::json::exception & e) {
|
|
400
|
-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
401
|
-
}
|
|
402
|
-
}
|
|
403
|
-
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
440
|
+
etag = read_etag(path);
|
|
404
441
|
} else {
|
|
405
442
|
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
406
443
|
}
|
|
@@ -438,11 +475,6 @@ static bool common_download_file_single_online(const std::string & url,
|
|
|
438
475
|
headers.etag.c_str());
|
|
439
476
|
should_download = true;
|
|
440
477
|
should_download_from_scratch = true;
|
|
441
|
-
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
|
442
|
-
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
|
|
443
|
-
last_modified.c_str(), headers.last_modified.c_str());
|
|
444
|
-
should_download = true;
|
|
445
|
-
should_download_from_scratch = true;
|
|
446
478
|
}
|
|
447
479
|
}
|
|
448
480
|
|
|
@@ -473,15 +505,9 @@ static bool common_download_file_single_online(const std::string & url,
|
|
|
473
505
|
}
|
|
474
506
|
}
|
|
475
507
|
}
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
{ "url", url },
|
|
480
|
-
{ "etag", headers.etag },
|
|
481
|
-
{ "lastModified", headers.last_modified }
|
|
482
|
-
});
|
|
483
|
-
write_file(metadata_path, metadata.dump(4));
|
|
484
|
-
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
508
|
+
if (head_request_ok) {
|
|
509
|
+
write_etag(path, headers.etag);
|
|
510
|
+
}
|
|
485
511
|
|
|
486
512
|
// start the download
|
|
487
513
|
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
|
|
@@ -568,21 +594,238 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
|
|
|
568
594
|
|
|
569
595
|
#else
|
|
570
596
|
|
|
571
|
-
|
|
572
|
-
|
|
597
|
+
static void print_progress(size_t current, size_t total) {
|
|
598
|
+
if (!is_output_a_tty()) {
|
|
599
|
+
return;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
if (!total) {
|
|
603
|
+
return;
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
size_t width = 50;
|
|
607
|
+
size_t pct = (100 * current) / total;
|
|
608
|
+
size_t pos = (width * current) / total;
|
|
609
|
+
|
|
610
|
+
std::cout << "["
|
|
611
|
+
<< std::string(pos, '=')
|
|
612
|
+
<< (pos < width ? ">" : "")
|
|
613
|
+
<< std::string(width - pos, ' ')
|
|
614
|
+
<< "] " << std::setw(3) << pct << "% ("
|
|
615
|
+
<< current / (1024 * 1024) << " MB / "
|
|
616
|
+
<< total / (1024 * 1024) << " MB)\r";
|
|
617
|
+
std::cout.flush();
|
|
573
618
|
}
|
|
574
619
|
|
|
575
|
-
static bool
|
|
576
|
-
|
|
577
|
-
|
|
620
|
+
// Downloads `resolve_path` through `cli` and appends the received bytes to `path_tmp`.
//
//   cli             - HTTP client to use for the GET request
//   resolve_path    - request path on the server
//   path_tmp        - local file the body is appended to (opened in binary append mode)
//   supports_ranges - whether a "Range" header may be sent to resume a partial download
//   existing_size   - number of bytes already present in `path_tmp`; when > 0 the server
//                     must answer 206, otherwise it must answer 200
//   total_size      - in/out: expected full size; when 0 it is filled in from the
//                     Content-Length of the response (plus `existing_size`)
//
// Returns true when the request completed and the data was written, false otherwise.
static bool common_pull_file(httplib::Client & cli,
                             const std::string & resolve_path,
                             const std::string & path_tmp,
                             bool supports_ranges,
                             size_t existing_size,
                             size_t & total_size) {
    std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app);
    if (!ofs.is_open()) {
        LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str());
        return false;
    }

    httplib::Headers headers;
    if (supports_ranges && existing_size > 0) {
        headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
    }

    std::atomic<size_t> downloaded{existing_size};

    auto res = cli.Get(resolve_path, headers,
        // response handler: validate the status line before any body data is accepted
        [&](const httplib::Response &response) {
            if (existing_size > 0 && response.status != 206) {
                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
                return false;
            }
            if (existing_size == 0 && response.status != 200) {
                LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
                return false;
            }
            if (total_size == 0 && response.has_header("Content-Length")) {
                try {
                    size_t content_length = std::stoull(response.get_header_value("Content-Length"));
                    total_size = existing_size + content_length;
                } catch (const std::exception &e) {
                    LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
                }
            }
            return true;
        },
        // content receiver: append each chunk to the file; returning false aborts the transfer
        [&](const char *data, size_t len) {
            ofs.write(data, len);
            if (!ofs) {
                LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
                return false;
            }
            downloaded += len;
            print_progress(downloaded, total_size);
            return true;
        },
        nullptr
    );

    std::cout << "\n";

    if (!res) {
        // there is no response object here, so report the client-side error instead of a status code
        LOG_ERR("%s: error during download: %s\n", __func__, httplib::to_string(res.error()).c_str());
        return false;
    }

    // close explicitly so that a failed final flush is detected instead of being lost in the destructor
    ofs.close();
    if (ofs.fail()) {
        LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
        return false;
    }

    return true;
}
|
|
681
|
+
|
|
682
|
+
// download one single file from remote URL to local path
|
|
683
|
+
static bool common_download_file_single_online(const std::string & url,
|
|
684
|
+
const std::string & path,
|
|
685
|
+
const std::string & bearer_token) {
|
|
686
|
+
static const int max_attempts = 3;
|
|
687
|
+
static const int retry_delay_seconds = 2;
|
|
688
|
+
|
|
689
|
+
auto [cli, parts] = common_http_client(url);
|
|
690
|
+
|
|
691
|
+
httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
|
|
692
|
+
if (!bearer_token.empty()) {
|
|
693
|
+
default_headers.insert({"Authorization", "Bearer " + bearer_token});
|
|
694
|
+
}
|
|
695
|
+
cli.set_default_headers(default_headers);
|
|
696
|
+
|
|
697
|
+
const bool file_exists = std::filesystem::exists(path);
|
|
698
|
+
|
|
699
|
+
std::string last_etag;
|
|
700
|
+
if (file_exists) {
|
|
701
|
+
last_etag = read_etag(path);
|
|
702
|
+
} else {
|
|
703
|
+
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
for (int i = 0; i < max_attempts; ++i) {
|
|
707
|
+
auto head = cli.Head(parts.path);
|
|
708
|
+
bool head_ok = head && head->status >= 200 && head->status < 300;
|
|
709
|
+
if (!head_ok) {
|
|
710
|
+
LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
|
|
711
|
+
if (file_exists) {
|
|
712
|
+
LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
|
|
713
|
+
return true;
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
std::string etag;
|
|
718
|
+
if (head_ok && head->has_header("ETag")) {
|
|
719
|
+
etag = head->get_header_value("ETag");
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
size_t total_size = 0;
|
|
723
|
+
if (head_ok && head->has_header("Content-Length")) {
|
|
724
|
+
try {
|
|
725
|
+
total_size = std::stoull(head->get_header_value("Content-Length"));
|
|
726
|
+
} catch (const std::exception& e) {
|
|
727
|
+
LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what());
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
bool supports_ranges = false;
|
|
732
|
+
if (head_ok && head->has_header("Accept-Ranges")) {
|
|
733
|
+
supports_ranges = head->get_header_value("Accept-Ranges") != "none";
|
|
734
|
+
}
|
|
735
|
+
|
|
736
|
+
bool should_download_from_scratch = false;
|
|
737
|
+
if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
|
|
738
|
+
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
|
|
739
|
+
last_etag.c_str(), etag.c_str());
|
|
740
|
+
should_download_from_scratch = true;
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
if (file_exists) {
|
|
744
|
+
if (!should_download_from_scratch) {
|
|
745
|
+
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
746
|
+
return true;
|
|
747
|
+
}
|
|
748
|
+
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
749
|
+
if (remove(path.c_str()) != 0) {
|
|
750
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
751
|
+
return false;
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
const std::string path_temporary = path + ".downloadInProgress";
|
|
756
|
+
size_t existing_size = 0;
|
|
757
|
+
|
|
758
|
+
if (std::filesystem::exists(path_temporary)) {
|
|
759
|
+
if (supports_ranges && !should_download_from_scratch) {
|
|
760
|
+
existing_size = std::filesystem::file_size(path_temporary);
|
|
761
|
+
} else if (remove(path_temporary.c_str()) != 0) {
|
|
762
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
763
|
+
return false;
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
// start the download
|
|
768
|
+
LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
|
|
769
|
+
__func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
|
|
770
|
+
const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
|
|
771
|
+
if (!was_pull_successful) {
|
|
772
|
+
if (i + 1 < max_attempts) {
|
|
773
|
+
const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
|
|
774
|
+
LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
|
|
775
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
|
776
|
+
} else {
|
|
777
|
+
LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
|
|
778
|
+
}
|
|
779
|
+
continue;
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
783
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
784
|
+
return false;
|
|
785
|
+
}
|
|
786
|
+
if (!etag.empty()) {
|
|
787
|
+
write_etag(path, etag);
|
|
788
|
+
}
|
|
789
|
+
break;
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
return true;
|
|
578
793
|
}
|
|
579
794
|
|
|
580
|
-
std::pair<long, std::vector<char>> common_remote_get_content(const std::string
|
|
581
|
-
|
|
582
|
-
|
|
795
|
+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
|
|
796
|
+
const common_remote_params & params) {
|
|
797
|
+
auto [cli, parts] = common_http_client(url);
|
|
798
|
+
|
|
799
|
+
httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
|
|
800
|
+
for (const auto & header : params.headers) {
|
|
801
|
+
size_t pos = header.find(':');
|
|
802
|
+
if (pos != std::string::npos) {
|
|
803
|
+
headers.emplace(header.substr(0, pos), header.substr(pos + 1));
|
|
804
|
+
} else {
|
|
805
|
+
headers.emplace(header, "");
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
if (params.timeout > 0) {
|
|
810
|
+
cli.set_read_timeout(params.timeout, 0);
|
|
811
|
+
cli.set_write_timeout(params.timeout, 0);
|
|
812
|
+
}
|
|
813
|
+
|
|
814
|
+
std::vector<char> buf;
|
|
815
|
+
auto res = cli.Get(parts.path, headers,
|
|
816
|
+
[&](const char *data, size_t len) {
|
|
817
|
+
buf.insert(buf.end(), data, data + len);
|
|
818
|
+
return params.max_size == 0 ||
|
|
819
|
+
buf.size() <= static_cast<size_t>(params.max_size);
|
|
820
|
+
},
|
|
821
|
+
nullptr
|
|
822
|
+
);
|
|
823
|
+
|
|
824
|
+
if (!res) {
|
|
825
|
+
throw std::runtime_error("error: cannot make GET request");
|
|
583
826
|
}
|
|
584
827
|
|
|
585
|
-
return {};
|
|
828
|
+
return { res->status, std::move(buf) };
|
|
586
829
|
}
|
|
587
830
|
|
|
588
831
|
#endif // LLAMA_USE_CURL
|
|
@@ -1372,18 +1615,14 @@ static void add_rpc_devices(const std::string & servers) {
|
|
|
1372
1615
|
if (!rpc_reg) {
|
|
1373
1616
|
throw std::invalid_argument("failed to find RPC backend");
|
|
1374
1617
|
}
|
|
1375
|
-
typedef
|
|
1376
|
-
|
|
1377
|
-
if (!
|
|
1378
|
-
throw std::invalid_argument("failed to find RPC
|
|
1618
|
+
typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint);
|
|
1619
|
+
ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server");
|
|
1620
|
+
if (!ggml_backend_rpc_add_server_fn) {
|
|
1621
|
+
throw std::invalid_argument("failed to find RPC add server function");
|
|
1379
1622
|
}
|
|
1380
1623
|
for (const auto & server : rpc_servers) {
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
ggml_backend_device_register(dev);
|
|
1384
|
-
} else {
|
|
1385
|
-
throw std::invalid_argument("failed to register RPC device");
|
|
1386
|
-
}
|
|
1624
|
+
auto reg = ggml_backend_rpc_add_server_fn(server.c_str());
|
|
1625
|
+
ggml_backend_register(reg);
|
|
1387
1626
|
}
|
|
1388
1627
|
}
|
|
1389
1628
|
|
|
@@ -1689,13 +1928,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
1689
1928
|
}
|
|
1690
1929
|
).set_env("LLAMA_ARG_SWA_FULL"));
|
|
1691
1930
|
add_opt(common_arg(
|
|
1692
|
-
{"--swa-checkpoints"}, "N",
|
|
1693
|
-
string_format("max number of
|
|
1694
|
-
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.
|
|
1931
|
+
{"--ctx-checkpoints", "--swa-checkpoints"}, "N",
|
|
1932
|
+
string_format("max number of context checkpoints to create per slot (default: %d)\n"
|
|
1933
|
+
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.n_ctx_checkpoints),
|
|
1934
|
+
[](common_params & params, int value) {
|
|
1935
|
+
params.n_ctx_checkpoints = value;
|
|
1936
|
+
}
|
|
1937
|
+
).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER}));
|
|
1938
|
+
add_opt(common_arg(
|
|
1939
|
+
{"--cache-ram", "-cram"}, "N",
|
|
1940
|
+
string_format("set the maximum cache size in MiB (default: %d, -1 - no limit, 0 - disable)\n"
|
|
1941
|
+
"[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)", params.cache_ram_mib),
|
|
1695
1942
|
[](common_params & params, int value) {
|
|
1696
|
-
params.
|
|
1943
|
+
params.cache_ram_mib = value;
|
|
1697
1944
|
}
|
|
1698
|
-
).set_env("
|
|
1945
|
+
).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER}));
|
|
1699
1946
|
add_opt(common_arg(
|
|
1700
1947
|
{"--kv-unified", "-kvu"},
|
|
1701
1948
|
string_format("use single unified KV buffer for the KV cache of all sequences (default: %s)\n"
|
|
@@ -2345,6 +2592,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
2345
2592
|
params.no_extra_bufts = true;
|
|
2346
2593
|
}
|
|
2347
2594
|
).set_env("LLAMA_ARG_NO_REPACK"));
|
|
2595
|
+
add_opt(common_arg(
|
|
2596
|
+
{"--no-host"},
|
|
2597
|
+
"bypass host buffer allowing extra buffers to be used",
|
|
2598
|
+
[](common_params & params) {
|
|
2599
|
+
params.no_host = true;
|
|
2600
|
+
}
|
|
2601
|
+
).set_env("LLAMA_ARG_NO_HOST"));
|
|
2348
2602
|
add_opt(common_arg(
|
|
2349
2603
|
{"-ctk", "--cache-type-k"}, "TYPE",
|
|
2350
2604
|
string_format(
|
|
@@ -3186,7 +3440,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3186
3440
|
{"--reasoning-format"}, "FORMAT",
|
|
3187
3441
|
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
|
|
3188
3442
|
"- none: leaves thoughts unparsed in `message.content`\n"
|
|
3189
|
-
"- deepseek: puts thoughts in `message.reasoning_content
|
|
3443
|
+
"- deepseek: puts thoughts in `message.reasoning_content`\n"
|
|
3444
|
+
"- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`\n"
|
|
3190
3445
|
"(default: auto)",
|
|
3191
3446
|
[](common_params & params, const std::string & value) {
|
|
3192
3447
|
params.reasoning_format = common_reasoning_format_from_name(value);
|
|
@@ -3613,7 +3868,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3613
3868
|
[](common_params & params) {
|
|
3614
3869
|
params.model.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
|
|
3615
3870
|
params.model.hf_file = "bge-small-en-v1.5-q8_0.gguf";
|
|
3616
|
-
params.pooling_type = LLAMA_POOLING_TYPE_NONE;
|
|
3617
3871
|
params.embd_normalize = 2;
|
|
3618
3872
|
params.n_ctx = 512;
|
|
3619
3873
|
params.verbose_prompt = true;
|
|
@@ -3627,7 +3881,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3627
3881
|
[](common_params & params) {
|
|
3628
3882
|
params.model.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
|
|
3629
3883
|
params.model.hf_file = "e5-small-v2-q8_0.gguf";
|
|
3630
|
-
params.pooling_type = LLAMA_POOLING_TYPE_NONE;
|
|
3631
3884
|
params.embd_normalize = 2;
|
|
3632
3885
|
params.n_ctx = 512;
|
|
3633
3886
|
params.verbose_prompt = true;
|
|
@@ -3641,7 +3894,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|
|
3641
3894
|
[](common_params & params) {
|
|
3642
3895
|
params.model.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
|
|
3643
3896
|
params.model.hf_file = "gte-small-q8_0.gguf";
|
|
3644
|
-
params.pooling_type = LLAMA_POOLING_TYPE_NONE;
|
|
3645
3897
|
params.embd_normalize = 2;
|
|
3646
3898
|
params.n_ctx = 512;
|
|
3647
3899
|
params.verbose_prompt = true;
|
|
@@ -78,7 +78,6 @@ bool common_params_parse(int argc, char ** argv, common_params & params, llama_e
|
|
|
78
78
|
|
|
79
79
|
// function to be used by test-arg-parser
|
|
80
80
|
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
|
81
|
-
bool common_has_curl();
|
|
82
81
|
|
|
83
82
|
struct common_remote_params {
|
|
84
83
|
std::vector<std::string> headers;
|