@fugood/llama.node 1.2.3 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +33 -11
- package/src/llama.cpp/CMakeLists.txt +1 -0
- package/src/llama.cpp/common/CMakeLists.txt +46 -2
- package/src/llama.cpp/common/arg.cpp +484 -204
- package/src/llama.cpp/common/arg.h +0 -1
- package/src/llama.cpp/common/chat-parser.cpp +156 -15
- package/src/llama.cpp/common/chat-parser.h +3 -0
- package/src/llama.cpp/common/chat.cpp +217 -6
- package/src/llama.cpp/common/chat.h +5 -3
- package/src/llama.cpp/common/common.cpp +22 -6
- package/src/llama.cpp/common/common.h +6 -4
- package/src/llama.cpp/common/http.h +73 -0
- package/src/llama.cpp/common/json-partial.cpp +51 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +7 -6
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +8 -9
- package/src/llama.cpp/ggml/include/ggml.h +22 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +12 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +12 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +100 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +0 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +10 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +209 -96
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +32 -44
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +107 -83
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +17 -17
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +8 -8
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.cpp +1024 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime.h +13 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime1_kernels.cpp +3196 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/spacemit/ime_kernels.h +26 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +103 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +66 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +11 -9
- package/src/llama.cpp/include/llama.h +8 -0
- package/src/llama.cpp/src/llama-arch.cpp +93 -0
- package/src/llama.cpp/src/llama-arch.h +22 -0
- package/src/llama.cpp/src/llama-chat.cpp +1 -1
- package/src/llama.cpp/src/llama-context.cpp +6 -0
- package/src/llama.cpp/src/llama-graph.cpp +57 -22
- package/src/llama.cpp/src/llama-graph.h +10 -1
- package/src/llama.cpp/src/llama-hparams.cpp +5 -1
- package/src/llama.cpp/src/llama-hparams.h +17 -2
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +2 -2
- package/src/llama.cpp/src/llama-kv-cache.cpp +2 -5
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +11 -9
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +11 -3
- package/src/llama.cpp/src/llama-model-loader.cpp +2 -0
- package/src/llama.cpp/src/llama-model.cpp +572 -45
- package/src/llama.cpp/src/llama-model.h +18 -0
- package/src/llama.cpp/src/llama-sampling.cpp +5 -0
- package/src/llama.cpp/src/llama-vocab.cpp +7 -1
- package/src/llama.cpp/src/llama-vocab.h +41 -40
- package/src/llama.cpp/src/unicode.h +43 -0
--- a/package/src/llama.cpp/common/arg.cpp
+++ b/package/src/llama.cpp/common/arg.cpp

@@ -32,11 +32,11 @@
 #include <thread>
 #include <vector>
 
-//#define LLAMA_USE_CURL
-
 #if defined(LLAMA_USE_CURL)
 #include <curl/curl.h>
 #include <curl/easy.h>
+#else
+#include "http.h"
 #endif
 
 #ifdef __linux__
@@ -52,6 +52,13 @@
 #endif
 #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
 
+// isatty
+#if defined(_WIN32)
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
 using json = nlohmann::ordered_json;
 
 std::initializer_list<enum llama_example> mmproj_examples = {
@@ -98,6 +105,14 @@ static void write_file(const std::string & fname, const std::string & content) {
     }
 }
 
+static bool is_output_a_tty() {
+#if defined(_WIN32)
+    return _isatty(_fileno(stdout));
+#else
+    return isatty(1);
+#endif
+}
+
 common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
     this->examples = std::move(examples);
     return *this;
@@ -215,12 +230,55 @@ struct common_hf_file_res {
     std::string mmprojFile;
 };
 
-
+static void write_etag(const std::string & path, const std::string & etag) {
+    const std::string etag_path = path + ".etag";
+    write_file(etag_path, etag);
+    LOG_DBG("%s: file etag saved: %s\n", __func__, etag_path.c_str());
+}
 
-
-
+static std::string read_etag(const std::string & path) {
+    std::string none;
+    const std::string etag_path = path + ".etag";
+
+    if (std::filesystem::exists(etag_path)) {
+        std::ifstream etag_in(etag_path);
+        if (!etag_in) {
+            LOG_ERR("%s: could not open .etag file for reading: %s\n", __func__, etag_path.c_str());
+            return none;
+        }
+        std::string etag;
+        std::getline(etag_in, etag);
+        return etag;
+    }
+
+    // no etag file, but maybe there is an old .json
+    // remove this code later
+    const std::string metadata_path = path + ".json";
+
+    if (std::filesystem::exists(metadata_path)) {
+        std::ifstream metadata_in(metadata_path);
+        try {
+            nlohmann::json metadata_json;
+            metadata_in >> metadata_json;
+            LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
+                    metadata_json.dump().c_str());
+            if (metadata_json.contains("etag") && metadata_json.at("etag").is_string()) {
+                std::string etag = metadata_json.at("etag");
+                write_etag(path, etag);
+                if (!std::filesystem::remove(metadata_path)) {
+                    LOG_WRN("%s: failed to delete old .json metadata file: %s\n", __func__, metadata_path.c_str());
+                }
+                return etag;
+            }
+        } catch (const nlohmann::json::exception & e) {
+            LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
+        }
+    }
+    return none;
 }
 
+#ifdef LLAMA_USE_CURL
+
 //
 // CURL utils
 //
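Context for the hunk above: the download cache drops the `.json` metadata companion in favor of a one-line `.etag` sidecar next to each model file, with `read_etag()` migrating any old `.json` file it still finds. A minimal sketch of the sidecar convention (paths are hypothetical, not from the package):

    // Sketch of the .etag sidecar layout assumed by write_etag()/read_etag().
    // model:  models/foo.gguf
    // etag:   models/foo.gguf.etag  (single line holding the server's ETag)
    #include <fstream>
    #include <iostream>
    #include <string>

    int main() {
        const std::string model = "models/foo.gguf";        // hypothetical path
        std::ofstream(model + ".etag") << "\"abc123\"";     // what write_etag() stores

        std::string etag;
        std::getline(std::ifstream(model + ".etag"), etag); // what read_etag() reads back
        std::cout << "cached etag: " << etag << "\n";
    }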
@@ -371,36 +429,15 @@ static bool common_download_head(CURL * curl,
 static bool common_download_file_single_online(const std::string & url,
                                                const std::string & path,
                                                const std::string & bearer_token) {
-    // If the file exists, check its JSON metadata companion file.
-    std::string metadata_path = path + ".json";
     static const int max_attempts = 3;
     static const int retry_delay_seconds = 2;
     for (int i = 0; i < max_attempts; ++i) {
-
-        std::string etag;
-        std::string last_modified;
+        std::string etag;
 
         // Check if the file already exists locally
        const auto file_exists = std::filesystem::exists(path);
         if (file_exists) {
-
-            std::ifstream metadata_in(metadata_path);
-            if (metadata_in.good()) {
-                try {
-                    metadata_in >> metadata;
-                    LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
-                            metadata.dump().c_str());
-                    if (metadata.contains("etag") && metadata.at("etag").is_string()) {
-                        etag = metadata.at("etag");
-                    }
-                    if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
-                        last_modified = metadata.at("lastModified");
-                    }
-                } catch (const nlohmann::json::exception & e) {
-                    LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
-                }
-            }
-            // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
+            etag = read_etag(path);
         } else {
             LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
         }
@@ -438,11 +475,6 @@ static bool common_download_file_single_online(const std::string & url,
                         headers.etag.c_str());
                 should_download = true;
                 should_download_from_scratch = true;
-            } else if (!last_modified.empty() && last_modified != headers.last_modified) {
-                LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
-                        last_modified.c_str(), headers.last_modified.c_str());
-                should_download = true;
-                should_download_from_scratch = true;
             }
         }
 
@@ -473,15 +505,9 @@ static bool common_download_file_single_online(const std::string & url,
                 }
             }
         }
-
-
-
-            { "url", url },
-            { "etag", headers.etag },
-            { "lastModified", headers.last_modified }
-        });
-        write_file(metadata_path, metadata.dump(4));
-        LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
+        if (head_request_ok) {
+            write_etag(path, headers.etag);
+        }
 
         // start the download
         LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
@@ -568,21 +594,238 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
 
 #else
 
-
-
+static void print_progress(size_t current, size_t total) {
+    if (!is_output_a_tty()) {
+        return;
+    }
+
+    if (!total) {
+        return;
+    }
+
+    size_t width = 50;
+    size_t pct = (100 * current) / total;
+    size_t pos = (width * current) / total;
+
+    std::cout << "["
+              << std::string(pos, '=')
+              << (pos < width ? ">" : "")
+              << std::string(width - pos, ' ')
+              << "] " << std::setw(3) << pct << "% ("
+              << current / (1024 * 1024) << " MB / "
+              << total / (1024 * 1024) << " MB)\r";
+    std::cout.flush();
 }
 
-static bool
-
-
+static bool common_pull_file(httplib::Client & cli,
+                             const std::string & resolve_path,
+                             const std::string & path_tmp,
+                             bool supports_ranges,
+                             size_t existing_size,
+                             size_t & total_size) {
+    std::ofstream ofs(path_tmp, std::ios::binary | std::ios::app);
+    if (!ofs.is_open()) {
+        LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_tmp.c_str());
+        return false;
+    }
+
+    httplib::Headers headers;
+    if (supports_ranges && existing_size > 0) {
+        headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
+    }
+
+    std::atomic<size_t> downloaded{existing_size};
+
+    auto res = cli.Get(resolve_path, headers,
+        [&](const httplib::Response &response) {
+            if (existing_size > 0 && response.status != 206) {
+                LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
+                return false;
+            }
+            if (existing_size == 0 && response.status != 200) {
+                LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
+                return false;
+            }
+            if (total_size == 0 && response.has_header("Content-Length")) {
+                try {
+                    size_t content_length = std::stoull(response.get_header_value("Content-Length"));
+                    total_size = existing_size + content_length;
+                } catch (const std::exception &e) {
+                    LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
+                }
+            }
+            return true;
+        },
+        [&](const char *data, size_t len) {
+            ofs.write(data, len);
+            if (!ofs) {
+                LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
+                return false;
+            }
+            downloaded += len;
+            print_progress(downloaded, total_size);
+            return true;
+        },
+        nullptr
+    );
+
+    std::cout << "\n";
+
+    if (!res) {
+        LOG_ERR("%s: error during download. Status: %d\n", __func__, res ? res->status : -1);
+        return false;
+    }
+
+    return true;
+}
+
+// download one single file from remote URL to local path
+static bool common_download_file_single_online(const std::string & url,
+                                               const std::string & path,
+                                               const std::string & bearer_token) {
+    static const int max_attempts = 3;
+    static const int retry_delay_seconds = 2;
+
+    auto [cli, parts] = common_http_client(url);
+
+    httplib::Headers default_headers = {{"User-Agent", "llama-cpp"}};
+    if (!bearer_token.empty()) {
+        default_headers.insert({"Authorization", "Bearer " + bearer_token});
+    }
+    cli.set_default_headers(default_headers);
+
+    const bool file_exists = std::filesystem::exists(path);
+
+    std::string last_etag;
+    if (file_exists) {
+        last_etag = read_etag(path);
+    } else {
+        LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
+    }
+
+    for (int i = 0; i < max_attempts; ++i) {
+        auto head = cli.Head(parts.path);
+        bool head_ok = head && head->status >= 200 && head->status < 300;
+        if (!head_ok) {
+            LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
+            if (file_exists) {
+                LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
+                return true;
+            }
+        }
+
+        std::string etag;
+        if (head_ok && head->has_header("ETag")) {
+            etag = head->get_header_value("ETag");
+        }
+
+        size_t total_size = 0;
+        if (head_ok && head->has_header("Content-Length")) {
+            try {
+                total_size = std::stoull(head->get_header_value("Content-Length"));
+            } catch (const std::exception& e) {
+                LOG_WRN("%s: Invalid Content-Length in HEAD response: %s\n", __func__, e.what());
+            }
+        }
+
+        bool supports_ranges = false;
+        if (head_ok && head->has_header("Accept-Ranges")) {
+            supports_ranges = head->get_header_value("Accept-Ranges") != "none";
+        }
+
+        bool should_download_from_scratch = false;
+        if (!last_etag.empty() && !etag.empty() && last_etag != etag) {
+            LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__,
+                    last_etag.c_str(), etag.c_str());
+            should_download_from_scratch = true;
+        }
+
+        if (file_exists) {
+            if (!should_download_from_scratch) {
+                LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
+                return true;
+            }
+            LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
+            if (remove(path.c_str()) != 0) {
+                LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
+                return false;
+            }
+        }
+
+        const std::string path_temporary = path + ".downloadInProgress";
+        size_t existing_size = 0;
+
+        if (std::filesystem::exists(path_temporary)) {
+            if (supports_ranges && !should_download_from_scratch) {
+                existing_size = std::filesystem::file_size(path_temporary);
+            } else if (remove(path_temporary.c_str()) != 0) {
+                LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
+                return false;
+            }
+        }
+
+        // start the download
+        LOG_INF("%s: trying to download model from %s to %s (etag:%s)...\n",
+            __func__, common_http_show_masked_url(parts).c_str(), path_temporary.c_str(), etag.c_str());
+        const bool was_pull_successful = common_pull_file(cli, parts.path, path_temporary, supports_ranges, existing_size, total_size);
+        if (!was_pull_successful) {
+            if (i + 1 < max_attempts) {
+                const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
+                LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
+                std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
+            } else {
+                LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
+            }
+            continue;
+        }
+
+        if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
+            LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
+            return false;
+        }
+        if (!etag.empty()) {
+            write_etag(path, etag);
+        }
+        break;
+    }
+
+    return true;
 }
 
-std::pair<long, std::vector<char>> common_remote_get_content(const std::string
-
-
+std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
+                                                             const common_remote_params & params) {
+    auto [cli, parts] = common_http_client(url);
+
+    httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
+    for (const auto & header : params.headers) {
+        size_t pos = header.find(':');
+        if (pos != std::string::npos) {
+            headers.emplace(header.substr(0, pos), header.substr(pos + 1));
+        } else {
+            headers.emplace(header, "");
+        }
+    }
+
+    if (params.timeout > 0) {
+        cli.set_read_timeout(params.timeout, 0);
+        cli.set_write_timeout(params.timeout, 0);
+    }
+
+    std::vector<char> buf;
+    auto res = cli.Get(parts.path, headers,
+        [&](const char *data, size_t len) {
+            buf.insert(buf.end(), data, data + len);
+            return params.max_size == 0 ||
+                   buf.size() <= static_cast<size_t>(params.max_size);
+        },
+        nullptr
+    );
+
+    if (!res) {
+        throw std::runtime_error("error: cannot make GET request");
+    }
 
-    return {};
+    return { res->status, std::move(buf) };
 }
 
 #endif // LLAMA_USE_CURL
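The hunk above replaces the old no-CURL stubs with a full download path built on cpp-httplib (note the new common/http.h in the file list). A reduced sketch of the resume pattern it uses — HEAD to probe Accept-Ranges, then GET with a Range header and a content receiver that streams to disk. Host and paths are hypothetical, and common_http_client()/logging are omitted:

    // Sketch only: cpp-httplib resumable download, assuming https support is compiled in.
    #include "httplib.h"
    #include <fstream>
    #include <string>

    static bool fetch_with_resume(const std::string & tmp_path, size_t have) {
        httplib::Client cli("https://example.com");

        auto head = cli.Head("/model.gguf");
        const bool ranges_ok = head && head->get_header_value("Accept-Ranges") != "none";

        httplib::Headers headers;
        if (ranges_ok && have > 0) {
            // ask the server for the missing tail only
            headers.emplace("Range", "bytes=" + std::to_string(have) + "-");
        }

        std::ofstream ofs(tmp_path, std::ios::binary | std::ios::app);
        auto res = cli.Get("/model.gguf", headers,
            [&](const char * data, size_t len) { // called once per received chunk
                ofs.write(data, len);
                return static_cast<bool>(ofs);   // returning false aborts the transfer
            });

        // 200 = full body, 206 = partial content for a resumed request
        return res && (res->status == 200 || res->status == 206);
    }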
@@ -1372,18 +1615,14 @@ static void add_rpc_devices(const std::string & servers) {
     if (!rpc_reg) {
         throw std::invalid_argument("failed to find RPC backend");
     }
-    typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint);
-    ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device");
-    if (!ggml_backend_rpc_add_device_fn) {
-        throw std::invalid_argument("failed to find RPC device add function");
+    typedef ggml_backend_reg_t (*ggml_backend_rpc_add_server_t)(const char * endpoint);
+    ggml_backend_rpc_add_server_t ggml_backend_rpc_add_server_fn = (ggml_backend_rpc_add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server");
+    if (!ggml_backend_rpc_add_server_fn) {
+        throw std::invalid_argument("failed to find RPC add server function");
     }
     for (const auto & server : rpc_servers) {
-        ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str());
-        if (dev) {
-            ggml_backend_device_register(dev);
-        } else {
-            throw std::invalid_argument("failed to register RPC device");
-        }
+        auto reg = ggml_backend_rpc_add_server_fn(server.c_str());
+        ggml_backend_register(reg);
     }
 }
 
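For reference, a condensed sketch of the proc-address pattern the rewritten add_rpc_devices() relies on: ggml backends expose optional entry points by name, so common code can resolve ggml_backend_rpc_add_server at runtime without linking the RPC backend directly. ggml_backend_reg_by_name("RPC") is assumed to be how rpc_reg is obtained earlier in the function:

    // Sketch of runtime entry-point resolution against a ggml backend registry.
    #include "ggml-backend.h"
    #include <stdexcept>

    static void register_rpc_server(const char * endpoint) {
        ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC");
        if (!rpc_reg) {
            throw std::invalid_argument("failed to find RPC backend");
        }

        typedef ggml_backend_reg_t (*add_server_t)(const char * endpoint);
        auto add_server = (add_server_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_server");
        if (!add_server) {
            throw std::invalid_argument("failed to find RPC add server function");
        }

        // each endpoint yields its own backend registry, registered globally
        ggml_backend_register(add_server(endpoint));
    }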
@@ -1689,13 +1928,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     }
     ).set_env("LLAMA_ARG_SWA_FULL"));
     add_opt(common_arg(
-        {"--swa-checkpoints"}, "N",
-        string_format("max number of SWA checkpoints per slot (default: %d)\n"
-                      "[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.n_swa_checkpoints),
+        {"--ctx-checkpoints", "--swa-checkpoints"}, "N",
+        string_format("max number of context checkpoints to create per slot (default: %d)\n"
+                      "[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.n_ctx_checkpoints),
+        [](common_params & params, int value) {
+            params.n_ctx_checkpoints = value;
+        }
+    ).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER}));
+    add_opt(common_arg(
+        {"--cache-ram", "-cram"}, "N",
+        string_format("set the maximum cache size in MiB (default: %d, -1 - no limit, 0 - disable)\n"
+                      "[(more info)](https://github.com/ggml-org/llama.cpp/pull/16391)", params.cache_ram_mib),
         [](common_params & params, int value) {
-            params.n_swa_checkpoints = value;
+            params.cache_ram_mib = value;
         }
-    ).set_env("LLAMA_ARG_SWA_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER}));
+    ).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--kv-unified", "-kvu"},
         string_format("use single unified KV buffer for the KV cache of all sequences (default: %s)\n"
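Usage note: the checkpoint flag was renamed, but `--swa-checkpoints` is registered as an alias of `--ctx-checkpoints`, so existing invocations keep working. For example (hypothetical model path): `llama-server -m model.gguf --ctx-checkpoints 8 --cache-ram 8192`, or equivalently via the environment, `LLAMA_ARG_CTX_CHECKPOINTS=8 LLAMA_ARG_CACHE_RAM=8192`.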
@@ -2345,6 +2592,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.no_extra_bufts = true;
         }
     ).set_env("LLAMA_ARG_NO_REPACK"));
+    add_opt(common_arg(
+        {"--no-host"},
+        "bypass host buffer allowing extra buffers to be used",
+        [](common_params & params) {
+            params.no_host = true;
+        }
+    ).set_env("LLAMA_ARG_NO_HOST"));
     add_opt(common_arg(
         {"-ctk", "--cache-type-k"}, "TYPE",
         string_format(
@@ -3104,7 +3358,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"--chat-template-kwargs"}, "STRING",
         string_format("sets additional params for the json template parser"),
-        [](common_params & params, const std::string &
+        [](common_params & params, const std::string & value) {
             auto parsed = json::parse(value);
             for (const auto & item : parsed.items()) {
                 params.default_template_kwargs[item.key()] = item.value().dump();
@@ -3186,7 +3440,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--reasoning-format"}, "FORMAT",
         "controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:\n"
         "- none: leaves thoughts unparsed in `message.content`\n"
-        "- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `deepseek-legacy`)\n"
+        "- deepseek: puts thoughts in `message.reasoning_content`\n"
+        "- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`\n"
         "(default: auto)",
         [](common_params & params, const std::string & value) {
             params.reasoning_format = common_reasoning_format_from_name(value);
@@ -3315,21 +3570,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         common_log_set_file(common_log_main(), value.c_str());
     }
     ));
-    add_opt(common_arg(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    add_opt(common_arg(
+        {"--log-colors"}, "[on|off|auto]",
+        "Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"
+        "'auto' enables colors when output is to a terminal",
+        [](common_params &, const std::string & value) {
+            if (is_truthy(value)) {
+                common_log_set_colors(common_log_main(), LOG_COLORS_ENABLED);
+            } else if (is_falsey(value)) {
+                common_log_set_colors(common_log_main(), LOG_COLORS_DISABLED);
+            } else if (is_autoy(value)) {
+                common_log_set_colors(common_log_main(), LOG_COLORS_AUTO);
+            } else {
+                throw std::invalid_argument(
+                    string_format("error: unkown value for --log-colors: '%s'\n", value.c_str()));
+            }
+        }
+    ).set_env("LLAMA_LOG_COLORS"));
     add_opt(common_arg(
         {"-v", "--verbose", "--log-verbose"},
         "Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
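Behavior note: `--log-colors` is now a tri-state option rather than a bare switch. `on` and `off` force colors, while `auto` (the default) enables them only when output goes to a terminal; the value can also be supplied through the `LLAMA_LOG_COLORS` environment variable, e.g. `LLAMA_LOG_COLORS=off llama-server ...`.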
@@ -3595,7 +3852,87 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     }
     ).set_examples({LLAMA_EXAMPLE_TTS}));
 
-
+    add_opt(common_arg(
+        {"--diffusion-steps"}, "N",
+        string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
+        [](common_params & params, int value) { params.diffusion.steps = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-visual"},
+        string_format("enable visual diffusion mode (show progressive generation) (default: %s)", params.diffusion.visual_mode ? "true" : "false"),
+        [](common_params & params) { params.diffusion.visual_mode = true; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-eps"}, "F",
+        string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
+        [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-algorithm"}, "N",
+        string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)", params.diffusion.algorithm),
+        [](common_params & params, int value) { params.diffusion.algorithm = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-alg-temp"}, "F",
+        string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
+        [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-block-length"}, "N",
+        string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
+        [](common_params & params, int value) { params.diffusion.block_length = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-cfg-scale"}, "F",
+        string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
+        [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        {"--diffusion-add-gumbel-noise"}, "F",
+        string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
+        [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "-lr", "--learning-rate" }, "ALPHA",
+        string_format("adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~10x (no momentum)", (double) params.lr.lr0),
+        [](common_params & params, const std::string & value) { params.lr.lr0 = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg({ "-lr-min", "--learning-rate-min" }, "ALPHA",
+        string_format("(if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)",
+                      (double) params.lr.lr_min),
+        [](common_params & params, const std::string & value) { params.lr.lr_min = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-decay-epochs", "--learning-rate-decay-epochs"}, "ALPHA",
+        string_format("(if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)", (double) params.lr.decay_epochs),
+        [](common_params & params, const std::string & value) { params.lr.decay_epochs = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-wd", "--weight-decay"}, "WD",
+        string_format("adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g).", (double) params.lr.wd),
+        [](common_params & params, const std::string & value) { params.lr.wd = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-val-split", "--val-split"}, "FRACTION",
+        string_format("fraction of data to use as validation set for training (default: %.2g).", (double) params.val_split),
+        [](common_params & params, const std::string & value) { params.val_split = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-epochs", "--epochs"}, "N",
+        string_format("optimizer max # of epochs (default: %d)", params.lr.epochs),
+        [](common_params & params, int epochs) { params.lr.epochs = epochs; }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"-opt", "--optimizer"}, "sgd|adamw", "adamw or sgd",
+        [](common_params & params, const std::string & name) {
+            params.optimizer = common_opt_get_optimizer(name.c_str());
+            if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
+                throw std::invalid_argument("invalid --optimizer, valid options: adamw, sgd");
+            }
+        }
+    ).set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+
+    // presets
     add_opt(common_arg(
         {"--tts-oute-default"},
         string_format("use default OuteTTS models (note: can download weights from the internet)"),
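These diffusion and training options are not all new: the two hunks that follow remove their earlier, scattered definitions (the old `--diffusion-add-gumbel-noise` block and the `-lr`/`-wd`/`-epochs`/`-opt` family), so the net effect is a consolidation into this single block ahead of the presets. A typical invocation using them might look like `llama-finetune -m model.gguf -lr 1e-5 -opt adamw -epochs 2` (binary name assumed from the LLAMA_EXAMPLE_FINETUNE tag).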
@@ -3608,42 +3945,16 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_TTS}));
 
     add_opt(common_arg(
-        {"--embd-bge-small-en-default"},
-        string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"),
+        {"--embd-gemma-default"},
+        string_format("use default EmbeddingGemma model (note: can download weights from the internet)"),
         [](common_params & params) {
-            params.model.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF";
-            params.model.hf_file = "bge-small-en-v1.5-q8_0.gguf";
-            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
-            params.embd_normalize = 2;
-            params.n_ctx = 512;
-            params.verbose_prompt = true;
-            params.embedding = true;
-        }
-    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
-
-    add_opt(common_arg(
-        {"--embd-e5-small-en-default"},
-        string_format("use default e5-small-v2 model (note: can download weights from the internet)"),
-        [](common_params & params) {
-            params.model.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF";
-            params.model.hf_file = "e5-small-v2-q8_0.gguf";
-            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
-            params.embd_normalize = 2;
-            params.n_ctx = 512;
-            params.verbose_prompt = true;
-            params.embedding = true;
-        }
-    ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER}));
-
-    add_opt(common_arg(
-        {"--embd-gte-small-default"},
-        string_format("use default gte-small model (note: can download weights from the internet)"),
-        [](common_params & params) {
-            params.model.hf_repo = "ggml-org/gte-small-Q8_0-GGUF";
-            params.model.hf_file = "gte-small-q8_0.gguf";
-            params.pooling_type = LLAMA_POOLING_TYPE_NONE;
-            params.embd_normalize = 2;
-            params.n_ctx = 512;
+            params.model.hf_repo = "ggml-org/embeddinggemma-300M-qat-q4_0-GGUF";
+            params.model.hf_file = "embeddinggemma-300M-qat-Q4_0.gguf";
+            params.port = 8011;
+            params.n_ubatch = 2048;
+            params.n_batch = 2048;
+            params.n_parallel = 32;
+            params.n_ctx = 2048*params.n_parallel;
             params.verbose_prompt = true;
             params.embedding = true;
         }
@@ -3738,96 +4049,65 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     add_opt(common_arg(
-        {
-        string_format("
-        [](common_params & params
-
-
-
-
-
-
-
+        {"--gpt-oss-20b-default"},
+        string_format("use gpt-oss-20b (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gpt-oss-20b-GGUF";
+            params.model.hf_file = "gpt-oss-20b-mxfp4.gguf";
+            params.port = 8013;
+            params.n_ubatch = 2048;
+            params.n_batch = 32768;
+            params.n_parallel = 2;
+            params.n_ctx = 131072*params.n_parallel;
+            params.sampling.temp = 1.0f;
+            params.sampling.top_p = 1.0f;
+            params.sampling.top_k = 0;
+            params.sampling.min_p = 0.01f;
+            params.use_jinja = true;
+            //params.default_template_kwargs["reasoning_effort"] = "\"high\"";
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     add_opt(common_arg(
-        {
-        string_format("
-        [](common_params & params
-
-
-
-
-
-
-
-
-
-
-
+        {"--gpt-oss-120b-default"},
+        string_format("use gpt-oss-120b (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gpt-oss-120b-GGUF";
+            params.port = 8013;
+            params.n_ubatch = 2048;
+            params.n_batch = 32768;
+            params.n_parallel = 2;
+            params.n_ctx = 131072*params.n_parallel;
+            params.sampling.temp = 1.0f;
+            params.sampling.top_p = 1.0f;
+            params.sampling.top_k = 0;
+            params.sampling.min_p = 0.01f;
+            params.use_jinja = true;
+            //params.default_template_kwargs["reasoning_effort"] = "\"high\"";
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     add_opt(common_arg(
-        {
-        string_format("
-        [](common_params & params
-
-
-
-
-
-    ).set_examples({
-    add_opt(common_arg(
-        { "--diffusion-add-gumbel-noise" }, "F",
-        string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
-        [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-
+        {"--vision-gemma-4b-default"},
+        string_format("use Gemma 3 4B QAT (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gemma-3-4b-it-qat-GGUF";
+            params.port = 8014;
+            params.n_ctx = 0;
+            params.use_jinja = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
-    add_opt(
-
-
-
-
-
-        .
-
-
-
-                          "(if >0) final learning rate after decay (if -decay-epochs is set, default=%.2g)",
-                          (double) params.lr.lr_min),
-                      [](common_params & params, const std::string & value) { params.lr.lr_min = std::stof(value); })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(
-        common_arg({ "-decay-epochs", "--learning-rate-decay-epochs" }, "ALPHA",
-                   string_format(
-                       "(if >0) decay learning rate to -lr-min after this many epochs (exponential decay, default=%.2g)",
-                       (double) params.lr.decay_epochs),
-                   [](common_params & params, const std::string & value) { params.lr.decay_epochs = std::stof(value); })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg(
-        { "-wd", "--weight-decay" }, "WD",
-        string_format(
-            "adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g).",
-            (double) params.lr.wd),
-        [](common_params & params, const std::string & value) { params.lr.wd = std::stof(value); })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg({ "-val-split", "--val-split" }, "FRACTION",
-                       string_format("fraction of data to use as validation set for training (default: %.2g).",
-                                     (double) params.val_split),
-                       [](common_params & params, const std::string & value) { params.val_split = std::stof(value); })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg({ "-epochs", "--epochs" }, "N",
-                       string_format("optimizer max # of epochs (default: %d)", params.lr.epochs),
-                       [](common_params & params, int epochs) { params.lr.epochs = epochs; })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
-    add_opt(common_arg({ "-opt", "--optimizer" }, "sgd|adamw", "adamw or sgd",
-                       [](common_params & params, const std::string & name) {
-                           params.optimizer = common_opt_get_optimizer(name.c_str());
-                           if (params.optimizer == GGML_OPT_OPTIMIZER_TYPE_COUNT) {
-                               throw std::invalid_argument("invalid --optimizer, valid options: adamw, sgd");
-                           }
-                       })
-        .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
+    add_opt(common_arg(
+        {"--vision-gemma-12b-default"},
+        string_format("use Gemma 3 12B QAT (note: can download weights from the internet)"),
+        [](common_params & params) {
+            params.model.hf_repo = "ggml-org/gemma-3-12b-it-qat-GGUF";
+            params.port = 8014;
+            params.n_ctx = 0;
+            params.use_jinja = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
     return ctx_arg;
 }