@fugood/llama.node 1.4.13 → 1.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/lib/binding.ts +23 -2
  2. package/lib/index.js +2 -1
  3. package/lib/index.ts +8 -1
  4. package/lib/parallel.ts +2 -2
  5. package/package.json +15 -15
  6. package/scripts/llama.cpp.patch +9 -12
  7. package/src/LlamaContext.cpp +16 -4
  8. package/src/llama.cpp/CMakeLists.txt +24 -8
  9. package/src/llama.cpp/common/CMakeLists.txt +3 -34
  10. package/src/llama.cpp/common/arg.cpp +183 -60
  11. package/src/llama.cpp/common/arg.h +0 -8
  12. package/src/llama.cpp/common/chat-parser.cpp +115 -0
  13. package/src/llama.cpp/common/chat.cpp +67 -0
  14. package/src/llama.cpp/common/chat.h +1 -0
  15. package/src/llama.cpp/common/common.cpp +2 -1
  16. package/src/llama.cpp/common/common.h +12 -7
  17. package/src/llama.cpp/common/debug.cpp +165 -0
  18. package/src/llama.cpp/common/debug.h +43 -0
  19. package/src/llama.cpp/common/download.cpp +88 -369
  20. package/src/llama.cpp/common/download.h +32 -5
  21. package/src/llama.cpp/common/preset.cpp +87 -2
  22. package/src/llama.cpp/common/preset.h +10 -1
  23. package/src/llama.cpp/ggml/include/ggml.h +5 -0
  24. package/src/llama.cpp/include/llama.h +5 -2
  25. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  26. package/src/llama.cpp/src/llama-arch.cpp +35 -0
  27. package/src/llama.cpp/src/llama-arch.h +1 -0
  28. package/src/llama.cpp/src/llama-chat.cpp +20 -0
  29. package/src/llama.cpp/src/llama-chat.h +1 -0
  30. package/src/llama.cpp/src/llama-graph.cpp +31 -43
  31. package/src/llama.cpp/src/llama-mmap.cpp +78 -42
  32. package/src/llama.cpp/src/llama-mmap.h +5 -4
  33. package/src/llama.cpp/src/llama-model-loader.cpp +17 -5
  34. package/src/llama.cpp/src/llama-model-loader.h +2 -0
  35. package/src/llama.cpp/src/llama-model.cpp +225 -101
  36. package/src/llama.cpp/src/llama-quant.cpp +1 -1
  37. package/src/llama.cpp/src/llama-sampling.cpp +1 -1
  38. package/src/llama.cpp/src/llama-vocab.cpp +37 -24
  39. package/src/llama.cpp/src/llama-vocab.h +1 -0
  40. package/src/llama.cpp/src/llama.cpp +63 -27
  41. package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
  42. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
  43. package/src/llama.cpp/src/models/models.h +13 -2
  44. package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
@@ -19,10 +19,7 @@
19
19
  #include <thread>
20
20
  #include <vector>
21
21
 
22
- #if defined(LLAMA_USE_CURL)
23
- #include <curl/curl.h>
24
- #include <curl/easy.h>
25
- #elif defined(LLAMA_USE_HTTPLIB)
22
+ #if defined(LLAMA_USE_HTTPLIB)
26
23
  #include "http.h"
27
24
  #endif
28
25
 
@@ -157,322 +154,21 @@ static std::string read_etag(const std::string & path) {
157
154
  return none;
158
155
  }
159
156
 
160
- #ifdef LLAMA_USE_CURL
161
-
162
- //
163
- // CURL utils
164
- //
165
-
166
- using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
167
-
168
- // cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
169
- struct curl_slist_ptr {
170
- struct curl_slist * ptr = nullptr;
171
- ~curl_slist_ptr() {
172
- if (ptr) {
173
- curl_slist_free_all(ptr);
174
- }
175
- }
176
- };
177
-
178
- static CURLcode common_curl_perf(CURL * curl) {
179
- CURLcode res = curl_easy_perform(curl);
180
- if (res != CURLE_OK) {
181
- LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
182
- }
183
-
184
- return res;
157
+ static bool is_http_status_ok(int status) {
158
+ return status >= 200 && status < 400;
185
159
  }
186
160
 
187
- // Send a HEAD request to retrieve the etag and last-modified headers
188
- struct common_load_model_from_url_headers {
189
- std::string etag;
190
- std::string last_modified;
191
- std::string accept_ranges;
192
- };
193
-
194
- struct FILE_deleter {
195
- void operator()(FILE * f) const { fclose(f); }
196
- };
197
-
198
- static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
199
- common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
200
- static std::regex header_regex("([^:]+): (.*)\r\n");
201
- static std::regex etag_regex("ETag", std::regex_constants::icase);
202
- static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
203
- static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
204
- std::string header(buffer, n_items);
205
- std::smatch match;
206
- if (std::regex_match(header, match, header_regex)) {
207
- const std::string & key = match[1];
208
- const std::string & value = match[2];
209
- if (std::regex_match(key, match, etag_regex)) {
210
- headers->etag = value;
211
- } else if (std::regex_match(key, match, last_modified_regex)) {
212
- headers->last_modified = value;
213
- } else if (std::regex_match(key, match, accept_ranges_regex)) {
214
- headers->accept_ranges = value;
215
- }
216
- }
217
-
218
- return n_items;
219
- }
220
-
221
- static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
222
- return std::fwrite(data, size, nmemb, static_cast<FILE *>(fd));
223
- }
224
-
225
- // helper function to hide password in URL
226
- static std::string llama_download_hide_password_in_url(const std::string & url) {
227
- // Use regex to match and replace the user[:password]@ pattern in URLs
228
- // Pattern: scheme://[user[:password]@]host[...]
229
- static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]://)(?:[^/@]+@)?.$)");
230
- std::smatch match;
231
-
232
- if (std::regex_match(url, match, url_regex)) {
233
- // match[1] = scheme (e.g., "https://")
234
- // match[2] = user[:password]@ part
235
- // match[3] = rest of URL (host and path)
236
- return match[1].str() + "********@" + match[3].str();
237
- }
238
-
239
- return url; // No credentials found or malformed URL
240
- }
241
-
242
- static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
243
- // Set the URL, allow to follow http redirection
244
- curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
245
- curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
246
-
247
- # if defined(_WIN32)
248
- // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
249
- // operating system. Currently implemented under MS-Windows.
250
- curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
251
- # endif
252
-
253
- curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
254
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
255
- curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
256
- }
257
-
258
- static void common_curl_easy_setopt_get(CURL * curl) {
259
- curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
260
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
261
-
262
- // display download progress
263
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
264
- }
265
-
266
- static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
267
- if (std::filesystem::exists(path_temporary)) {
268
- const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary));
269
- LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
270
- const std::string range_str = partial_size + "-";
271
- curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
272
- }
273
-
274
- // Always open file in append mode could be resuming
275
- std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "ab"));
276
- if (!outfile) {
277
- LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
278
- return false;
279
- }
280
-
281
- common_curl_easy_setopt_get(curl);
282
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
283
-
284
- return common_curl_perf(curl) == CURLE_OK;
285
- }
286
-
287
- static bool common_download_head(CURL * curl,
288
- curl_slist_ptr & http_headers,
289
- const std::string & url,
290
- const std::string & bearer_token) {
291
- if (!curl) {
292
- LOG_ERR("%s: error initializing libcurl\n", __func__);
293
- return false;
294
- }
295
-
296
- http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
297
- // Check if hf-token or bearer-token was specified
298
- if (!bearer_token.empty()) {
299
- std::string auth_header = "Authorization: Bearer " + bearer_token;
300
- http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
301
- }
302
-
303
- curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
304
- common_curl_easy_setopt_head(curl, url);
305
- return common_curl_perf(curl) == CURLE_OK;
306
- }
307
-
308
- // download one single file from remote URL to local path
309
- static bool common_download_file_single_online(const std::string & url,
310
- const std::string & path,
311
- const std::string & bearer_token) {
312
- static const int max_attempts = 3;
313
- static const int retry_delay_seconds = 2;
314
- for (int i = 0; i < max_attempts; ++i) {
315
- std::string etag;
316
-
317
- // Check if the file already exists locally
318
- const auto file_exists = std::filesystem::exists(path);
319
- if (file_exists) {
320
- etag = read_etag(path);
321
- } else {
322
- LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
323
- }
324
-
325
- bool head_request_ok = false;
326
- bool should_download = !file_exists; // by default, we should download if the file does not exist
327
-
328
- // Initialize libcurl
329
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
330
- common_load_model_from_url_headers headers;
331
- curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
332
- curl_slist_ptr http_headers;
333
- const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
334
- if (!was_perform_successful) {
335
- head_request_ok = false;
336
- }
337
-
338
- long http_code = 0;
339
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
340
- if (http_code == 200) {
341
- head_request_ok = true;
342
- } else {
343
- LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
344
- head_request_ok = false;
345
- }
346
-
347
- // if head_request_ok is false, we don't have the etag or last-modified headers
348
- // we leave should_download as-is, which is true if the file does not exist
349
- bool should_download_from_scratch = false;
350
- if (head_request_ok) {
351
- // check if ETag or Last-Modified headers are different
352
- // if it is, we need to download the file again
353
- if (!etag.empty() && etag != headers.etag) {
354
- LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
355
- headers.etag.c_str());
356
- should_download = true;
357
- should_download_from_scratch = true;
358
- }
359
- }
360
-
361
- const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
362
- if (should_download) {
363
- if (file_exists &&
364
- !accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
365
- LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
366
- if (remove(path.c_str()) != 0) {
367
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
368
- return false;
369
- }
370
- }
371
-
372
- const std::string path_temporary = path + ".downloadInProgress";
373
- if (should_download_from_scratch) {
374
- if (std::filesystem::exists(path_temporary)) {
375
- if (remove(path_temporary.c_str()) != 0) {
376
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
377
- return false;
378
- }
379
- }
380
-
381
- if (std::filesystem::exists(path)) {
382
- if (remove(path.c_str()) != 0) {
383
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
384
- return false;
385
- }
386
- }
387
- }
388
- if (head_request_ok) {
389
- write_etag(path, headers.etag);
390
- }
391
-
392
- // start the download
393
- LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
394
- __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
395
- headers.etag.c_str(), headers.last_modified.c_str());
396
- const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
397
- if (!was_pull_successful) {
398
- if (i + 1 < max_attempts) {
399
- const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
400
- LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
401
- std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
402
- } else {
403
- LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
404
- }
405
-
406
- continue;
407
- }
408
-
409
- long http_code = 0;
410
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
411
- if (http_code < 200 || http_code >= 400) {
412
- LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
413
- return false;
414
- }
415
-
416
- if (rename(path_temporary.c_str(), path.c_str()) != 0) {
417
- LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
418
- return false;
419
- }
420
- } else {
421
- LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
422
- }
423
-
424
- break;
425
- }
426
-
427
- return true;
428
- }
429
-
430
- std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
431
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
432
- curl_slist_ptr http_headers;
433
- std::vector<char> res_buffer;
434
-
435
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
436
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
437
- curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
438
- curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
439
- typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
440
- auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
441
- auto data_vec = static_cast<std::vector<char> *>(data);
442
- data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
443
- return size * nmemb;
444
- };
445
- curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
446
- curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
447
- #if defined(_WIN32)
448
- curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
449
- #endif
450
- if (params.timeout > 0) {
451
- curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
452
- }
453
- if (params.max_size > 0) {
454
- curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
455
- }
456
- http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
457
- for (const auto & header : params.headers) {
458
- http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
459
- }
460
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
461
-
462
- CURLcode res = curl_easy_perform(curl.get());
463
-
464
- if (res != CURLE_OK) {
465
- std::string error_msg = curl_easy_strerror(res);
466
- throw std::runtime_error("error: cannot make GET request: " + error_msg);
161
+ std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
162
+ auto parts = string_split<std::string>(hf_repo_with_tag, ':');
163
+ std::string tag = parts.size() > 1 ? parts.back() : "latest";
164
+ std::string hf_repo = parts[0];
165
+ if (string_split<std::string>(hf_repo, '/').size() != 2) {
166
+ throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
467
167
  }
468
-
469
- long res_code;
470
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
471
-
472
- return { res_code, std::move(res_buffer) };
168
+ return {hf_repo, tag};
473
169
  }
474
170
 
475
- #elif defined(LLAMA_USE_HTTPLIB)
171
+ #if defined(LLAMA_USE_HTTPLIB)
476
172
 
477
173
  class ProgressBar {
478
174
  static inline std::mutex mutex;
@@ -617,9 +313,11 @@ static bool common_pull_file(httplib::Client & cli,
617
313
  }
618
314
 
619
315
  // download one single file from remote URL to local path
620
- static bool common_download_file_single_online(const std::string & url,
316
+ // returns status code or -1 on error
317
+ static int common_download_file_single_online(const std::string & url,
621
318
  const std::string & path,
622
- const std::string & bearer_token) {
319
+ const std::string & bearer_token,
320
+ const common_header_list & custom_headers) {
623
321
  static const int max_attempts = 3;
624
322
  static const int retry_delay_seconds = 2;
625
323
 
@@ -629,6 +327,9 @@ static bool common_download_file_single_online(const std::string & url,
629
327
  if (!bearer_token.empty()) {
630
328
  default_headers.insert({"Authorization", "Bearer " + bearer_token});
631
329
  }
330
+ for (const auto & h : custom_headers) {
331
+ default_headers.emplace(h.first, h.second);
332
+ }
632
333
  cli.set_default_headers(default_headers);
633
334
 
634
335
  const bool file_exists = std::filesystem::exists(path);
@@ -647,8 +348,10 @@ static bool common_download_file_single_online(const std::string & url,
647
348
  LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
648
349
  if (file_exists) {
649
350
  LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
650
- return true;
351
+ return 304; // 304 Not Modified - fake cached response
651
352
  }
353
+ return head->status; // cannot use cached file, return raw status code
354
+ // TODO: maybe retry only on certain codes
652
355
  }
653
356
 
654
357
  std::string etag;
@@ -680,12 +383,12 @@ static bool common_download_file_single_online(const std::string & url,
680
383
  if (file_exists) {
681
384
  if (!should_download_from_scratch) {
682
385
  LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
683
- return true;
386
+ return 304; // 304 Not Modified - fake cached response
684
387
  }
685
388
  LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
686
389
  if (remove(path.c_str()) != 0) {
687
390
  LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
688
- return false;
391
+ return -1;
689
392
  }
690
393
  }
691
394
 
@@ -697,7 +400,7 @@ static bool common_download_file_single_online(const std::string & url,
697
400
  existing_size = std::filesystem::file_size(path_temporary);
698
401
  } else if (remove(path_temporary.c_str()) != 0) {
699
402
  LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
700
- return false;
403
+ return -1;
701
404
  }
702
405
  }
703
406
 
@@ -718,15 +421,16 @@ static bool common_download_file_single_online(const std::string & url,
718
421
 
719
422
  if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
720
423
  LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
721
- return false;
424
+ return -1;
722
425
  }
723
426
  if (!etag.empty()) {
724
427
  write_etag(path, etag);
725
428
  }
726
- break;
429
+
430
+ return head->status; // TODO: use actual GET status?
727
431
  }
728
432
 
729
- return true;
433
+ return -1; // max attempts reached
730
434
  }
731
435
 
732
436
  std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
@@ -734,13 +438,9 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
734
438
  auto [cli, parts] = common_http_client(url);
735
439
 
736
440
  httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
441
+
737
442
  for (const auto & header : params.headers) {
738
- size_t pos = header.find(':');
739
- if (pos != std::string::npos) {
740
- headers.emplace(header.substr(0, pos), header.substr(pos + 1));
741
- } else {
742
- headers.emplace(header, "");
743
- }
443
+ headers.emplace(header.first, header.second);
744
444
  }
745
445
 
746
446
  if (params.timeout > 0) {
@@ -765,36 +465,45 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
765
465
  return { res->status, std::move(buf) };
766
466
  }
767
467
 
768
- #endif // LLAMA_USE_CURL
769
-
770
- #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
771
-
772
- static bool common_download_file_single(const std::string & url,
773
- const std::string & path,
774
- const std::string & bearer_token,
775
- bool offline) {
468
+ int common_download_file_single(const std::string & url,
469
+ const std::string & path,
470
+ const std::string & bearer_token,
471
+ bool offline,
472
+ const common_header_list & headers) {
776
473
  if (!offline) {
777
- return common_download_file_single_online(url, path, bearer_token);
474
+ return common_download_file_single_online(url, path, bearer_token, headers);
778
475
  }
779
476
 
780
477
  if (!std::filesystem::exists(path)) {
781
478
  LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
782
- return false;
479
+ return -1;
783
480
  }
784
481
 
785
482
  LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
786
- return true;
483
+ return 304; // Not Modified - fake cached response
787
484
  }
788
485
 
789
486
  // download multiple files from remote URLs to local paths
790
487
  // the input is a vector of pairs <url, path>
791
- static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) {
488
+ static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls,
489
+ const std::string & bearer_token,
490
+ bool offline,
491
+ const common_header_list & headers) {
792
492
  // Prepare download in parallel
793
493
  std::vector<std::future<bool>> futures_download;
494
+ futures_download.reserve(urls.size());
495
+
794
496
  for (auto const & item : urls) {
795
- futures_download.push_back(std::async(std::launch::async, [bearer_token, offline](const std::pair<std::string, std::string> & it) -> bool {
796
- return common_download_file_single(it.first, it.second, bearer_token, offline);
797
- }, item));
497
+ futures_download.push_back(
498
+ std::async(
499
+ std::launch::async,
500
+ [&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
501
+ const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
502
+ return is_http_status_ok(http_status);
503
+ },
504
+ item
505
+ )
506
+ );
798
507
  }
799
508
 
800
509
  // Wait for all downloads to complete
@@ -807,17 +516,18 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
807
516
  return true;
808
517
  }
809
518
 
810
- bool common_download_model(
811
- const common_params_model & model,
812
- const std::string & bearer_token,
813
- bool offline) {
519
+ bool common_download_model(const common_params_model & model,
520
+ const std::string & bearer_token,
521
+ bool offline,
522
+ const common_header_list & headers) {
814
523
  // Basic validation of the model.url
815
524
  if (model.url.empty()) {
816
525
  LOG_ERR("%s: invalid model url\n", __func__);
817
526
  return false;
818
527
  }
819
528
 
820
- if (!common_download_file_single(model.url, model.path, bearer_token, offline)) {
529
+ const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
530
+ if (!is_http_status_ok(http_status)) {
821
531
  return false;
822
532
  }
823
533
 
@@ -876,27 +586,26 @@ bool common_download_model(
876
586
  }
877
587
 
878
588
  // Download in parallel
879
- common_download_file_multiple(urls, bearer_token, offline);
589
+ common_download_file_multiple(urls, bearer_token, offline, headers);
880
590
  }
881
591
 
882
592
  return true;
883
593
  }
884
594
 
885
- common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) {
886
- auto parts = string_split<std::string>(hf_repo_with_tag, ':');
887
- std::string tag = parts.size() > 1 ? parts.back() : "latest";
888
- std::string hf_repo = parts[0];
889
- if (string_split<std::string>(hf_repo, '/').size() != 2) {
890
- throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
891
- }
595
+ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
596
+ const std::string & bearer_token,
597
+ bool offline,
598
+ const common_header_list & custom_headers) {
599
+ // the returned hf_repo is without tag
600
+ auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);
892
601
 
893
602
  std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
894
603
 
895
604
  // headers
896
- std::vector<std::string> headers;
897
- headers.push_back("Accept: application/json");
605
+ common_header_list headers = custom_headers;
606
+ headers.push_back({"Accept", "application/json"});
898
607
  if (!bearer_token.empty()) {
899
- headers.push_back("Authorization: Bearer " + bearer_token);
608
+ headers.push_back({"Authorization", "Bearer " + bearer_token});
900
609
  }
901
610
  // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
902
611
  // User-Agent header is already set in common_remote_get_content, no need to set it here
@@ -952,7 +661,7 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
952
661
  } else if (res_code == 401) {
953
662
  throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
954
663
  } else {
955
- throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
664
+ throw std::runtime_error(string_format("error from HF API (%s), response code: %ld, data: %s", url.c_str(), res_code, res_str.c_str()));
956
665
  }
957
666
 
958
667
  // check response
@@ -1031,9 +740,10 @@ std::string common_docker_resolve_model(const std::string & docker) {
1031
740
  const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
1032
741
  std::string manifest_url = url_prefix + "/manifests/" + tag;
1033
742
  common_remote_params manifest_params;
1034
- manifest_params.headers.push_back("Authorization: Bearer " + token);
1035
- manifest_params.headers.push_back(
1036
- "Accept: application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json");
743
+ manifest_params.headers.push_back({"Authorization", "Bearer " + token});
744
+ manifest_params.headers.push_back({"Accept",
745
+ "application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json"
746
+ });
1037
747
  auto manifest_res = common_remote_get_content(manifest_url, manifest_params);
1038
748
  if (manifest_res.first != 200) {
1039
749
  throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(manifest_res.first));
@@ -1070,7 +780,8 @@ std::string common_docker_resolve_model(const std::string & docker) {
1070
780
  std::string local_path = fs_get_cache_file(model_filename);
1071
781
 
1072
782
  const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
1073
- if (!common_download_file_single(blob_url, local_path, token, false)) {
783
+ const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
784
+ if (!is_http_status_ok(http_status)) {
1074
785
  throw std::runtime_error("Failed to download Docker Model");
1075
786
  }
1076
787
 
@@ -1084,11 +795,11 @@ std::string common_docker_resolve_model(const std::string & docker) {
1084
795
 
1085
796
  #else
1086
797
 
1087
- common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
798
+ common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool, const common_header_list &) {
1088
799
  throw std::runtime_error("download functionality is not enabled in this build");
1089
800
  }
1090
801
 
1091
- bool common_download_model(const common_params_model &, const std::string &, bool) {
802
+ bool common_download_model(const common_params_model &, const std::string &, bool, const common_header_list &) {
1092
803
  throw std::runtime_error("download functionality is not enabled in this build");
1093
804
  }
1094
805
 
@@ -1096,7 +807,15 @@ std::string common_docker_resolve_model(const std::string &) {
1096
807
  throw std::runtime_error("download functionality is not enabled in this build");
1097
808
  }
1098
809
 
1099
- #endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
810
+ int common_download_file_single(const std::string &,
811
+ const std::string &,
812
+ const std::string &,
813
+ bool,
814
+ const common_header_list &) {
815
+ throw std::runtime_error("download functionality is not enabled in this build");
816
+ }
817
+
818
+ #endif // defined(LLAMA_USE_HTTPLIB)
1100
819
 
1101
820
  std::vector<common_cached_model_info> common_list_cached_models() {
1102
821
  std::vector<common_cached_model_info> models;