@fugood/llama.node 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30):
  1. package/package.json +14 -14
  2. package/src/llama.cpp/common/arg.cpp +359 -310
  3. package/src/llama.cpp/common/chat.cpp +27 -15
  4. package/src/llama.cpp/common/common.cpp +1 -0
  5. package/src/llama.cpp/common/sampling.cpp +1 -0
  6. package/src/llama.cpp/ggml/CMakeLists.txt +37 -21
  7. package/src/llama.cpp/ggml/include/ggml-backend.h +2 -1
  8. package/src/llama.cpp/ggml/include/ggml-zdnn.h +3 -0
  9. package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
  10. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +4 -2
  11. package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +2 -2
  12. package/src/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
  13. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +17 -3
  14. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +1 -1
  15. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +93 -862
  16. package/src/llama.cpp/include/llama.h +15 -11
  17. package/src/llama.cpp/src/llama-context.cpp +151 -0
  18. package/src/llama.cpp/src/llama-context.h +10 -0
  19. package/src/llama.cpp/src/llama-cparams.h +1 -1
  20. package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +8 -0
  21. package/src/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
  22. package/src/llama.cpp/src/llama-kv-cache.cpp +8 -0
  23. package/src/llama.cpp/src/llama-kv-cache.h +2 -0
  24. package/src/llama.cpp/src/llama-memory-hybrid.cpp +8 -0
  25. package/src/llama.cpp/src/llama-memory-hybrid.h +2 -0
  26. package/src/llama.cpp/src/llama-memory-recurrent.cpp +8 -0
  27. package/src/llama.cpp/src/llama-memory-recurrent.h +3 -0
  28. package/src/llama.cpp/src/llama-memory.h +3 -0
  29. package/src/llama.cpp/src/llama-model.cpp +14 -4
  30. package/src/llama.cpp/src/llama-model.h +5 -1
@@ -24,6 +24,7 @@
24
24
  #include <cstdarg>
25
25
  #include <filesystem>
26
26
  #include <fstream>
27
+ #include <future>
27
28
  #include <list>
28
29
  #include <regex>
29
30
  #include <set>
@@ -36,9 +37,21 @@
36
37
  #if defined(LLAMA_USE_CURL)
37
38
  #include <curl/curl.h>
38
39
  #include <curl/easy.h>
39
- #include <future>
40
40
  #endif
41
41
 
42
+ #ifdef __linux__
43
+ #include <linux/limits.h>
44
+ #elif defined(_WIN32)
45
+ # if !defined(PATH_MAX)
46
+ # define PATH_MAX MAX_PATH
47
+ # endif
48
+ #elif defined(_AIX)
49
+ #include <sys/limits.h>
50
+ #else
51
+ #include <sys/syslimits.h>
52
+ #endif
53
+ #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
54
+
42
55
  using json = nlohmann::ordered_json;
43
56
 
44
57
  std::initializer_list<enum llama_example> mmproj_examples = {
@@ -57,12 +70,32 @@ static std::string read_file(const std::string & fname) {
57
70
  }
58
71
 
59
72
  static void write_file(const std::string & fname, const std::string & content) {
60
- std::ofstream file(fname);
73
+ const std::string fname_tmp = fname + ".tmp";
74
+ std::ofstream file(fname_tmp);
61
75
  if (!file) {
62
76
  throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
63
77
  }
64
- file << content;
65
- file.close();
78
+
79
+ try {
80
+ file << content;
81
+ file.close();
82
+
83
+ // Makes write atomic
84
+ if (rename(fname_tmp.c_str(), fname.c_str()) != 0) {
85
+ LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str());
86
+ // If rename fails, try to delete the temporary file
87
+ if (remove(fname_tmp.c_str()) != 0) {
88
+ LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
89
+ }
90
+ }
91
+ } catch (...) {
92
+ // If anything fails, try to delete the temporary file
93
+ if (remove(fname_tmp.c_str()) != 0) {
94
+ LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
95
+ }
96
+
97
+ throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str()));
98
+ }
66
99
  }
67
100
 
68
101
  common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
@@ -188,19 +221,6 @@ bool common_has_curl() {
188
221
  return true;
189
222
  }
190
223
 
191
- #ifdef __linux__
192
- #include <linux/limits.h>
193
- #elif defined(_WIN32)
194
- # if !defined(PATH_MAX)
195
- # define PATH_MAX MAX_PATH
196
- # endif
197
- #elif defined(_AIX)
198
- #include <sys/limits.h>
199
- #else
200
- #include <sys/syslimits.h>
201
- #endif
202
- #define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
203
-
204
224
  //
205
225
  // CURL utils
206
226
  //
@@ -217,252 +237,370 @@ struct curl_slist_ptr {
217
237
  }
218
238
  };
219
239
 
220
- #define CURL_MAX_RETRY 3
221
- #define CURL_RETRY_DELAY_SECONDS 2
240
+ static CURLcode common_curl_perf(CURL * curl) {
241
+ CURLcode res = curl_easy_perform(curl);
242
+ if (res != CURLE_OK) {
243
+ LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
244
+ }
222
245
 
223
- static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds, const char * method_name) {
224
- int remaining_attempts = max_attempts;
246
+ return res;
247
+ }
225
248
 
226
- while (remaining_attempts > 0) {
227
- LOG_INF("%s: %s %s (attempt %d of %d)...\n", __func__ , method_name, url.c_str(), max_attempts - remaining_attempts + 1, max_attempts);
249
+ // Send a HEAD request to retrieve the etag and last-modified headers
250
+ struct common_load_model_from_url_headers {
251
+ std::string etag;
252
+ std::string last_modified;
253
+ std::string accept_ranges;
254
+ };
228
255
 
229
- CURLcode res = curl_easy_perform(curl);
230
- if (res == CURLE_OK) {
231
- return true;
232
- }
256
+ struct FILE_deleter {
257
+ void operator()(FILE * f) const { fclose(f); }
258
+ };
259
+
260
+ static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
261
+ common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
262
+ static std::regex header_regex("([^:]+): (.*)\r\n");
263
+ static std::regex etag_regex("ETag", std::regex_constants::icase);
264
+ static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
265
+ static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
266
+ std::string header(buffer, n_items);
267
+ std::smatch match;
268
+ if (std::regex_match(header, match, header_regex)) {
269
+ const std::string & key = match[1];
270
+ const std::string & value = match[2];
271
+ if (std::regex_match(key, match, etag_regex)) {
272
+ headers->etag = value;
273
+ } else if (std::regex_match(key, match, last_modified_regex)) {
274
+ headers->last_modified = value;
275
+ } else if (std::regex_match(key, match, accept_ranges_regex)) {
276
+ headers->accept_ranges = value;
277
+ }
278
+ }
279
+
280
+ return n_items;
281
+ }
233
282
 
234
- int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000;
235
- LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay);
283
+ static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
284
+ return std::fwrite(data, size, nmemb, static_cast<FILE *>(fd));
285
+ }
286
+
287
+ // helper function to hide password in URL
288
+ static std::string llama_download_hide_password_in_url(const std::string & url) {
289
+ // Use regex to match and replace the user[:password]@ pattern in URLs
290
+ // Pattern: scheme://[user[:password]@]host[...]
291
+ static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]://)(?:[^/@]+@)?.$)");
292
+ std::smatch match;
236
293
 
237
- remaining_attempts--;
238
- if (remaining_attempts == 0) break;
239
- std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
294
+ if (std::regex_match(url, match, url_regex)) {
295
+ // match[1] = scheme (e.g., "https://")
296
+ // match[2] = user[:password]@ part
297
+ // match[3] = rest of URL (host and path)
298
+ return match[1].str() + "********@" + match[3].str();
240
299
  }
241
300
 
242
- LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
301
+ return url; // No credentials found or malformed URL
302
+ }
243
303
 
244
- return false;
304
+ static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
305
+ // Set the URL, allow to follow http redirection
306
+ curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
307
+ curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
308
+
309
+ # if defined(_WIN32)
310
+ // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
311
+ // operating system. Currently implemented under MS-Windows.
312
+ curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
313
+ # endif
314
+
315
+ curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
316
+ curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
317
+ curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
245
318
  }
246
319
 
247
- // download one single file from remote URL to local path
248
- static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) {
249
- // Check if the file already exists locally
250
- auto file_exists = std::filesystem::exists(path);
320
+ static void common_curl_easy_setopt_get(CURL * curl) {
321
+ curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
322
+ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
251
323
 
252
- // If the file exists, check its JSON metadata companion file.
253
- std::string metadata_path = path + ".json";
254
- nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
255
- std::string etag;
256
- std::string last_modified;
324
+ // display download progress
325
+ curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
326
+ }
257
327
 
258
- if (file_exists) {
259
- if (offline) {
260
- LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
261
- return true; // skip verification/downloading
262
- }
263
- // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
264
- std::ifstream metadata_in(metadata_path);
265
- if (metadata_in.good()) {
266
- try {
267
- metadata_in >> metadata;
268
- LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
269
- if (metadata.contains("etag") && metadata.at("etag").is_string()) {
270
- etag = metadata.at("etag");
271
- }
272
- if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
273
- last_modified = metadata.at("lastModified");
274
- }
275
- } catch (const nlohmann::json::exception & e) {
276
- LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
277
- }
278
- }
279
- // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
280
- } else {
281
- if (offline) {
282
- LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
283
- return false;
284
- }
285
- LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
328
+ static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
329
+ if (std::filesystem::exists(path_temporary)) {
330
+ const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary));
331
+ LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
332
+ const std::string range_str = partial_size + "-";
333
+ curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
286
334
  }
287
335
 
288
- // Send a HEAD request to retrieve the etag and last-modified headers
289
- struct common_load_model_from_url_headers {
290
- std::string etag;
291
- std::string last_modified;
292
- };
336
+ // Always open file in append mode could be resuming
337
+ std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "ab"));
338
+ if (!outfile) {
339
+ LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
340
+ return false;
341
+ }
293
342
 
294
- common_load_model_from_url_headers headers;
295
- bool head_request_ok = false;
296
- bool should_download = !file_exists; // by default, we should download if the file does not exist
343
+ common_curl_easy_setopt_get(curl);
344
+ curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
297
345
 
298
- // Initialize libcurl
299
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
300
- curl_slist_ptr http_headers;
346
+ return common_curl_perf(curl) == CURLE_OK;
347
+ }
348
+
349
+ static bool common_download_head(CURL * curl,
350
+ curl_slist_ptr & http_headers,
351
+ const std::string & url,
352
+ const std::string & bearer_token) {
301
353
  if (!curl) {
302
354
  LOG_ERR("%s: error initializing libcurl\n", __func__);
303
355
  return false;
304
356
  }
305
357
 
306
- // Set the URL, allow to follow http redirection
307
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
308
- curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
309
-
310
358
  http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
311
359
  // Check if hf-token or bearer-token was specified
312
360
  if (!bearer_token.empty()) {
313
361
  std::string auth_header = "Authorization: Bearer " + bearer_token;
314
- http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
362
+ http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
315
363
  }
316
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
317
364
 
318
- #if defined(_WIN32)
319
- // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
320
- // operating system. Currently implemented under MS-Windows.
321
- curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
322
- #endif
365
+ curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
366
+ common_curl_easy_setopt_head(curl, url);
367
+ return common_curl_perf(curl) == CURLE_OK;
368
+ }
323
369
 
324
- typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
325
- auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
326
- common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
370
+ // download one single file from remote URL to local path
371
+ static bool common_download_file_single_online(const std::string & url,
372
+ const std::string & path,
373
+ const std::string & bearer_token) {
374
+ // If the file exists, check its JSON metadata companion file.
375
+ std::string metadata_path = path + ".json";
376
+ static const int max_attempts = 3;
377
+ static const int retry_delay_seconds = 2;
378
+ for (int i = 0; i < max_attempts; ++i) {
379
+ nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
380
+ std::string etag;
381
+ std::string last_modified;
382
+
383
+ // Check if the file already exists locally
384
+ const auto file_exists = std::filesystem::exists(path);
385
+ if (file_exists) {
386
+ // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
387
+ std::ifstream metadata_in(metadata_path);
388
+ if (metadata_in.good()) {
389
+ try {
390
+ metadata_in >> metadata;
391
+ LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
392
+ metadata.dump().c_str());
393
+ if (metadata.contains("etag") && metadata.at("etag").is_string()) {
394
+ etag = metadata.at("etag");
395
+ }
396
+ if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
397
+ last_modified = metadata.at("lastModified");
398
+ }
399
+ } catch (const nlohmann::json::exception & e) {
400
+ LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
401
+ }
402
+ }
403
+ // if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
404
+ } else {
405
+ LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
406
+ }
327
407
 
328
- static std::regex header_regex("([^:]+): (.*)\r\n");
329
- static std::regex etag_regex("ETag", std::regex_constants::icase);
330
- static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
408
+ bool head_request_ok = false;
409
+ bool should_download = !file_exists; // by default, we should download if the file does not exist
331
410
 
332
- std::string header(buffer, n_items);
333
- std::smatch match;
334
- if (std::regex_match(header, match, header_regex)) {
335
- const std::string & key = match[1];
336
- const std::string & value = match[2];
337
- if (std::regex_match(key, match, etag_regex)) {
338
- headers->etag = value;
339
- } else if (std::regex_match(key, match, last_modified_regex)) {
340
- headers->last_modified = value;
341
- }
411
+ // Initialize libcurl
412
+ curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
413
+ common_load_model_from_url_headers headers;
414
+ curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
415
+ curl_slist_ptr http_headers;
416
+ const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
417
+ if (!was_perform_successful) {
418
+ head_request_ok = false;
342
419
  }
343
- return n_items;
344
- };
345
420
 
346
- curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
347
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
348
- curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
349
- curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
421
+ long http_code = 0;
422
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
423
+ if (http_code == 200) {
424
+ head_request_ok = true;
425
+ } else {
426
+ LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
427
+ head_request_ok = false;
428
+ }
429
+
430
+ // if head_request_ok is false, we don't have the etag or last-modified headers
431
+ // we leave should_download as-is, which is true if the file does not exist
432
+ bool should_download_from_scratch = false;
433
+ if (head_request_ok) {
434
+ // check if ETag or Last-Modified headers are different
435
+ // if it is, we need to download the file again
436
+ if (!etag.empty() && etag != headers.etag) {
437
+ LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
438
+ headers.etag.c_str());
439
+ should_download = true;
440
+ should_download_from_scratch = true;
441
+ } else if (!last_modified.empty() && last_modified != headers.last_modified) {
442
+ LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
443
+ last_modified.c_str(), headers.last_modified.c_str());
444
+ should_download = true;
445
+ should_download_from_scratch = true;
446
+ }
447
+ }
448
+
449
+ const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
450
+ if (should_download) {
451
+ if (file_exists &&
452
+ !accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
453
+ LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
454
+ if (remove(path.c_str()) != 0) {
455
+ LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
456
+ return false;
457
+ }
458
+ }
350
459
 
351
- // we only allow retrying once for HEAD requests
352
- // this is for the use case of using running offline (no internet), retrying can be annoying
353
- bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
354
- if (!was_perform_successful) {
355
- head_request_ok = false;
356
- }
460
+ const std::string path_temporary = path + ".downloadInProgress";
461
+ if (should_download_from_scratch) {
462
+ if (std::filesystem::exists(path_temporary)) {
463
+ if (remove(path_temporary.c_str()) != 0) {
464
+ LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
465
+ return false;
466
+ }
467
+ }
357
468
 
358
- long http_code = 0;
359
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
360
- if (http_code == 200) {
361
- head_request_ok = true;
362
- } else {
363
- LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
364
- head_request_ok = false;
365
- }
469
+ if (std::filesystem::exists(path)) {
470
+ if (remove(path.c_str()) != 0) {
471
+ LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
472
+ return false;
473
+ }
474
+ }
475
+ }
366
476
 
367
- // if head_request_ok is false, we don't have the etag or last-modified headers
368
- // we leave should_download as-is, which is true if the file does not exist
369
- if (head_request_ok) {
370
- // check if ETag or Last-Modified headers are different
371
- // if it is, we need to download the file again
372
- if (!etag.empty() && etag != headers.etag) {
373
- LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
374
- should_download = true;
375
- } else if (!last_modified.empty() && last_modified != headers.last_modified) {
376
- LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
377
- should_download = true;
378
- }
379
- }
477
+ // Write the updated JSON metadata file.
478
+ metadata.update({
479
+ { "url", url },
480
+ { "etag", headers.etag },
481
+ { "lastModified", headers.last_modified }
482
+ });
483
+ write_file(metadata_path, metadata.dump(4));
484
+ LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
485
+
486
+ // start the download
487
+ LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
488
+ __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
489
+ headers.etag.c_str(), headers.last_modified.c_str());
490
+ const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
491
+ if (!was_pull_successful) {
492
+ if (i + 1 < max_attempts) {
493
+ const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
494
+ LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
495
+ std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
496
+ } else {
497
+ LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
498
+ }
380
499
 
381
- if (should_download) {
382
- std::string path_temporary = path + ".downloadInProgress";
383
- if (file_exists) {
384
- LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
385
- if (remove(path.c_str()) != 0) {
386
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
500
+ continue;
501
+ }
502
+
503
+ long http_code = 0;
504
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
505
+ if (http_code < 200 || http_code >= 400) {
506
+ LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
387
507
  return false;
388
508
  }
509
+
510
+ if (rename(path_temporary.c_str(), path.c_str()) != 0) {
511
+ LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
512
+ return false;
513
+ }
514
+ } else {
515
+ LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
389
516
  }
390
517
 
391
- // Set the output file
518
+ break;
519
+ }
392
520
 
393
- struct FILE_deleter {
394
- void operator()(FILE * f) const {
395
- fclose(f);
396
- }
397
- };
521
+ return true;
522
+ }
398
523
 
399
- std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
400
- if (!outfile) {
401
- LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
402
- return false;
403
- }
524
+ std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
525
+ curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
526
+ curl_slist_ptr http_headers;
527
+ std::vector<char> res_buffer;
404
528
 
405
- typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
406
- auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
407
- return fwrite(data, size, nmemb, (FILE *)fd);
408
- };
409
- curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
410
- curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
411
- curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
529
+ curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
530
+ curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
531
+ curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
532
+ curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
533
+ typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
534
+ auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
535
+ auto data_vec = static_cast<std::vector<char> *>(data);
536
+ data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
537
+ return size * nmemb;
538
+ };
539
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
540
+ curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
541
+ #if defined(_WIN32)
542
+ curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
543
+ #endif
544
+ if (params.timeout > 0) {
545
+ curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
546
+ }
547
+ if (params.max_size > 0) {
548
+ curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
549
+ }
550
+ http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
551
+ for (const auto & header : params.headers) {
552
+ http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
553
+ }
554
+ curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
412
555
 
413
- // display download progress
414
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
556
+ CURLcode res = curl_easy_perform(curl.get());
415
557
 
416
- // helper function to hide password in URL
417
- auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
418
- std::size_t protocol_pos = url.find("://");
419
- if (protocol_pos == std::string::npos) {
420
- return url; // Malformed URL
421
- }
558
+ if (res != CURLE_OK) {
559
+ std::string error_msg = curl_easy_strerror(res);
560
+ throw std::runtime_error("error: cannot make GET request: " + error_msg);
561
+ }
422
562
 
423
- std::size_t at_pos = url.find('@', protocol_pos + 3);
424
- if (at_pos == std::string::npos) {
425
- return url; // No password in URL
426
- }
563
+ long res_code;
564
+ curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
427
565
 
428
- return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos);
429
- };
566
+ return { res_code, std::move(res_buffer) };
567
+ }
430
568
 
431
- // start the download
432
- LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
433
- llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
434
- bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET");
435
- if (!was_perform_successful) {
436
- return false;
437
- }
569
+ #else
438
570
 
439
- long http_code = 0;
440
- curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
441
- if (http_code < 200 || http_code >= 400) {
442
- LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
443
- return false;
444
- }
571
+ bool common_has_curl() {
572
+ return false;
573
+ }
445
574
 
446
- // Causes file to be closed explicitly here before we rename it.
447
- outfile.reset();
575
+ static bool common_download_file_single_online(const std::string &, const std::string &, const std::string &) {
576
+ LOG_ERR("error: built without CURL, cannot download model from internet\n");
577
+ return false;
578
+ }
448
579
 
449
- // Write the updated JSON metadata file.
450
- metadata.update({
451
- {"url", url},
452
- {"etag", headers.etag},
453
- {"lastModified", headers.last_modified}
454
- });
455
- write_file(metadata_path, metadata.dump(4));
456
- LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
580
+ std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params &) {
581
+ if (!url.empty()) {
582
+ throw std::runtime_error("error: built without CURL, cannot download model from the internet");
583
+ }
457
584
 
458
- if (rename(path_temporary.c_str(), path.c_str()) != 0) {
459
- LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
460
- return false;
461
- }
462
- } else {
463
- LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
585
+ return {};
586
+ }
587
+
588
+ #endif // LLAMA_USE_CURL
589
+
590
+ static bool common_download_file_single(const std::string & url,
591
+ const std::string & path,
592
+ const std::string & bearer_token,
593
+ bool offline) {
594
+ if (!offline) {
595
+ return common_download_file_single_online(url, path, bearer_token);
464
596
  }
465
597
 
598
+ if (!std::filesystem::exists(path)) {
599
+ LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
600
+ return false;
601
+ }
602
+
603
+ LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
466
604
  return true;
467
605
  }
468
606
 
@@ -524,7 +662,7 @@ static bool common_download_model(
524
662
 
525
663
  if (n_split > 1) {
526
664
  char split_prefix[PATH_MAX] = {0};
527
- char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
665
+ char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0};
528
666
 
529
667
  // Verify the first split file format
530
668
  // and extract split URL and PATH prefixes
@@ -545,7 +683,7 @@ static bool common_download_model(
545
683
  char split_path[PATH_MAX] = {0};
546
684
  llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split);
547
685
 
548
- char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
686
+ char split_url[LLAMA_MAX_URL_LENGTH] = {0};
549
687
  llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split);
550
688
 
551
689
  if (std::string(split_path) == model.path) {
@@ -562,50 +700,6 @@ static bool common_download_model(
562
700
  return true;
563
701
  }
564
702
 
565
- std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
566
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
567
- curl_slist_ptr http_headers;
568
- std::vector<char> res_buffer;
569
-
570
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
571
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
572
- curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
573
- typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
574
- auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
575
- auto data_vec = static_cast<std::vector<char> *>(data);
576
- data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
577
- return size * nmemb;
578
- };
579
- curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
580
- curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
581
- #if defined(_WIN32)
582
- curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
583
- #endif
584
- if (params.timeout > 0) {
585
- curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
586
- }
587
- if (params.max_size > 0) {
588
- curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
589
- }
590
- http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
591
- for (const auto & header : params.headers) {
592
- http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
593
- }
594
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
595
-
596
- CURLcode res = curl_easy_perform(curl.get());
597
-
598
- if (res != CURLE_OK) {
599
- std::string error_msg = curl_easy_strerror(res);
600
- throw std::runtime_error("error: cannot make GET request: " + error_msg);
601
- }
602
-
603
- long res_code;
604
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
605
-
606
- return { res_code, std::move(res_buffer) };
607
- }
608
-
609
703
  /**
610
704
  * Allow getting the HF file from the HF repo with tag (like ollama), for example:
611
705
  * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
@@ -672,21 +766,17 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
672
766
  std::string mmprojFile;
673
767
 
674
768
  if (res_code == 200 || res_code == 304) {
675
- // extract ggufFile.rfilename in json, using regex
676
- {
677
- std::regex pattern("\"ggufFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\"");
678
- std::smatch match;
679
- if (std::regex_search(res_str, match, pattern)) {
680
- ggufFile = match[1].str();
769
+ try {
770
+ auto j = json::parse(res_str);
771
+
772
+ if (j.contains("ggufFile") && j["ggufFile"].contains("rfilename")) {
773
+ ggufFile = j["ggufFile"]["rfilename"].get<std::string>();
681
774
  }
682
- }
683
- // extract mmprojFile.rfilename in json, using regex
684
- {
685
- std::regex pattern("\"mmprojFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\"");
686
- std::smatch match;
687
- if (std::regex_search(res_str, match, pattern)) {
688
- mmprojFile = match[1].str();
775
+ if (j.contains("mmprojFile") && j["mmprojFile"].contains("rfilename")) {
776
+ mmprojFile = j["mmprojFile"]["rfilename"].get<std::string>();
689
777
  }
778
+ } catch (const std::exception & e) {
779
+ throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what());
690
780
  }
691
781
  if (!use_cache) {
692
782
  // if not using cached response, update the cache file
@@ -706,45 +796,6 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
706
796
  return { hf_repo, ggufFile, mmprojFile };
707
797
  }
708
798
 
709
- #else
710
-
711
- bool common_has_curl() {
712
- return false;
713
- }
714
-
715
- static bool common_download_file_single(const std::string &, const std::string &, const std::string &, bool) {
716
- LOG_ERR("error: built without CURL, cannot download model from internet\n");
717
- return false;
718
- }
719
-
720
- static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool) {
721
- LOG_ERR("error: built without CURL, cannot download model from the internet\n");
722
- return false;
723
- }
724
-
725
- static bool common_download_model(
726
- const common_params_model &,
727
- const std::string &,
728
- bool) {
729
- LOG_ERR("error: built without CURL, cannot download model from the internet\n");
730
- return false;
731
- }
732
-
733
- static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
734
- LOG_ERR("error: built without CURL, cannot download model from the internet\n");
735
- return {};
736
- }
737
-
738
- std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params &) {
739
- if (!url.empty()) {
740
- throw std::runtime_error("error: built without CURL, cannot download model from the internet");
741
- }
742
-
743
- return {};
744
- }
745
-
746
- #endif // LLAMA_USE_CURL
747
-
748
799
  //
749
800
  // Docker registry functions
750
801
  //
@@ -770,7 +821,7 @@ static std::string common_docker_get_token(const std::string & repo) {
770
821
  }
771
822
 
772
823
  static std::string common_docker_resolve_model(const std::string & docker) {
773
- // Parse ai/smollm2:135M-Q4_K_M
824
+ // Parse ai/smollm2:135M-Q4_0
774
825
  size_t colon_pos = docker.find(':');
775
826
  std::string repo, tag;
776
827
  if (colon_pos != std::string::npos) {
@@ -1004,8 +1055,6 @@ static std::string get_all_kv_cache_types() {
1004
1055
  //
1005
1056
 
1006
1057
  static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) {
1007
- std::string arg;
1008
- const std::string arg_prefix = "--";
1009
1058
  common_params & params = ctx_arg.params;
1010
1059
 
1011
1060
  std::unordered_map<std::string, common_arg *> arg_to_options;