@fugood/llama.node 1.2.1 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/llama.cpp/common/arg.cpp +359 -310
- package/src/llama.cpp/common/chat.cpp +27 -15
- package/src/llama.cpp/common/common.cpp +1 -0
- package/src/llama.cpp/common/sampling.cpp +1 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +37 -21
- package/src/llama.cpp/ggml/include/ggml-backend.h +2 -1
- package/src/llama.cpp/ggml/include/ggml-zdnn.h +3 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +4 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +2 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +17 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +93 -862
- package/src/llama.cpp/include/llama.h +15 -11
- package/src/llama.cpp/src/llama-context.cpp +151 -0
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-cparams.h +1 -1
- package/src/llama.cpp/src/llama-kv-cache-iswa.cpp +8 -0
- package/src/llama.cpp/src/llama-kv-cache-iswa.h +2 -0
- package/src/llama.cpp/src/llama-kv-cache.cpp +8 -0
- package/src/llama.cpp/src/llama-kv-cache.h +2 -0
- package/src/llama.cpp/src/llama-memory-hybrid.cpp +8 -0
- package/src/llama.cpp/src/llama-memory-hybrid.h +2 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +8 -0
- package/src/llama.cpp/src/llama-memory-recurrent.h +3 -0
- package/src/llama.cpp/src/llama-memory.h +3 -0
- package/src/llama.cpp/src/llama-model.cpp +14 -4
- package/src/llama.cpp/src/llama-model.h +5 -1
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
#include <cstdarg>
|
|
25
25
|
#include <filesystem>
|
|
26
26
|
#include <fstream>
|
|
27
|
+
#include <future>
|
|
27
28
|
#include <list>
|
|
28
29
|
#include <regex>
|
|
29
30
|
#include <set>
|
|
@@ -36,9 +37,21 @@
|
|
|
36
37
|
#if defined(LLAMA_USE_CURL)
|
|
37
38
|
#include <curl/curl.h>
|
|
38
39
|
#include <curl/easy.h>
|
|
39
|
-
#include <future>
|
|
40
40
|
#endif
|
|
41
41
|
|
|
42
|
+
#ifdef __linux__
|
|
43
|
+
#include <linux/limits.h>
|
|
44
|
+
#elif defined(_WIN32)
|
|
45
|
+
# if !defined(PATH_MAX)
|
|
46
|
+
# define PATH_MAX MAX_PATH
|
|
47
|
+
# endif
|
|
48
|
+
#elif defined(_AIX)
|
|
49
|
+
#include <sys/limits.h>
|
|
50
|
+
#else
|
|
51
|
+
#include <sys/syslimits.h>
|
|
52
|
+
#endif
|
|
53
|
+
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
54
|
+
|
|
42
55
|
using json = nlohmann::ordered_json;
|
|
43
56
|
|
|
44
57
|
std::initializer_list<enum llama_example> mmproj_examples = {
|
|
@@ -57,12 +70,32 @@ static std::string read_file(const std::string & fname) {
|
|
|
57
70
|
}
|
|
58
71
|
|
|
59
72
|
static void write_file(const std::string & fname, const std::string & content) {
|
|
60
|
-
std::ofstream file(fname);
|
|
73
|
+
const std::string fname_tmp = fname + ".tmp";
|
|
74
|
+
std::ofstream file(fname_tmp);
|
|
61
75
|
if (!file) {
|
|
62
76
|
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
|
63
77
|
}
|
|
64
|
-
|
|
65
|
-
|
|
78
|
+
|
|
79
|
+
try {
|
|
80
|
+
file << content;
|
|
81
|
+
file.close();
|
|
82
|
+
|
|
83
|
+
// Makes write atomic
|
|
84
|
+
if (rename(fname_tmp.c_str(), fname.c_str()) != 0) {
|
|
85
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str());
|
|
86
|
+
// If rename fails, try to delete the temporary file
|
|
87
|
+
if (remove(fname_tmp.c_str()) != 0) {
|
|
88
|
+
LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
} catch (...) {
|
|
92
|
+
// If anything fails, try to delete the temporary file
|
|
93
|
+
if (remove(fname_tmp.c_str()) != 0) {
|
|
94
|
+
LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str()));
|
|
98
|
+
}
|
|
66
99
|
}
|
|
67
100
|
|
|
68
101
|
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
|
@@ -188,19 +221,6 @@ bool common_has_curl() {
|
|
|
188
221
|
return true;
|
|
189
222
|
}
|
|
190
223
|
|
|
191
|
-
#ifdef __linux__
|
|
192
|
-
#include <linux/limits.h>
|
|
193
|
-
#elif defined(_WIN32)
|
|
194
|
-
# if !defined(PATH_MAX)
|
|
195
|
-
# define PATH_MAX MAX_PATH
|
|
196
|
-
# endif
|
|
197
|
-
#elif defined(_AIX)
|
|
198
|
-
#include <sys/limits.h>
|
|
199
|
-
#else
|
|
200
|
-
#include <sys/syslimits.h>
|
|
201
|
-
#endif
|
|
202
|
-
#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
203
|
-
|
|
204
224
|
//
|
|
205
225
|
// CURL utils
|
|
206
226
|
//
|
|
@@ -217,252 +237,370 @@ struct curl_slist_ptr {
|
|
|
217
237
|
}
|
|
218
238
|
};
|
|
219
239
|
|
|
220
|
-
|
|
221
|
-
|
|
240
|
+
static CURLcode common_curl_perf(CURL * curl) {
|
|
241
|
+
CURLcode res = curl_easy_perform(curl);
|
|
242
|
+
if (res != CURLE_OK) {
|
|
243
|
+
LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
|
|
244
|
+
}
|
|
222
245
|
|
|
223
|
-
|
|
224
|
-
|
|
246
|
+
return res;
|
|
247
|
+
}
|
|
225
248
|
|
|
226
|
-
|
|
227
|
-
|
|
249
|
+
// Send a HEAD request to retrieve the etag and last-modified headers
|
|
250
|
+
struct common_load_model_from_url_headers {
|
|
251
|
+
std::string etag;
|
|
252
|
+
std::string last_modified;
|
|
253
|
+
std::string accept_ranges;
|
|
254
|
+
};
|
|
228
255
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
256
|
+
struct FILE_deleter {
|
|
257
|
+
void operator()(FILE * f) const { fclose(f); }
|
|
258
|
+
};
|
|
259
|
+
|
|
260
|
+
static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
|
|
261
|
+
common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
|
|
262
|
+
static std::regex header_regex("([^:]+): (.*)\r\n");
|
|
263
|
+
static std::regex etag_regex("ETag", std::regex_constants::icase);
|
|
264
|
+
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
|
265
|
+
static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
|
|
266
|
+
std::string header(buffer, n_items);
|
|
267
|
+
std::smatch match;
|
|
268
|
+
if (std::regex_match(header, match, header_regex)) {
|
|
269
|
+
const std::string & key = match[1];
|
|
270
|
+
const std::string & value = match[2];
|
|
271
|
+
if (std::regex_match(key, match, etag_regex)) {
|
|
272
|
+
headers->etag = value;
|
|
273
|
+
} else if (std::regex_match(key, match, last_modified_regex)) {
|
|
274
|
+
headers->last_modified = value;
|
|
275
|
+
} else if (std::regex_match(key, match, accept_ranges_regex)) {
|
|
276
|
+
headers->accept_ranges = value;
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
return n_items;
|
|
281
|
+
}
|
|
233
282
|
|
|
234
|
-
|
|
235
|
-
|
|
283
|
+
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
|
|
284
|
+
return std::fwrite(data, size, nmemb, static_cast<FILE *>(fd));
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
// helper function to hide password in URL
|
|
288
|
+
static std::string llama_download_hide_password_in_url(const std::string & url) {
|
|
289
|
+
// Use regex to match and replace the user[:password]@ pattern in URLs
|
|
290
|
+
// Pattern: scheme://[user[:password]@]host[...]
|
|
291
|
+
static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]*://)(?:[^/@]+@)?.*$)");
|
|
292
|
+
std::smatch match;
|
|
236
293
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
294
|
+
if (std::regex_match(url, match, url_regex)) {
|
|
295
|
+
// match[1] = scheme (e.g., "https://")
|
|
296
|
+
// match[2] = user[:password]@ part
|
|
297
|
+
// match[3] = rest of URL (host and path)
|
|
298
|
+
return match[1].str() + "********@" + match[3].str();
|
|
240
299
|
}
|
|
241
300
|
|
|
242
|
-
|
|
301
|
+
return url; // No credentials found or malformed URL
|
|
302
|
+
}
|
|
243
303
|
|
|
244
|
-
|
|
304
|
+
static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
|
|
305
|
+
// Set the URL, allow to follow http redirection
|
|
306
|
+
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
|
307
|
+
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
|
308
|
+
|
|
309
|
+
# if defined(_WIN32)
|
|
310
|
+
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
|
|
311
|
+
// operating system. Currently implemented under MS-Windows.
|
|
312
|
+
curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
313
|
+
# endif
|
|
314
|
+
|
|
315
|
+
curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
|
|
316
|
+
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
|
|
317
|
+
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
|
|
245
318
|
}
|
|
246
319
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
auto file_exists = std::filesystem::exists(path);
|
|
320
|
+
static void common_curl_easy_setopt_get(CURL * curl) {
|
|
321
|
+
curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
|
|
322
|
+
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
|
|
251
323
|
|
|
252
|
-
//
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
std::string etag;
|
|
256
|
-
std::string last_modified;
|
|
324
|
+
// display download progress
|
|
325
|
+
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
|
|
326
|
+
}
|
|
257
327
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
std::ifstream metadata_in(metadata_path);
|
|
265
|
-
if (metadata_in.good()) {
|
|
266
|
-
try {
|
|
267
|
-
metadata_in >> metadata;
|
|
268
|
-
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
|
269
|
-
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
270
|
-
etag = metadata.at("etag");
|
|
271
|
-
}
|
|
272
|
-
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
273
|
-
last_modified = metadata.at("lastModified");
|
|
274
|
-
}
|
|
275
|
-
} catch (const nlohmann::json::exception & e) {
|
|
276
|
-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
280
|
-
} else {
|
|
281
|
-
if (offline) {
|
|
282
|
-
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
283
|
-
return false;
|
|
284
|
-
}
|
|
285
|
-
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
328
|
+
static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
|
|
329
|
+
if (std::filesystem::exists(path_temporary)) {
|
|
330
|
+
const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary));
|
|
331
|
+
LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
|
|
332
|
+
const std::string range_str = partial_size + "-";
|
|
333
|
+
curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
|
|
286
334
|
}
|
|
287
335
|
|
|
288
|
-
//
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
336
|
+
// Always open file in append mode could be resuming
|
|
337
|
+
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "ab"));
|
|
338
|
+
if (!outfile) {
|
|
339
|
+
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
|
|
340
|
+
return false;
|
|
341
|
+
}
|
|
293
342
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
bool should_download = !file_exists; // by default, we should download if the file does not exist
|
|
343
|
+
common_curl_easy_setopt_get(curl);
|
|
344
|
+
curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
|
|
297
345
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
346
|
+
return common_curl_perf(curl) == CURLE_OK;
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
static bool common_download_head(CURL * curl,
|
|
350
|
+
curl_slist_ptr & http_headers,
|
|
351
|
+
const std::string & url,
|
|
352
|
+
const std::string & bearer_token) {
|
|
301
353
|
if (!curl) {
|
|
302
354
|
LOG_ERR("%s: error initializing libcurl\n", __func__);
|
|
303
355
|
return false;
|
|
304
356
|
}
|
|
305
357
|
|
|
306
|
-
// Set the URL, allow to follow http redirection
|
|
307
|
-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
308
|
-
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
309
|
-
|
|
310
358
|
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
311
359
|
// Check if hf-token or bearer-token was specified
|
|
312
360
|
if (!bearer_token.empty()) {
|
|
313
361
|
std::string auth_header = "Authorization: Bearer " + bearer_token;
|
|
314
|
-
http_headers.ptr
|
|
362
|
+
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
|
315
363
|
}
|
|
316
|
-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
317
364
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
#endif
|
|
365
|
+
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
366
|
+
common_curl_easy_setopt_head(curl, url);
|
|
367
|
+
return common_curl_perf(curl) == CURLE_OK;
|
|
368
|
+
}
|
|
323
369
|
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
370
|
+
// download one single file from remote URL to local path
|
|
371
|
+
static bool common_download_file_single_online(const std::string & url,
|
|
372
|
+
const std::string & path,
|
|
373
|
+
const std::string & bearer_token) {
|
|
374
|
+
// If the file exists, check its JSON metadata companion file.
|
|
375
|
+
std::string metadata_path = path + ".json";
|
|
376
|
+
static const int max_attempts = 3;
|
|
377
|
+
static const int retry_delay_seconds = 2;
|
|
378
|
+
for (int i = 0; i < max_attempts; ++i) {
|
|
379
|
+
nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
|
|
380
|
+
std::string etag;
|
|
381
|
+
std::string last_modified;
|
|
382
|
+
|
|
383
|
+
// Check if the file already exists locally
|
|
384
|
+
const auto file_exists = std::filesystem::exists(path);
|
|
385
|
+
if (file_exists) {
|
|
386
|
+
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
|
|
387
|
+
std::ifstream metadata_in(metadata_path);
|
|
388
|
+
if (metadata_in.good()) {
|
|
389
|
+
try {
|
|
390
|
+
metadata_in >> metadata;
|
|
391
|
+
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
|
|
392
|
+
metadata.dump().c_str());
|
|
393
|
+
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
394
|
+
etag = metadata.at("etag");
|
|
395
|
+
}
|
|
396
|
+
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
397
|
+
last_modified = metadata.at("lastModified");
|
|
398
|
+
}
|
|
399
|
+
} catch (const nlohmann::json::exception & e) {
|
|
400
|
+
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
404
|
+
} else {
|
|
405
|
+
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
406
|
+
}
|
|
327
407
|
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
|
408
|
+
bool head_request_ok = false;
|
|
409
|
+
bool should_download = !file_exists; // by default, we should download if the file does not exist
|
|
331
410
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
headers->last_modified = value;
|
|
341
|
-
}
|
|
411
|
+
// Initialize libcurl
|
|
412
|
+
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
413
|
+
common_load_model_from_url_headers headers;
|
|
414
|
+
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
|
415
|
+
curl_slist_ptr http_headers;
|
|
416
|
+
const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
|
|
417
|
+
if (!was_perform_successful) {
|
|
418
|
+
head_request_ok = false;
|
|
342
419
|
}
|
|
343
|
-
return n_items;
|
|
344
|
-
};
|
|
345
420
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
421
|
+
long http_code = 0;
|
|
422
|
+
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
423
|
+
if (http_code == 200) {
|
|
424
|
+
head_request_ok = true;
|
|
425
|
+
} else {
|
|
426
|
+
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
|
427
|
+
head_request_ok = false;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
// if head_request_ok is false, we don't have the etag or last-modified headers
|
|
431
|
+
// we leave should_download as-is, which is true if the file does not exist
|
|
432
|
+
bool should_download_from_scratch = false;
|
|
433
|
+
if (head_request_ok) {
|
|
434
|
+
// check if ETag or Last-Modified headers are different
|
|
435
|
+
// if it is, we need to download the file again
|
|
436
|
+
if (!etag.empty() && etag != headers.etag) {
|
|
437
|
+
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
|
|
438
|
+
headers.etag.c_str());
|
|
439
|
+
should_download = true;
|
|
440
|
+
should_download_from_scratch = true;
|
|
441
|
+
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
|
442
|
+
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
|
|
443
|
+
last_modified.c_str(), headers.last_modified.c_str());
|
|
444
|
+
should_download = true;
|
|
445
|
+
should_download_from_scratch = true;
|
|
446
|
+
}
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
|
|
450
|
+
if (should_download) {
|
|
451
|
+
if (file_exists &&
|
|
452
|
+
!accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
|
|
453
|
+
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
454
|
+
if (remove(path.c_str()) != 0) {
|
|
455
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
456
|
+
return false;
|
|
457
|
+
}
|
|
458
|
+
}
|
|
350
459
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
460
|
+
const std::string path_temporary = path + ".downloadInProgress";
|
|
461
|
+
if (should_download_from_scratch) {
|
|
462
|
+
if (std::filesystem::exists(path_temporary)) {
|
|
463
|
+
if (remove(path_temporary.c_str()) != 0) {
|
|
464
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
465
|
+
return false;
|
|
466
|
+
}
|
|
467
|
+
}
|
|
357
468
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
}
|
|
469
|
+
if (std::filesystem::exists(path)) {
|
|
470
|
+
if (remove(path.c_str()) != 0) {
|
|
471
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
472
|
+
return false;
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
}
|
|
366
476
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
477
|
+
// Write the updated JSON metadata file.
|
|
478
|
+
metadata.update({
|
|
479
|
+
{ "url", url },
|
|
480
|
+
{ "etag", headers.etag },
|
|
481
|
+
{ "lastModified", headers.last_modified }
|
|
482
|
+
});
|
|
483
|
+
write_file(metadata_path, metadata.dump(4));
|
|
484
|
+
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
485
|
+
|
|
486
|
+
// start the download
|
|
487
|
+
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
|
|
488
|
+
__func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
|
|
489
|
+
headers.etag.c_str(), headers.last_modified.c_str());
|
|
490
|
+
const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
|
|
491
|
+
if (!was_pull_successful) {
|
|
492
|
+
if (i + 1 < max_attempts) {
|
|
493
|
+
const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
|
|
494
|
+
LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
|
|
495
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
|
496
|
+
} else {
|
|
497
|
+
LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
|
498
|
+
}
|
|
380
499
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
500
|
+
continue;
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
long http_code = 0;
|
|
504
|
+
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
505
|
+
if (http_code < 200 || http_code >= 400) {
|
|
506
|
+
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
387
507
|
return false;
|
|
388
508
|
}
|
|
509
|
+
|
|
510
|
+
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
511
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
512
|
+
return false;
|
|
513
|
+
}
|
|
514
|
+
} else {
|
|
515
|
+
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
389
516
|
}
|
|
390
517
|
|
|
391
|
-
|
|
518
|
+
break;
|
|
519
|
+
}
|
|
392
520
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
fclose(f);
|
|
396
|
-
}
|
|
397
|
-
};
|
|
521
|
+
return true;
|
|
522
|
+
}
|
|
398
523
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
}
|
|
524
|
+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
|
525
|
+
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
526
|
+
curl_slist_ptr http_headers;
|
|
527
|
+
std::vector<char> res_buffer;
|
|
404
528
|
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
529
|
+
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
530
|
+
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
531
|
+
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
532
|
+
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
|
|
533
|
+
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
534
|
+
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
535
|
+
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
536
|
+
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
|
|
537
|
+
return size * nmemb;
|
|
538
|
+
};
|
|
539
|
+
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
540
|
+
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
|
|
541
|
+
#if defined(_WIN32)
|
|
542
|
+
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
543
|
+
#endif
|
|
544
|
+
if (params.timeout > 0) {
|
|
545
|
+
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
|
|
546
|
+
}
|
|
547
|
+
if (params.max_size > 0) {
|
|
548
|
+
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
|
|
549
|
+
}
|
|
550
|
+
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
551
|
+
for (const auto & header : params.headers) {
|
|
552
|
+
http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
|
|
553
|
+
}
|
|
554
|
+
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
412
555
|
|
|
413
|
-
|
|
414
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L);
|
|
556
|
+
CURLcode res = curl_easy_perform(curl.get());
|
|
415
557
|
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
return url; // Malformed URL
|
|
421
|
-
}
|
|
558
|
+
if (res != CURLE_OK) {
|
|
559
|
+
std::string error_msg = curl_easy_strerror(res);
|
|
560
|
+
throw std::runtime_error("error: cannot make GET request: " + error_msg);
|
|
561
|
+
}
|
|
422
562
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
return url; // No password in URL
|
|
426
|
-
}
|
|
563
|
+
long res_code;
|
|
564
|
+
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
|
427
565
|
|
|
428
|
-
|
|
429
|
-
|
|
566
|
+
return { res_code, std::move(res_buffer) };
|
|
567
|
+
}
|
|
430
568
|
|
|
431
|
-
|
|
432
|
-
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
|
|
433
|
-
llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
|
|
434
|
-
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET");
|
|
435
|
-
if (!was_perform_successful) {
|
|
436
|
-
return false;
|
|
437
|
-
}
|
|
569
|
+
#else
|
|
438
570
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
443
|
-
return false;
|
|
444
|
-
}
|
|
571
|
+
bool common_has_curl() {
|
|
572
|
+
return false;
|
|
573
|
+
}
|
|
445
574
|
|
|
446
|
-
|
|
447
|
-
|
|
575
|
+
static bool common_download_file_single_online(const std::string &, const std::string &, const std::string &) {
|
|
576
|
+
LOG_ERR("error: built without CURL, cannot download model from internet\n");
|
|
577
|
+
return false;
|
|
578
|
+
}
|
|
448
579
|
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
{"lastModified", headers.last_modified}
|
|
454
|
-
});
|
|
455
|
-
write_file(metadata_path, metadata.dump(4));
|
|
456
|
-
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
580
|
+
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params &) {
|
|
581
|
+
if (!url.empty()) {
|
|
582
|
+
throw std::runtime_error("error: built without CURL, cannot download model from the internet");
|
|
583
|
+
}
|
|
457
584
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
585
|
+
return {};
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
#endif // LLAMA_USE_CURL
|
|
589
|
+
|
|
590
|
+
static bool common_download_file_single(const std::string & url,
|
|
591
|
+
const std::string & path,
|
|
592
|
+
const std::string & bearer_token,
|
|
593
|
+
bool offline) {
|
|
594
|
+
if (!offline) {
|
|
595
|
+
return common_download_file_single_online(url, path, bearer_token);
|
|
464
596
|
}
|
|
465
597
|
|
|
598
|
+
if (!std::filesystem::exists(path)) {
|
|
599
|
+
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
600
|
+
return false;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
466
604
|
return true;
|
|
467
605
|
}
|
|
468
606
|
|
|
@@ -524,7 +662,7 @@ static bool common_download_model(
|
|
|
524
662
|
|
|
525
663
|
if (n_split > 1) {
|
|
526
664
|
char split_prefix[PATH_MAX] = {0};
|
|
527
|
-
char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0};
|
|
665
|
+
char split_url_prefix[LLAMA_MAX_URL_LENGTH] = {0};
|
|
528
666
|
|
|
529
667
|
// Verify the first split file format
|
|
530
668
|
// and extract split URL and PATH prefixes
|
|
@@ -545,7 +683,7 @@ static bool common_download_model(
|
|
|
545
683
|
char split_path[PATH_MAX] = {0};
|
|
546
684
|
llama_split_path(split_path, sizeof(split_path), split_prefix, idx, n_split);
|
|
547
685
|
|
|
548
|
-
char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0};
|
|
686
|
+
char split_url[LLAMA_MAX_URL_LENGTH] = {0};
|
|
549
687
|
llama_split_path(split_url, sizeof(split_url), split_url_prefix, idx, n_split);
|
|
550
688
|
|
|
551
689
|
if (std::string(split_path) == model.path) {
|
|
@@ -562,50 +700,6 @@ static bool common_download_model(
|
|
|
562
700
|
return true;
|
|
563
701
|
}
|
|
564
702
|
|
|
565
|
-
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
|
566
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
567
|
-
curl_slist_ptr http_headers;
|
|
568
|
-
std::vector<char> res_buffer;
|
|
569
|
-
|
|
570
|
-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
571
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
572
|
-
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
573
|
-
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
574
|
-
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
575
|
-
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
576
|
-
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
|
|
577
|
-
return size * nmemb;
|
|
578
|
-
};
|
|
579
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
580
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
|
|
581
|
-
#if defined(_WIN32)
|
|
582
|
-
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
583
|
-
#endif
|
|
584
|
-
if (params.timeout > 0) {
|
|
585
|
-
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
|
|
586
|
-
}
|
|
587
|
-
if (params.max_size > 0) {
|
|
588
|
-
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
|
|
589
|
-
}
|
|
590
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
591
|
-
for (const auto & header : params.headers) {
|
|
592
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
|
|
593
|
-
}
|
|
594
|
-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
595
|
-
|
|
596
|
-
CURLcode res = curl_easy_perform(curl.get());
|
|
597
|
-
|
|
598
|
-
if (res != CURLE_OK) {
|
|
599
|
-
std::string error_msg = curl_easy_strerror(res);
|
|
600
|
-
throw std::runtime_error("error: cannot make GET request: " + error_msg);
|
|
601
|
-
}
|
|
602
|
-
|
|
603
|
-
long res_code;
|
|
604
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
|
605
|
-
|
|
606
|
-
return { res_code, std::move(res_buffer) };
|
|
607
|
-
}
|
|
608
|
-
|
|
609
703
|
/**
|
|
610
704
|
* Allow getting the HF file from the HF repo with tag (like ollama), for example:
|
|
611
705
|
* - bartowski/Llama-3.2-3B-Instruct-GGUF:q4
|
|
@@ -672,21 +766,17 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
|
|
672
766
|
std::string mmprojFile;
|
|
673
767
|
|
|
674
768
|
if (res_code == 200 || res_code == 304) {
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
ggufFile = match[1].str();
|
|
769
|
+
try {
|
|
770
|
+
auto j = json::parse(res_str);
|
|
771
|
+
|
|
772
|
+
if (j.contains("ggufFile") && j["ggufFile"].contains("rfilename")) {
|
|
773
|
+
ggufFile = j["ggufFile"]["rfilename"].get<std::string>();
|
|
681
774
|
}
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
{
|
|
685
|
-
std::regex pattern("\"mmprojFile\"[\\s\\S]*?\"rfilename\"\\s*:\\s*\"([^\"]+)\"");
|
|
686
|
-
std::smatch match;
|
|
687
|
-
if (std::regex_search(res_str, match, pattern)) {
|
|
688
|
-
mmprojFile = match[1].str();
|
|
775
|
+
if (j.contains("mmprojFile") && j["mmprojFile"].contains("rfilename")) {
|
|
776
|
+
mmprojFile = j["mmprojFile"]["rfilename"].get<std::string>();
|
|
689
777
|
}
|
|
778
|
+
} catch (const std::exception & e) {
|
|
779
|
+
throw std::runtime_error(std::string("error parsing manifest JSON: ") + e.what());
|
|
690
780
|
}
|
|
691
781
|
if (!use_cache) {
|
|
692
782
|
// if not using cached response, update the cache file
|
|
@@ -706,45 +796,6 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
|
|
|
706
796
|
return { hf_repo, ggufFile, mmprojFile };
|
|
707
797
|
}
|
|
708
798
|
|
|
709
|
-
#else
|
|
710
|
-
|
|
711
|
-
bool common_has_curl() {
|
|
712
|
-
return false;
|
|
713
|
-
}
|
|
714
|
-
|
|
715
|
-
static bool common_download_file_single(const std::string &, const std::string &, const std::string &, bool) {
|
|
716
|
-
LOG_ERR("error: built without CURL, cannot download model from internet\n");
|
|
717
|
-
return false;
|
|
718
|
-
}
|
|
719
|
-
|
|
720
|
-
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool) {
|
|
721
|
-
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
|
|
722
|
-
return false;
|
|
723
|
-
}
|
|
724
|
-
|
|
725
|
-
static bool common_download_model(
|
|
726
|
-
const common_params_model &,
|
|
727
|
-
const std::string &,
|
|
728
|
-
bool) {
|
|
729
|
-
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
|
|
730
|
-
return false;
|
|
731
|
-
}
|
|
732
|
-
|
|
733
|
-
static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
|
|
734
|
-
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
|
|
735
|
-
return {};
|
|
736
|
-
}
|
|
737
|
-
|
|
738
|
-
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params &) {
|
|
739
|
-
if (!url.empty()) {
|
|
740
|
-
throw std::runtime_error("error: built without CURL, cannot download model from the internet");
|
|
741
|
-
}
|
|
742
|
-
|
|
743
|
-
return {};
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
#endif // LLAMA_USE_CURL
|
|
747
|
-
|
|
748
799
|
//
|
|
749
800
|
// Docker registry functions
|
|
750
801
|
//
|
|
@@ -770,7 +821,7 @@ static std::string common_docker_get_token(const std::string & repo) {
|
|
|
770
821
|
}
|
|
771
822
|
|
|
772
823
|
static std::string common_docker_resolve_model(const std::string & docker) {
|
|
773
|
-
// Parse ai/smollm2:135M-
|
|
824
|
+
// Parse ai/smollm2:135M-Q4_0
|
|
774
825
|
size_t colon_pos = docker.find(':');
|
|
775
826
|
std::string repo, tag;
|
|
776
827
|
if (colon_pos != std::string::npos) {
|
|
@@ -1004,8 +1055,6 @@ static std::string get_all_kv_cache_types() {
|
|
|
1004
1055
|
//
|
|
1005
1056
|
|
|
1006
1057
|
static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) {
|
|
1007
|
-
std::string arg;
|
|
1008
|
-
const std::string arg_prefix = "--";
|
|
1009
1058
|
common_params & params = ctx_arg.params;
|
|
1010
1059
|
|
|
1011
1060
|
std::unordered_map<std::string, common_arg *> arg_to_options;
|