@fugood/llama.node 1.4.13 → 1.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +23 -2
- package/lib/index.js +2 -1
- package/lib/index.ts +8 -1
- package/lib/parallel.ts +2 -2
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +9 -12
- package/src/LlamaContext.cpp +16 -4
- package/src/llama.cpp/CMakeLists.txt +24 -8
- package/src/llama.cpp/common/CMakeLists.txt +3 -34
- package/src/llama.cpp/common/arg.cpp +183 -60
- package/src/llama.cpp/common/arg.h +0 -8
- package/src/llama.cpp/common/chat-parser.cpp +115 -0
- package/src/llama.cpp/common/chat.cpp +67 -0
- package/src/llama.cpp/common/chat.h +1 -0
- package/src/llama.cpp/common/common.cpp +2 -1
- package/src/llama.cpp/common/common.h +12 -7
- package/src/llama.cpp/common/debug.cpp +165 -0
- package/src/llama.cpp/common/debug.h +43 -0
- package/src/llama.cpp/common/download.cpp +88 -369
- package/src/llama.cpp/common/download.h +32 -5
- package/src/llama.cpp/common/preset.cpp +87 -2
- package/src/llama.cpp/common/preset.h +10 -1
- package/src/llama.cpp/ggml/include/ggml.h +5 -0
- package/src/llama.cpp/include/llama.h +5 -2
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +35 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +20 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +31 -43
- package/src/llama.cpp/src/llama-mmap.cpp +78 -42
- package/src/llama.cpp/src/llama-mmap.h +5 -4
- package/src/llama.cpp/src/llama-model-loader.cpp +17 -5
- package/src/llama.cpp/src/llama-model-loader.h +2 -0
- package/src/llama.cpp/src/llama-model.cpp +225 -101
- package/src/llama.cpp/src/llama-quant.cpp +1 -1
- package/src/llama.cpp/src/llama-sampling.cpp +1 -1
- package/src/llama.cpp/src/llama-vocab.cpp +37 -24
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/llama.cpp +63 -27
- package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
- package/src/llama.cpp/src/models/models.h +13 -2
- package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
|
@@ -19,10 +19,7 @@
|
|
|
19
19
|
#include <thread>
|
|
20
20
|
#include <vector>
|
|
21
21
|
|
|
22
|
-
#if defined(
|
|
23
|
-
#include <curl/curl.h>
|
|
24
|
-
#include <curl/easy.h>
|
|
25
|
-
#elif defined(LLAMA_USE_HTTPLIB)
|
|
22
|
+
#if defined(LLAMA_USE_HTTPLIB)
|
|
26
23
|
#include "http.h"
|
|
27
24
|
#endif
|
|
28
25
|
|
|
@@ -157,322 +154,21 @@ static std::string read_etag(const std::string & path) {
|
|
|
157
154
|
return none;
|
|
158
155
|
}
|
|
159
156
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
//
|
|
163
|
-
// CURL utils
|
|
164
|
-
//
|
|
165
|
-
|
|
166
|
-
using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
|
167
|
-
|
|
168
|
-
// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
|
|
169
|
-
struct curl_slist_ptr {
|
|
170
|
-
struct curl_slist * ptr = nullptr;
|
|
171
|
-
~curl_slist_ptr() {
|
|
172
|
-
if (ptr) {
|
|
173
|
-
curl_slist_free_all(ptr);
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
};
|
|
177
|
-
|
|
178
|
-
static CURLcode common_curl_perf(CURL * curl) {
|
|
179
|
-
CURLcode res = curl_easy_perform(curl);
|
|
180
|
-
if (res != CURLE_OK) {
|
|
181
|
-
LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
return res;
|
|
157
|
+
static bool is_http_status_ok(int status) {
|
|
158
|
+
return status >= 200 && status < 400;
|
|
185
159
|
}
|
|
186
160
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
std::string
|
|
190
|
-
std::string
|
|
191
|
-
std::string
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
struct FILE_deleter {
|
|
195
|
-
void operator()(FILE * f) const { fclose(f); }
|
|
196
|
-
};
|
|
197
|
-
|
|
198
|
-
static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
|
|
199
|
-
common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
|
|
200
|
-
static std::regex header_regex("([^:]+): (.*)\r\n");
|
|
201
|
-
static std::regex etag_regex("ETag", std::regex_constants::icase);
|
|
202
|
-
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
|
203
|
-
static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
|
|
204
|
-
std::string header(buffer, n_items);
|
|
205
|
-
std::smatch match;
|
|
206
|
-
if (std::regex_match(header, match, header_regex)) {
|
|
207
|
-
const std::string & key = match[1];
|
|
208
|
-
const std::string & value = match[2];
|
|
209
|
-
if (std::regex_match(key, match, etag_regex)) {
|
|
210
|
-
headers->etag = value;
|
|
211
|
-
} else if (std::regex_match(key, match, last_modified_regex)) {
|
|
212
|
-
headers->last_modified = value;
|
|
213
|
-
} else if (std::regex_match(key, match, accept_ranges_regex)) {
|
|
214
|
-
headers->accept_ranges = value;
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
return n_items;
|
|
219
|
-
}
|
|
220
|
-
|
|
221
|
-
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
|
|
222
|
-
return std::fwrite(data, size, nmemb, static_cast<FILE *>(fd));
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
// helper function to hide password in URL
|
|
226
|
-
static std::string llama_download_hide_password_in_url(const std::string & url) {
|
|
227
|
-
// Use regex to match and replace the user[:password]@ pattern in URLs
|
|
228
|
-
// Pattern: scheme://[user[:password]@]host[...]
|
|
229
|
-
static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]://)(?:[^/@]+@)?.$)");
|
|
230
|
-
std::smatch match;
|
|
231
|
-
|
|
232
|
-
if (std::regex_match(url, match, url_regex)) {
|
|
233
|
-
// match[1] = scheme (e.g., "https://")
|
|
234
|
-
// match[2] = user[:password]@ part
|
|
235
|
-
// match[3] = rest of URL (host and path)
|
|
236
|
-
return match[1].str() + "********@" + match[3].str();
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
return url; // No credentials found or malformed URL
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
|
|
243
|
-
// Set the URL, allow to follow http redirection
|
|
244
|
-
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
|
245
|
-
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
|
246
|
-
|
|
247
|
-
# if defined(_WIN32)
|
|
248
|
-
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
|
|
249
|
-
// operating system. Currently implemented under MS-Windows.
|
|
250
|
-
curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
251
|
-
# endif
|
|
252
|
-
|
|
253
|
-
curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
|
|
254
|
-
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
|
|
255
|
-
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
static void common_curl_easy_setopt_get(CURL * curl) {
|
|
259
|
-
curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
|
|
260
|
-
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
|
|
261
|
-
|
|
262
|
-
// display download progress
|
|
263
|
-
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
|
|
267
|
-
if (std::filesystem::exists(path_temporary)) {
|
|
268
|
-
const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary));
|
|
269
|
-
LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
|
|
270
|
-
const std::string range_str = partial_size + "-";
|
|
271
|
-
curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
// Always open file in append mode could be resuming
|
|
275
|
-
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "ab"));
|
|
276
|
-
if (!outfile) {
|
|
277
|
-
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
|
|
278
|
-
return false;
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
common_curl_easy_setopt_get(curl);
|
|
282
|
-
curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
|
|
283
|
-
|
|
284
|
-
return common_curl_perf(curl) == CURLE_OK;
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
static bool common_download_head(CURL * curl,
|
|
288
|
-
curl_slist_ptr & http_headers,
|
|
289
|
-
const std::string & url,
|
|
290
|
-
const std::string & bearer_token) {
|
|
291
|
-
if (!curl) {
|
|
292
|
-
LOG_ERR("%s: error initializing libcurl\n", __func__);
|
|
293
|
-
return false;
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
297
|
-
// Check if hf-token or bearer-token was specified
|
|
298
|
-
if (!bearer_token.empty()) {
|
|
299
|
-
std::string auth_header = "Authorization: Bearer " + bearer_token;
|
|
300
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
|
301
|
-
}
|
|
302
|
-
|
|
303
|
-
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
304
|
-
common_curl_easy_setopt_head(curl, url);
|
|
305
|
-
return common_curl_perf(curl) == CURLE_OK;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
// download one single file from remote URL to local path
|
|
309
|
-
static bool common_download_file_single_online(const std::string & url,
|
|
310
|
-
const std::string & path,
|
|
311
|
-
const std::string & bearer_token) {
|
|
312
|
-
static const int max_attempts = 3;
|
|
313
|
-
static const int retry_delay_seconds = 2;
|
|
314
|
-
for (int i = 0; i < max_attempts; ++i) {
|
|
315
|
-
std::string etag;
|
|
316
|
-
|
|
317
|
-
// Check if the file already exists locally
|
|
318
|
-
const auto file_exists = std::filesystem::exists(path);
|
|
319
|
-
if (file_exists) {
|
|
320
|
-
etag = read_etag(path);
|
|
321
|
-
} else {
|
|
322
|
-
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
323
|
-
}
|
|
324
|
-
|
|
325
|
-
bool head_request_ok = false;
|
|
326
|
-
bool should_download = !file_exists; // by default, we should download if the file does not exist
|
|
327
|
-
|
|
328
|
-
// Initialize libcurl
|
|
329
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
330
|
-
common_load_model_from_url_headers headers;
|
|
331
|
-
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
|
332
|
-
curl_slist_ptr http_headers;
|
|
333
|
-
const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
|
|
334
|
-
if (!was_perform_successful) {
|
|
335
|
-
head_request_ok = false;
|
|
336
|
-
}
|
|
337
|
-
|
|
338
|
-
long http_code = 0;
|
|
339
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
340
|
-
if (http_code == 200) {
|
|
341
|
-
head_request_ok = true;
|
|
342
|
-
} else {
|
|
343
|
-
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
|
344
|
-
head_request_ok = false;
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
// if head_request_ok is false, we don't have the etag or last-modified headers
|
|
348
|
-
// we leave should_download as-is, which is true if the file does not exist
|
|
349
|
-
bool should_download_from_scratch = false;
|
|
350
|
-
if (head_request_ok) {
|
|
351
|
-
// check if ETag or Last-Modified headers are different
|
|
352
|
-
// if it is, we need to download the file again
|
|
353
|
-
if (!etag.empty() && etag != headers.etag) {
|
|
354
|
-
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
|
|
355
|
-
headers.etag.c_str());
|
|
356
|
-
should_download = true;
|
|
357
|
-
should_download_from_scratch = true;
|
|
358
|
-
}
|
|
359
|
-
}
|
|
360
|
-
|
|
361
|
-
const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
|
|
362
|
-
if (should_download) {
|
|
363
|
-
if (file_exists &&
|
|
364
|
-
!accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
|
|
365
|
-
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
366
|
-
if (remove(path.c_str()) != 0) {
|
|
367
|
-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
368
|
-
return false;
|
|
369
|
-
}
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
const std::string path_temporary = path + ".downloadInProgress";
|
|
373
|
-
if (should_download_from_scratch) {
|
|
374
|
-
if (std::filesystem::exists(path_temporary)) {
|
|
375
|
-
if (remove(path_temporary.c_str()) != 0) {
|
|
376
|
-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
377
|
-
return false;
|
|
378
|
-
}
|
|
379
|
-
}
|
|
380
|
-
|
|
381
|
-
if (std::filesystem::exists(path)) {
|
|
382
|
-
if (remove(path.c_str()) != 0) {
|
|
383
|
-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
384
|
-
return false;
|
|
385
|
-
}
|
|
386
|
-
}
|
|
387
|
-
}
|
|
388
|
-
if (head_request_ok) {
|
|
389
|
-
write_etag(path, headers.etag);
|
|
390
|
-
}
|
|
391
|
-
|
|
392
|
-
// start the download
|
|
393
|
-
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
|
|
394
|
-
__func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
|
|
395
|
-
headers.etag.c_str(), headers.last_modified.c_str());
|
|
396
|
-
const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
|
|
397
|
-
if (!was_pull_successful) {
|
|
398
|
-
if (i + 1 < max_attempts) {
|
|
399
|
-
const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
|
|
400
|
-
LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
|
|
401
|
-
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
|
402
|
-
} else {
|
|
403
|
-
LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
continue;
|
|
407
|
-
}
|
|
408
|
-
|
|
409
|
-
long http_code = 0;
|
|
410
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
411
|
-
if (http_code < 200 || http_code >= 400) {
|
|
412
|
-
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
413
|
-
return false;
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
417
|
-
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
418
|
-
return false;
|
|
419
|
-
}
|
|
420
|
-
} else {
|
|
421
|
-
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
422
|
-
}
|
|
423
|
-
|
|
424
|
-
break;
|
|
425
|
-
}
|
|
426
|
-
|
|
427
|
-
return true;
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
|
431
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
432
|
-
curl_slist_ptr http_headers;
|
|
433
|
-
std::vector<char> res_buffer;
|
|
434
|
-
|
|
435
|
-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
436
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
437
|
-
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
438
|
-
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
|
|
439
|
-
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
440
|
-
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
441
|
-
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
442
|
-
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
|
|
443
|
-
return size * nmemb;
|
|
444
|
-
};
|
|
445
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
446
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
|
|
447
|
-
#if defined(_WIN32)
|
|
448
|
-
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
449
|
-
#endif
|
|
450
|
-
if (params.timeout > 0) {
|
|
451
|
-
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
|
|
452
|
-
}
|
|
453
|
-
if (params.max_size > 0) {
|
|
454
|
-
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
|
|
455
|
-
}
|
|
456
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
457
|
-
for (const auto & header : params.headers) {
|
|
458
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, header.c_str());
|
|
459
|
-
}
|
|
460
|
-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
461
|
-
|
|
462
|
-
CURLcode res = curl_easy_perform(curl.get());
|
|
463
|
-
|
|
464
|
-
if (res != CURLE_OK) {
|
|
465
|
-
std::string error_msg = curl_easy_strerror(res);
|
|
466
|
-
throw std::runtime_error("error: cannot make GET request: " + error_msg);
|
|
161
|
+
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
|
|
162
|
+
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
|
|
163
|
+
std::string tag = parts.size() > 1 ? parts.back() : "latest";
|
|
164
|
+
std::string hf_repo = parts[0];
|
|
165
|
+
if (string_split<std::string>(hf_repo, '/').size() != 2) {
|
|
166
|
+
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
|
467
167
|
}
|
|
468
|
-
|
|
469
|
-
long res_code;
|
|
470
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
|
471
|
-
|
|
472
|
-
return { res_code, std::move(res_buffer) };
|
|
168
|
+
return {hf_repo, tag};
|
|
473
169
|
}
|
|
474
170
|
|
|
475
|
-
#
|
|
171
|
+
#if defined(LLAMA_USE_HTTPLIB)
|
|
476
172
|
|
|
477
173
|
class ProgressBar {
|
|
478
174
|
static inline std::mutex mutex;
|
|
@@ -617,9 +313,11 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
617
313
|
}
|
|
618
314
|
|
|
619
315
|
// download one single file from remote URL to local path
|
|
620
|
-
|
|
316
|
+
// returns status code or -1 on error
|
|
317
|
+
static int common_download_file_single_online(const std::string & url,
|
|
621
318
|
const std::string & path,
|
|
622
|
-
const std::string & bearer_token
|
|
319
|
+
const std::string & bearer_token,
|
|
320
|
+
const common_header_list & custom_headers) {
|
|
623
321
|
static const int max_attempts = 3;
|
|
624
322
|
static const int retry_delay_seconds = 2;
|
|
625
323
|
|
|
@@ -629,6 +327,9 @@ static bool common_download_file_single_online(const std::string & url,
|
|
|
629
327
|
if (!bearer_token.empty()) {
|
|
630
328
|
default_headers.insert({"Authorization", "Bearer " + bearer_token});
|
|
631
329
|
}
|
|
330
|
+
for (const auto & h : custom_headers) {
|
|
331
|
+
default_headers.emplace(h.first, h.second);
|
|
332
|
+
}
|
|
632
333
|
cli.set_default_headers(default_headers);
|
|
633
334
|
|
|
634
335
|
const bool file_exists = std::filesystem::exists(path);
|
|
@@ -647,8 +348,10 @@ static bool common_download_file_single_online(const std::string & url,
|
|
|
647
348
|
LOG_WRN("%s: HEAD invalid http status code received: %d\n", __func__, head ? head->status : -1);
|
|
648
349
|
if (file_exists) {
|
|
649
350
|
LOG_INF("%s: Using cached file (HEAD failed): %s\n", __func__, path.c_str());
|
|
650
|
-
return
|
|
351
|
+
return 304; // 304 Not Modified - fake cached response
|
|
651
352
|
}
|
|
353
|
+
return head->status; // cannot use cached file, return raw status code
|
|
354
|
+
// TODO: maybe retry only on certain codes
|
|
652
355
|
}
|
|
653
356
|
|
|
654
357
|
std::string etag;
|
|
@@ -680,12 +383,12 @@ static bool common_download_file_single_online(const std::string & url,
|
|
|
680
383
|
if (file_exists) {
|
|
681
384
|
if (!should_download_from_scratch) {
|
|
682
385
|
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
683
|
-
return
|
|
386
|
+
return 304; // 304 Not Modified - fake cached response
|
|
684
387
|
}
|
|
685
388
|
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
686
389
|
if (remove(path.c_str()) != 0) {
|
|
687
390
|
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
688
|
-
return
|
|
391
|
+
return -1;
|
|
689
392
|
}
|
|
690
393
|
}
|
|
691
394
|
|
|
@@ -697,7 +400,7 @@ static bool common_download_file_single_online(const std::string & url,
|
|
|
697
400
|
existing_size = std::filesystem::file_size(path_temporary);
|
|
698
401
|
} else if (remove(path_temporary.c_str()) != 0) {
|
|
699
402
|
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
700
|
-
return
|
|
403
|
+
return -1;
|
|
701
404
|
}
|
|
702
405
|
}
|
|
703
406
|
|
|
@@ -718,15 +421,16 @@ static bool common_download_file_single_online(const std::string & url,
|
|
|
718
421
|
|
|
719
422
|
if (std::rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
720
423
|
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
721
|
-
return
|
|
424
|
+
return -1;
|
|
722
425
|
}
|
|
723
426
|
if (!etag.empty()) {
|
|
724
427
|
write_etag(path, etag);
|
|
725
428
|
}
|
|
726
|
-
|
|
429
|
+
|
|
430
|
+
return head->status; // TODO: use actual GET status?
|
|
727
431
|
}
|
|
728
432
|
|
|
729
|
-
return
|
|
433
|
+
return -1; // max attempts reached
|
|
730
434
|
}
|
|
731
435
|
|
|
732
436
|
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url,
|
|
@@ -734,13 +438,9 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
|
|
|
734
438
|
auto [cli, parts] = common_http_client(url);
|
|
735
439
|
|
|
736
440
|
httplib::Headers headers = {{"User-Agent", "llama-cpp"}};
|
|
441
|
+
|
|
737
442
|
for (const auto & header : params.headers) {
|
|
738
|
-
|
|
739
|
-
if (pos != std::string::npos) {
|
|
740
|
-
headers.emplace(header.substr(0, pos), header.substr(pos + 1));
|
|
741
|
-
} else {
|
|
742
|
-
headers.emplace(header, "");
|
|
743
|
-
}
|
|
443
|
+
headers.emplace(header.first, header.second);
|
|
744
444
|
}
|
|
745
445
|
|
|
746
446
|
if (params.timeout > 0) {
|
|
@@ -765,36 +465,45 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
|
|
|
765
465
|
return { res->status, std::move(buf) };
|
|
766
466
|
}
|
|
767
467
|
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
const std::string & path,
|
|
774
|
-
const std::string & bearer_token,
|
|
775
|
-
bool offline) {
|
|
468
|
+
int common_download_file_single(const std::string & url,
|
|
469
|
+
const std::string & path,
|
|
470
|
+
const std::string & bearer_token,
|
|
471
|
+
bool offline,
|
|
472
|
+
const common_header_list & headers) {
|
|
776
473
|
if (!offline) {
|
|
777
|
-
return common_download_file_single_online(url, path, bearer_token);
|
|
474
|
+
return common_download_file_single_online(url, path, bearer_token, headers);
|
|
778
475
|
}
|
|
779
476
|
|
|
780
477
|
if (!std::filesystem::exists(path)) {
|
|
781
478
|
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
782
|
-
return
|
|
479
|
+
return -1;
|
|
783
480
|
}
|
|
784
481
|
|
|
785
482
|
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
786
|
-
return
|
|
483
|
+
return 304; // Not Modified - fake cached response
|
|
787
484
|
}
|
|
788
485
|
|
|
789
486
|
// download multiple files from remote URLs to local paths
|
|
790
487
|
// the input is a vector of pairs <url, path>
|
|
791
|
-
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls,
|
|
488
|
+
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls,
|
|
489
|
+
const std::string & bearer_token,
|
|
490
|
+
bool offline,
|
|
491
|
+
const common_header_list & headers) {
|
|
792
492
|
// Prepare download in parallel
|
|
793
493
|
std::vector<std::future<bool>> futures_download;
|
|
494
|
+
futures_download.reserve(urls.size());
|
|
495
|
+
|
|
794
496
|
for (auto const & item : urls) {
|
|
795
|
-
futures_download.push_back(
|
|
796
|
-
|
|
797
|
-
|
|
497
|
+
futures_download.push_back(
|
|
498
|
+
std::async(
|
|
499
|
+
std::launch::async,
|
|
500
|
+
[&bearer_token, offline, &headers](const std::pair<std::string, std::string> & it) -> bool {
|
|
501
|
+
const int http_status = common_download_file_single(it.first, it.second, bearer_token, offline, headers);
|
|
502
|
+
return is_http_status_ok(http_status);
|
|
503
|
+
},
|
|
504
|
+
item
|
|
505
|
+
)
|
|
506
|
+
);
|
|
798
507
|
}
|
|
799
508
|
|
|
800
509
|
// Wait for all downloads to complete
|
|
@@ -807,17 +516,18 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
|
|
|
807
516
|
return true;
|
|
808
517
|
}
|
|
809
518
|
|
|
810
|
-
bool common_download_model(
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
519
|
+
bool common_download_model(const common_params_model & model,
|
|
520
|
+
const std::string & bearer_token,
|
|
521
|
+
bool offline,
|
|
522
|
+
const common_header_list & headers) {
|
|
814
523
|
// Basic validation of the model.url
|
|
815
524
|
if (model.url.empty()) {
|
|
816
525
|
LOG_ERR("%s: invalid model url\n", __func__);
|
|
817
526
|
return false;
|
|
818
527
|
}
|
|
819
528
|
|
|
820
|
-
|
|
529
|
+
const int http_status = common_download_file_single(model.url, model.path, bearer_token, offline, headers);
|
|
530
|
+
if (!is_http_status_ok(http_status)) {
|
|
821
531
|
return false;
|
|
822
532
|
}
|
|
823
533
|
|
|
@@ -876,27 +586,26 @@ bool common_download_model(
|
|
|
876
586
|
}
|
|
877
587
|
|
|
878
588
|
// Download in parallel
|
|
879
|
-
common_download_file_multiple(urls, bearer_token, offline);
|
|
589
|
+
common_download_file_multiple(urls, bearer_token, offline, headers);
|
|
880
590
|
}
|
|
881
591
|
|
|
882
592
|
return true;
|
|
883
593
|
}
|
|
884
594
|
|
|
885
|
-
common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
}
|
|
595
|
+
common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
|
|
596
|
+
const std::string & bearer_token,
|
|
597
|
+
bool offline,
|
|
598
|
+
const common_header_list & custom_headers) {
|
|
599
|
+
// the returned hf_repo is without tag
|
|
600
|
+
auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);
|
|
892
601
|
|
|
893
602
|
std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
|
|
894
603
|
|
|
895
604
|
// headers
|
|
896
|
-
|
|
897
|
-
headers.push_back("Accept
|
|
605
|
+
common_header_list headers = custom_headers;
|
|
606
|
+
headers.push_back({"Accept", "application/json"});
|
|
898
607
|
if (!bearer_token.empty()) {
|
|
899
|
-
headers.push_back("Authorization
|
|
608
|
+
headers.push_back({"Authorization", "Bearer " + bearer_token});
|
|
900
609
|
}
|
|
901
610
|
// Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response
|
|
902
611
|
// User-Agent header is already set in common_remote_get_content, no need to set it here
|
|
@@ -952,7 +661,7 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, cons
|
|
|
952
661
|
} else if (res_code == 401) {
|
|
953
662
|
throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token");
|
|
954
663
|
} else {
|
|
955
|
-
throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str()));
|
|
664
|
+
throw std::runtime_error(string_format("error from HF API (%s), response code: %ld, data: %s", url.c_str(), res_code, res_str.c_str()));
|
|
956
665
|
}
|
|
957
666
|
|
|
958
667
|
// check response
|
|
@@ -1031,9 +740,10 @@ std::string common_docker_resolve_model(const std::string & docker) {
|
|
|
1031
740
|
const std::string url_prefix = "https://registry-1.docker.io/v2/" + repo;
|
|
1032
741
|
std::string manifest_url = url_prefix + "/manifests/" + tag;
|
|
1033
742
|
common_remote_params manifest_params;
|
|
1034
|
-
manifest_params.headers.push_back("Authorization
|
|
1035
|
-
manifest_params.headers.push_back(
|
|
1036
|
-
"
|
|
743
|
+
manifest_params.headers.push_back({"Authorization", "Bearer " + token});
|
|
744
|
+
manifest_params.headers.push_back({"Accept",
|
|
745
|
+
"application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json"
|
|
746
|
+
});
|
|
1037
747
|
auto manifest_res = common_remote_get_content(manifest_url, manifest_params);
|
|
1038
748
|
if (manifest_res.first != 200) {
|
|
1039
749
|
throw std::runtime_error("Failed to get Docker manifest, HTTP code: " + std::to_string(manifest_res.first));
|
|
@@ -1070,7 +780,8 @@ std::string common_docker_resolve_model(const std::string & docker) {
|
|
|
1070
780
|
std::string local_path = fs_get_cache_file(model_filename);
|
|
1071
781
|
|
|
1072
782
|
const std::string blob_url = url_prefix + "/blobs/" + gguf_digest;
|
|
1073
|
-
|
|
783
|
+
const int http_status = common_download_file_single(blob_url, local_path, token, false, {});
|
|
784
|
+
if (!is_http_status_ok(http_status)) {
|
|
1074
785
|
throw std::runtime_error("Failed to download Docker Model");
|
|
1075
786
|
}
|
|
1076
787
|
|
|
@@ -1084,11 +795,11 @@ std::string common_docker_resolve_model(const std::string & docker) {
|
|
|
1084
795
|
|
|
1085
796
|
#else
|
|
1086
797
|
|
|
1087
|
-
common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
|
|
798
|
+
common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool, const common_header_list &) {
|
|
1088
799
|
throw std::runtime_error("download functionality is not enabled in this build");
|
|
1089
800
|
}
|
|
1090
801
|
|
|
1091
|
-
bool common_download_model(const common_params_model &, const std::string &, bool) {
|
|
802
|
+
bool common_download_model(const common_params_model &, const std::string &, bool, const common_header_list &) {
|
|
1092
803
|
throw std::runtime_error("download functionality is not enabled in this build");
|
|
1093
804
|
}
|
|
1094
805
|
|
|
@@ -1096,7 +807,15 @@ std::string common_docker_resolve_model(const std::string &) {
|
|
|
1096
807
|
throw std::runtime_error("download functionality is not enabled in this build");
|
|
1097
808
|
}
|
|
1098
809
|
|
|
1099
|
-
|
|
810
|
+
int common_download_file_single(const std::string &,
|
|
811
|
+
const std::string &,
|
|
812
|
+
const std::string &,
|
|
813
|
+
bool,
|
|
814
|
+
const common_header_list &) {
|
|
815
|
+
throw std::runtime_error("download functionality is not enabled in this build");
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
#endif // defined(LLAMA_USE_HTTPLIB)
|
|
1100
819
|
|
|
1101
820
|
std::vector<common_cached_model_info> common_list_cached_models() {
|
|
1102
821
|
std::vector<common_cached_model_info> models;
|