@fugood/llama.node 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +14 -14
- package/src/llama.cpp/common/arg.cpp +266 -202
- package/src/llama.cpp/common/chat.cpp +27 -15
- package/src/llama.cpp/ggml/CMakeLists.txt +37 -21
- package/src/llama.cpp/ggml/src/CMakeLists.txt +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +4 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/common.h +14 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +74 -851
- package/src/llama.cpp/src/llama-cparams.h +1 -1
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fugood/llama.node",
|
|
3
3
|
"access": "public",
|
|
4
|
-
"version": "1.2.
|
|
4
|
+
"version": "1.2.2",
|
|
5
5
|
"description": "An another Node binding of llama.cpp",
|
|
6
6
|
"main": "lib/index.js",
|
|
7
7
|
"scripts": {
|
|
@@ -72,19 +72,19 @@
|
|
|
72
72
|
"CMakeLists.txt"
|
|
73
73
|
],
|
|
74
74
|
"optionalDependencies": {
|
|
75
|
-
"@fugood/node-llama-linux-x64": "1.2.
|
|
76
|
-
"@fugood/node-llama-linux-x64-vulkan": "1.2.
|
|
77
|
-
"@fugood/node-llama-linux-x64-cuda": "1.2.
|
|
78
|
-
"@fugood/node-llama-linux-arm64": "1.2.
|
|
79
|
-
"@fugood/node-llama-linux-arm64-vulkan": "1.2.
|
|
80
|
-
"@fugood/node-llama-linux-arm64-cuda": "1.2.
|
|
81
|
-
"@fugood/node-llama-win32-x64": "1.2.
|
|
82
|
-
"@fugood/node-llama-win32-x64-vulkan": "1.2.
|
|
83
|
-
"@fugood/node-llama-win32-x64-cuda": "1.2.
|
|
84
|
-
"@fugood/node-llama-win32-arm64": "1.2.
|
|
85
|
-
"@fugood/node-llama-win32-arm64-vulkan": "1.2.
|
|
86
|
-
"@fugood/node-llama-darwin-x64": "1.2.
|
|
87
|
-
"@fugood/node-llama-darwin-arm64": "1.2.
|
|
75
|
+
"@fugood/node-llama-linux-x64": "1.2.2",
|
|
76
|
+
"@fugood/node-llama-linux-x64-vulkan": "1.2.2",
|
|
77
|
+
"@fugood/node-llama-linux-x64-cuda": "1.2.2",
|
|
78
|
+
"@fugood/node-llama-linux-arm64": "1.2.2",
|
|
79
|
+
"@fugood/node-llama-linux-arm64-vulkan": "1.2.2",
|
|
80
|
+
"@fugood/node-llama-linux-arm64-cuda": "1.2.2",
|
|
81
|
+
"@fugood/node-llama-win32-x64": "1.2.2",
|
|
82
|
+
"@fugood/node-llama-win32-x64-vulkan": "1.2.2",
|
|
83
|
+
"@fugood/node-llama-win32-x64-cuda": "1.2.2",
|
|
84
|
+
"@fugood/node-llama-win32-arm64": "1.2.2",
|
|
85
|
+
"@fugood/node-llama-win32-arm64-vulkan": "1.2.2",
|
|
86
|
+
"@fugood/node-llama-darwin-x64": "1.2.2",
|
|
87
|
+
"@fugood/node-llama-darwin-arm64": "1.2.2"
|
|
88
88
|
},
|
|
89
89
|
"devDependencies": {
|
|
90
90
|
"@babel/preset-env": "^7.24.4",
|
|
@@ -57,12 +57,32 @@ static std::string read_file(const std::string & fname) {
|
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
static void write_file(const std::string & fname, const std::string & content) {
|
|
60
|
-
std::
|
|
60
|
+
const std::string fname_tmp = fname + ".tmp";
|
|
61
|
+
std::ofstream file(fname_tmp);
|
|
61
62
|
if (!file) {
|
|
62
63
|
throw std::runtime_error(string_format("error: failed to open file '%s'\n", fname.c_str()));
|
|
63
64
|
}
|
|
64
|
-
|
|
65
|
-
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
file << content;
|
|
68
|
+
file.close();
|
|
69
|
+
|
|
70
|
+
// Makes write atomic
|
|
71
|
+
if (rename(fname_tmp.c_str(), fname.c_str()) != 0) {
|
|
72
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, fname_tmp.c_str(), fname.c_str());
|
|
73
|
+
// If rename fails, try to delete the temporary file
|
|
74
|
+
if (remove(fname_tmp.c_str()) != 0) {
|
|
75
|
+
LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
} catch (...) {
|
|
79
|
+
// If anything fails, try to delete the temporary file
|
|
80
|
+
if (remove(fname_tmp.c_str()) != 0) {
|
|
81
|
+
LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, fname_tmp.c_str());
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
throw std::runtime_error(string_format("error: failed to write file '%s'\n", fname.c_str()));
|
|
85
|
+
}
|
|
66
86
|
}
|
|
67
87
|
|
|
68
88
|
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
|
@@ -217,250 +237,294 @@ struct curl_slist_ptr {
|
|
|
217
237
|
}
|
|
218
238
|
};
|
|
219
239
|
|
|
220
|
-
|
|
221
|
-
|
|
240
|
+
// Run the transfer configured on `curl` and return libcurl's result code.
// On failure the libcurl error description is logged so the cause (DNS, TLS,
// timeout, ...) is visible; the code is returned unchanged for the caller to act on.
static CURLcode common_curl_perf(CURL * curl) {
    const CURLcode res = curl_easy_perform(curl);
    if (res != CURLE_OK) {
        LOG_ERR("%s: curl_easy_perform() failed: %s\n", __func__, curl_easy_strerror(res));
    }

    return res;
}
|
|
248
|
+
|
|
249
|
+
// HTTP response header values captured by common_header_callback (ETag, Last-Modified, Accept-Ranges)
|
|
250
|
+
struct common_load_model_from_url_headers {
|
|
251
|
+
std::string etag;
|
|
252
|
+
std::string last_modified;
|
|
253
|
+
std::string accept_ranges;
|
|
254
|
+
};
|
|
222
255
|
|
|
223
|
-
|
|
224
|
-
|
|
256
|
+
// Deleter for std::unique_ptr<FILE, FILE_deleter>: closes the stream with fclose().
struct FILE_deleter {
    void operator()(FILE * stream) const {
        fclose(stream);
    }
};
|
|
225
259
|
|
|
226
|
-
|
|
227
|
-
|
|
260
|
+
static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
|
|
261
|
+
common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
|
|
262
|
+
static std::regex header_regex("([^:]+): (.*)\r\n");
|
|
263
|
+
static std::regex etag_regex("ETag", std::regex_constants::icase);
|
|
264
|
+
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
|
265
|
+
static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
|
|
266
|
+
std::string header(buffer, n_items);
|
|
267
|
+
std::smatch match;
|
|
268
|
+
if (std::regex_match(header, match, header_regex)) {
|
|
269
|
+
const std::string & key = match[1];
|
|
270
|
+
const std::string & value = match[2];
|
|
271
|
+
if (std::regex_match(key, match, etag_regex)) {
|
|
272
|
+
headers->etag = value;
|
|
273
|
+
} else if (std::regex_match(key, match, last_modified_regex)) {
|
|
274
|
+
headers->last_modified = value;
|
|
275
|
+
} else if (std::regex_match(key, match, accept_ranges_regex)) {
|
|
276
|
+
headers->accept_ranges = value;
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
return n_items;
|
|
281
|
+
}
|
|
228
282
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
}
|
|
283
|
+
// Write callback (installed through CURLOPT_WRITEFUNCTION): forwards the received
// chunk to the FILE* passed as `fd` and returns whatever std::fwrite reports.
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
    FILE * const stream  = static_cast<FILE *>(fd);
    const size_t written = std::fwrite(data, size, nmemb, stream);
    return written;
}
|
|
233
286
|
|
|
234
|
-
|
|
235
|
-
|
|
287
|
+
// Helper that masks credentials embedded in a URL before it is logged.
//
// For "scheme://user[:password]@host/..." it returns "scheme://********@host/...".
// URLs without a userinfo part, and strings that are not of the form "scheme://...",
// are returned unchanged.
static std::string llama_download_hide_password_in_url(const std::string & url) {
    // Pattern: scheme://user[:password]@host[...]
    //   group 1 = scheme including "://"
    //   group 2 = userinfo including the trailing '@'. It is confined to the authority
    //             (stops at '/', '?' or '#') and is greedy, so a password that itself
    //             contains '@' is masked completely.
    //   group 3 = rest of the URL (host, path, query, ...)
    static const std::regex url_regex(R"(^([A-Za-z][A-Za-z0-9+.-]*://)([^/?#]*@)(.*)$)");
    std::smatch             match;

    if (std::regex_match(url, match, url_regex)) {
        return match[1].str() + "********@" + match[3].str();
    }

    return url;  // No credentials found or malformed URL
}
|
|
243
303
|
|
|
244
|
-
|
|
304
|
+
// Configure `curl` for a header-only request against `url`; response headers are
// routed to common_header_callback.
static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
    // Target URL; HTTP redirects are followed.
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());

#if defined(_WIN32)
    // CURLSSLOPT_NATIVE_CA makes libcurl use the operating system's standard
    // certificate store. Currently implemented under MS-Windows.
    curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
#endif

    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L);  // no progress meter for the head request
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);      // no body requested: triggers the HEAD verb
}
|
|
246
319
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
auto file_exists = std::filesystem::exists(path);
|
|
320
|
+
// Configure `curl` for a body transfer: clears CURLOPT_NOBODY, sends the payload to
// common_write_callback and turns the progress display on.
static void common_curl_easy_setopt_get(CURL * curl) {
    // display download progress
    curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);

    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
    curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
}
|
|
257
327
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
std::ifstream metadata_in(metadata_path);
|
|
265
|
-
if (metadata_in.good()) {
|
|
266
|
-
try {
|
|
267
|
-
metadata_in >> metadata;
|
|
268
|
-
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
|
269
|
-
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
270
|
-
etag = metadata.at("etag");
|
|
271
|
-
}
|
|
272
|
-
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
273
|
-
last_modified = metadata.at("lastModified");
|
|
274
|
-
}
|
|
275
|
-
} catch (const nlohmann::json::exception & e) {
|
|
276
|
-
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
280
|
-
} else {
|
|
281
|
-
if (offline) {
|
|
282
|
-
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
283
|
-
return false;
|
|
284
|
-
}
|
|
285
|
-
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
328
|
+
// Download the body of the request configured on `curl` into `path_temporary`.
// If `path_temporary` already exists, its current size is used as the start of a
// byte range and the new data is appended to it.
// Returns true when the transfer finished with CURLE_OK, false when the file could
// not be opened or the transfer failed.
// NOTE(review): this function does not itself check that the server honours range
// requests; it relies on the caller for that - confirm at the call site.
static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
    const bool has_partial = std::filesystem::exists(path_temporary);
    if (has_partial) {
        const std::string offset = std::to_string(std::filesystem::file_size(path_temporary));
        LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, offset.c_str());

        // Open-ended range: from the current size to the end of the resource.
        const std::string byte_range = offset + "-";
        curl_easy_setopt(curl, CURLOPT_RANGE, byte_range.c_str());
    }

    // Append mode is used unconditionally because the download may be a resume.
    std::unique_ptr<FILE, FILE_deleter> sink(fopen(path_temporary.c_str(), "ab"));
    if (sink == nullptr) {
        LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
        return false;
    }

    common_curl_easy_setopt_get(curl);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, sink.get());

    const CURLcode status = common_curl_perf(curl);
    return status == CURLE_OK;
}
|
|
348
|
+
|
|
349
|
+
// Perform the header-only request for `url` on `curl`.
// Appends a "User-Agent: llama-cpp" header and, when `bearer_token` is not empty, an
// "Authorization: Bearer <token>" header to `http_headers`, attaches the list to the
// handle and runs the transfer.
// Returns false if `curl` is null or the transfer did not finish with CURLE_OK.
static bool common_download_head(CURL *              curl,
                                 curl_slist_ptr &    http_headers,
                                 const std::string & url,
                                 const std::string & bearer_token) {
    if (curl == nullptr) {
        LOG_ERR("%s: error initializing libcurl\n", __func__);
        return false;
    }

    http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");

    // Check if hf-token or bearer-token was specified
    if (!bearer_token.empty()) {
        const std::string authorization = "Authorization: Bearer " + bearer_token;
        http_headers.ptr = curl_slist_append(http_headers.ptr, authorization.c_str());
    }
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);

    common_curl_easy_setopt_head(curl, url);

    const CURLcode status = common_curl_perf(curl);
    return status == CURLE_OK;
}
|
|
331
369
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
370
|
+
// download one single file from remote URL to local path
|
|
371
|
+
static bool common_download_file_single(const std::string & url,
|
|
372
|
+
const std::string & path,
|
|
373
|
+
const std::string & bearer_token,
|
|
374
|
+
bool offline) {
|
|
375
|
+
// If the file exists, check its JSON metadata companion file.
|
|
376
|
+
std::string metadata_path = path + ".json";
|
|
377
|
+
static const int max_attempts = 3;
|
|
378
|
+
static const int retry_delay_seconds = 2;
|
|
379
|
+
for (int i = 0; i < max_attempts; ++i) {
|
|
380
|
+
nlohmann::json metadata; // TODO @ngxson : get rid of this json, use regex instead
|
|
381
|
+
std::string etag;
|
|
382
|
+
std::string last_modified;
|
|
383
|
+
|
|
384
|
+
// Check if the file already exists locally
|
|
385
|
+
const auto file_exists = std::filesystem::exists(path);
|
|
386
|
+
if (file_exists) {
|
|
387
|
+
if (offline) {
|
|
388
|
+
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
|
|
389
|
+
return true; // skip verification/downloading
|
|
390
|
+
}
|
|
391
|
+
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
|
|
392
|
+
std::ifstream metadata_in(metadata_path);
|
|
393
|
+
if (metadata_in.good()) {
|
|
394
|
+
try {
|
|
395
|
+
metadata_in >> metadata;
|
|
396
|
+
LOG_DBG("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(),
|
|
397
|
+
metadata.dump().c_str());
|
|
398
|
+
if (metadata.contains("etag") && metadata.at("etag").is_string()) {
|
|
399
|
+
etag = metadata.at("etag");
|
|
400
|
+
}
|
|
401
|
+
if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
|
|
402
|
+
last_modified = metadata.at("lastModified");
|
|
403
|
+
}
|
|
404
|
+
} catch (const nlohmann::json::exception & e) {
|
|
405
|
+
LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
|
406
|
+
}
|
|
341
407
|
}
|
|
408
|
+
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
|
|
409
|
+
} else {
|
|
410
|
+
if (offline) {
|
|
411
|
+
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
|
|
412
|
+
return false;
|
|
413
|
+
}
|
|
414
|
+
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
342
415
|
}
|
|
343
|
-
return n_items;
|
|
344
|
-
};
|
|
345
416
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
|
|
349
|
-
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
|
417
|
+
bool head_request_ok = false;
|
|
418
|
+
bool should_download = !file_exists; // by default, we should download if the file does not exist
|
|
350
419
|
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
360
|
-
if (http_code == 200) {
|
|
361
|
-
head_request_ok = true;
|
|
362
|
-
} else {
|
|
363
|
-
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
|
364
|
-
head_request_ok = false;
|
|
365
|
-
}
|
|
366
|
-
|
|
367
|
-
// if head_request_ok is false, we don't have the etag or last-modified headers
|
|
368
|
-
// we leave should_download as-is, which is true if the file does not exist
|
|
369
|
-
if (head_request_ok) {
|
|
370
|
-
// check if ETag or Last-Modified headers are different
|
|
371
|
-
// if it is, we need to download the file again
|
|
372
|
-
if (!etag.empty() && etag != headers.etag) {
|
|
373
|
-
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str());
|
|
374
|
-
should_download = true;
|
|
375
|
-
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
|
376
|
-
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str());
|
|
377
|
-
should_download = true;
|
|
420
|
+
// Initialize libcurl
|
|
421
|
+
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
422
|
+
common_load_model_from_url_headers headers;
|
|
423
|
+
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
|
424
|
+
curl_slist_ptr http_headers;
|
|
425
|
+
const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
|
|
426
|
+
if (!was_perform_successful) {
|
|
427
|
+
head_request_ok = false;
|
|
378
428
|
}
|
|
379
|
-
}
|
|
380
429
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
if (
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
430
|
+
long http_code = 0;
|
|
431
|
+
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
432
|
+
if (http_code == 200) {
|
|
433
|
+
head_request_ok = true;
|
|
434
|
+
} else {
|
|
435
|
+
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
|
436
|
+
head_request_ok = false;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
// if head_request_ok is false, we don't have the etag or last-modified headers
|
|
440
|
+
// we leave should_download as-is, which is true if the file does not exist
|
|
441
|
+
bool should_download_from_scratch = false;
|
|
442
|
+
if (head_request_ok) {
|
|
443
|
+
// check if ETag or Last-Modified headers are different
|
|
444
|
+
// if it is, we need to download the file again
|
|
445
|
+
if (!etag.empty() && etag != headers.etag) {
|
|
446
|
+
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
|
|
447
|
+
headers.etag.c_str());
|
|
448
|
+
should_download = true;
|
|
449
|
+
should_download_from_scratch = true;
|
|
450
|
+
} else if (!last_modified.empty() && last_modified != headers.last_modified) {
|
|
451
|
+
LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__,
|
|
452
|
+
last_modified.c_str(), headers.last_modified.c_str());
|
|
453
|
+
should_download = true;
|
|
454
|
+
should_download_from_scratch = true;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
|
|
459
|
+
if (should_download) {
|
|
460
|
+
if (file_exists &&
|
|
461
|
+
!accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
|
|
462
|
+
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
463
|
+
if (remove(path.c_str()) != 0) {
|
|
464
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
465
|
+
return false;
|
|
466
|
+
}
|
|
388
467
|
}
|
|
389
|
-
}
|
|
390
468
|
|
|
391
|
-
|
|
469
|
+
const std::string path_temporary = path + ".downloadInProgress";
|
|
470
|
+
if (should_download_from_scratch) {
|
|
471
|
+
if (std::filesystem::exists(path_temporary)) {
|
|
472
|
+
if (remove(path_temporary.c_str()) != 0) {
|
|
473
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
474
|
+
return false;
|
|
475
|
+
}
|
|
476
|
+
}
|
|
392
477
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
478
|
+
if (std::filesystem::exists(path)) {
|
|
479
|
+
if (remove(path.c_str()) != 0) {
|
|
480
|
+
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
481
|
+
return false;
|
|
482
|
+
}
|
|
483
|
+
}
|
|
396
484
|
}
|
|
397
|
-
};
|
|
398
|
-
|
|
399
|
-
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "wb"));
|
|
400
|
-
if (!outfile) {
|
|
401
|
-
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str());
|
|
402
|
-
return false;
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd);
|
|
406
|
-
auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t {
|
|
407
|
-
return fwrite(data, size, nmemb, (FILE *)fd);
|
|
408
|
-
};
|
|
409
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L);
|
|
410
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
411
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get());
|
|
412
485
|
|
|
413
|
-
|
|
414
|
-
|
|
486
|
+
// Write the updated JSON metadata file.
|
|
487
|
+
metadata.update({
|
|
488
|
+
{ "url", url },
|
|
489
|
+
{ "etag", headers.etag },
|
|
490
|
+
{ "lastModified", headers.last_modified }
|
|
491
|
+
});
|
|
492
|
+
write_file(metadata_path, metadata.dump(4));
|
|
493
|
+
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
494
|
+
|
|
495
|
+
// start the download
|
|
496
|
+
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
|
|
497
|
+
__func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
|
|
498
|
+
headers.etag.c_str(), headers.last_modified.c_str());
|
|
499
|
+
const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
|
|
500
|
+
if (!was_pull_successful) {
|
|
501
|
+
if (i + 1 < max_attempts) {
|
|
502
|
+
const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
|
|
503
|
+
LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
|
|
504
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
|
505
|
+
} else {
|
|
506
|
+
LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
|
507
|
+
}
|
|
415
508
|
|
|
416
|
-
|
|
417
|
-
auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string {
|
|
418
|
-
std::size_t protocol_pos = url.find("://");
|
|
419
|
-
if (protocol_pos == std::string::npos) {
|
|
420
|
-
return url; // Malformed URL
|
|
509
|
+
continue;
|
|
421
510
|
}
|
|
422
511
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
512
|
+
long http_code = 0;
|
|
513
|
+
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
514
|
+
if (http_code < 200 || http_code >= 400) {
|
|
515
|
+
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
516
|
+
return false;
|
|
426
517
|
}
|
|
427
518
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS, "GET");
|
|
435
|
-
if (!was_perform_successful) {
|
|
436
|
-
return false;
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
long http_code = 0;
|
|
440
|
-
curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
441
|
-
if (http_code < 200 || http_code >= 400) {
|
|
442
|
-
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
443
|
-
return false;
|
|
519
|
+
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
520
|
+
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
521
|
+
return false;
|
|
522
|
+
}
|
|
523
|
+
} else {
|
|
524
|
+
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
444
525
|
}
|
|
445
526
|
|
|
446
|
-
|
|
447
|
-
outfile.reset();
|
|
448
|
-
|
|
449
|
-
// Write the updated JSON metadata file.
|
|
450
|
-
metadata.update({
|
|
451
|
-
{"url", url},
|
|
452
|
-
{"etag", headers.etag},
|
|
453
|
-
{"lastModified", headers.last_modified}
|
|
454
|
-
});
|
|
455
|
-
write_file(metadata_path, metadata.dump(4));
|
|
456
|
-
LOG_DBG("%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
|
457
|
-
|
|
458
|
-
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
459
|
-
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
460
|
-
return false;
|
|
461
|
-
}
|
|
462
|
-
} else {
|
|
463
|
-
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
527
|
+
break;
|
|
464
528
|
}
|
|
465
529
|
|
|
466
530
|
return true;
|
|
@@ -770,7 +834,7 @@ static std::string common_docker_get_token(const std::string & repo) {
|
|
|
770
834
|
}
|
|
771
835
|
|
|
772
836
|
static std::string common_docker_resolve_model(const std::string & docker) {
|
|
773
|
-
// Parse ai/smollm2:135M-
|
|
837
|
+
// Parse ai/smollm2:135M-Q4_0
|
|
774
838
|
size_t colon_pos = docker.find(':');
|
|
775
839
|
std::string repo, tag;
|
|
776
840
|
if (colon_pos != std::string::npos) {
|
|
@@ -1727,10 +1727,12 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
|
|
|
1727
1727
|
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
|
1728
1728
|
LOG_DBG("%s\n", __func__);
|
|
1729
1729
|
common_chat_params data;
|
|
1730
|
-
|
|
1730
|
+
const std::optional<json> tools_override = json();
|
|
1731
|
+
const std::optional<json> additional_context = json {
|
|
1731
1732
|
{"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
|
|
1732
1733
|
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
|
1733
|
-
}
|
|
1734
|
+
};
|
|
1735
|
+
data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, tools_override, additional_context);
|
|
1734
1736
|
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
|
1735
1737
|
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
|
|
1736
1738
|
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
@@ -2216,15 +2218,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
|
|
|
2216
2218
|
|
|
2217
2219
|
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
|
|
2218
2220
|
// Parse thinking tags
|
|
2221
|
+
static const common_regex start_think_regex(regex_escape("<think>"));
|
|
2222
|
+
static const common_regex end_think_regex(regex_escape("</think>"));
|
|
2223
|
+
// Granite models output partial tokens such as "<" and "<think".
|
|
2224
|
+
// By leveraging try_consume_regex()/try_find_regex() throwing
|
|
2225
|
+
// common_chat_msg_partial_exception for these partial tokens,
|
|
2226
|
+
// processing is interrupted and the tokens are not passed to add_content().
|
|
2227
|
+
if (auto res = builder.try_consume_regex(start_think_regex)) {
|
|
2228
|
+
// Restore position for try_parse_reasoning()
|
|
2229
|
+
builder.move_to(res->groups[0].begin);
|
|
2230
|
+
builder.try_find_regex(end_think_regex, std::string::npos, false);
|
|
2231
|
+
// Restore position for try_parse_reasoning()
|
|
2232
|
+
builder.move_to(res->groups[0].begin);
|
|
2233
|
+
}
|
|
2219
2234
|
builder.try_parse_reasoning("<think>", "</think>");
|
|
2220
2235
|
|
|
2221
|
-
// Parse response tags
|
|
2222
|
-
static const common_regex
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
builder.
|
|
2236
|
+
// Parse response tags
|
|
2237
|
+
static const common_regex start_response_regex(regex_escape("<response>"));
|
|
2238
|
+
static const common_regex end_response_regex(regex_escape("</response>"));
|
|
2239
|
+
// Granite models output partial tokens such as "<" and "<response".
|
|
2240
|
+
// Same hack as reasoning parsing.
|
|
2241
|
+
if (builder.try_consume_regex(start_response_regex)) {
|
|
2242
|
+
builder.try_find_regex(end_response_regex);
|
|
2228
2243
|
}
|
|
2229
2244
|
|
|
2230
2245
|
if (!builder.syntax().parse_tool_calls) {
|
|
@@ -2238,13 +2253,10 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
|
|
|
2238
2253
|
builder.move_to(res->groups[0].end);
|
|
2239
2254
|
|
|
2240
2255
|
// Expect JSON array of tool calls
|
|
2241
|
-
auto
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
|
|
2256
|
+
if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
|
|
2257
|
+
if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
|
|
2258
|
+
throw common_chat_msg_partial_exception("incomplete tool call");
|
|
2245
2259
|
}
|
|
2246
|
-
} else {
|
|
2247
|
-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
|
|
2248
2260
|
}
|
|
2249
2261
|
} else {
|
|
2250
2262
|
builder.add_content(builder.consume_rest());
|