@fugood/llama.node 1.4.14 → 1.5.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +13 -6
- package/lib/index.js +2 -2
- package/lib/index.ts +8 -3
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +77 -65
- package/src/LlamaContext.cpp +31 -34
- package/src/llama.cpp/CMakeLists.txt +24 -8
- package/src/llama.cpp/common/CMakeLists.txt +15 -34
- package/src/llama.cpp/common/arg.cpp +59 -10
- package/src/llama.cpp/common/chat-parser.cpp +115 -0
- package/src/llama.cpp/common/chat.cpp +356 -34
- package/src/llama.cpp/common/chat.h +17 -13
- package/src/llama.cpp/common/common.cpp +0 -1
- package/src/llama.cpp/common/common.h +30 -25
- package/src/llama.cpp/common/debug.cpp +165 -0
- package/src/llama.cpp/common/debug.h +43 -0
- package/src/llama.cpp/common/download.cpp +12 -342
- package/src/llama.cpp/common/download.h +6 -0
- package/src/llama.cpp/common/jinja/caps.cpp +237 -0
- package/src/llama.cpp/common/jinja/caps.h +24 -0
- package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
- package/src/llama.cpp/common/jinja/lexer.h +157 -0
- package/src/llama.cpp/common/jinja/parser.cpp +591 -0
- package/src/llama.cpp/common/jinja/parser.h +21 -0
- package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
- package/src/llama.cpp/common/jinja/runtime.h +628 -0
- package/src/llama.cpp/common/jinja/string.cpp +207 -0
- package/src/llama.cpp/common/jinja/string.h +58 -0
- package/src/llama.cpp/common/jinja/utils.h +49 -0
- package/src/llama.cpp/common/jinja/value.cpp +1221 -0
- package/src/llama.cpp/common/jinja/value.h +464 -0
- package/src/llama.cpp/common/preset.cpp +12 -2
- package/src/llama.cpp/common/sampling.cpp +52 -19
- package/src/llama.cpp/ggml/include/ggml.h +39 -7
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
- package/src/llama.cpp/include/llama-cpp.h +3 -1
- package/src/llama.cpp/include/llama.h +29 -2
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +7 -13
- package/src/llama.cpp/src/llama-adapter.h +1 -3
- package/src/llama.cpp/src/llama-arch.cpp +35 -0
- package/src/llama.cpp/src/llama-arch.h +1 -0
- package/src/llama.cpp/src/llama-chat.cpp +20 -0
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +232 -144
- package/src/llama.cpp/src/llama-context.h +10 -0
- package/src/llama.cpp/src/llama-cparams.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +31 -43
- package/src/llama.cpp/src/llama-hparams.cpp +0 -36
- package/src/llama.cpp/src/llama-hparams.h +38 -1
- package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
- package/src/llama.cpp/src/llama-kv-cache.h +0 -2
- package/src/llama.cpp/src/llama-mmap.cpp +13 -6
- package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
- package/src/llama.cpp/src/llama-model.cpp +215 -97
- package/src/llama.cpp/src/llama-model.h +3 -2
- package/src/llama.cpp/src/llama-sampling.cpp +170 -13
- package/src/llama.cpp/src/llama-vocab.cpp +37 -24
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
- package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
- package/src/llama.cpp/src/models/models.h +13 -2
- package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
|
@@ -19,10 +19,7 @@
|
|
|
19
19
|
#include <thread>
|
|
20
20
|
#include <vector>
|
|
21
21
|
|
|
22
|
-
#if defined(LLAMA_USE_CURL)
|
|
23
|
-
#include <curl/curl.h>
|
|
24
|
-
#include <curl/easy.h>
|
|
25
|
-
#elif defined(LLAMA_USE_HTTPLIB)
|
|
22
|
+
#if defined(LLAMA_USE_HTTPLIB)
|
|
26
23
|
#include "http.h"
|
|
27
24
|
#endif
|
|
28
25
|
|
|
@@ -161,336 +158,17 @@ static bool is_http_status_ok(int status) {
|
|
|
161
158
|
return status >= 200 && status < 400;
|
|
162
159
|
}
|
|
163
160
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
|
|
171
|
-
|
|
172
|
-
// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
|
|
173
|
-
struct curl_slist_ptr {
|
|
174
|
-
struct curl_slist * ptr = nullptr;
|
|
175
|
-
~curl_slist_ptr() {
|
|
176
|
-
if (ptr) {
|
|
177
|
-
curl_slist_free_all(ptr);
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
};
|
|
181
|
-
|
|
182
|
-
static CURLcode common_curl_perf(CURL * curl) {
|
|
183
|
-
CURLcode res = curl_easy_perform(curl);
|
|
184
|
-
if (res != CURLE_OK) {
|
|
185
|
-
LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
return res;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
|
-
// Send a HEAD request to retrieve the etag and last-modified headers
|
|
192
|
-
struct common_load_model_from_url_headers {
|
|
193
|
-
std::string etag;
|
|
194
|
-
std::string last_modified;
|
|
195
|
-
std::string accept_ranges;
|
|
196
|
-
};
|
|
197
|
-
|
|
198
|
-
struct FILE_deleter {
|
|
199
|
-
void operator()(FILE * f) const { fclose(f); }
|
|
200
|
-
};
|
|
201
|
-
|
|
202
|
-
static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
|
|
203
|
-
common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
|
|
204
|
-
static std::regex header_regex("([^:]+): (.*)\r\n");
|
|
205
|
-
static std::regex etag_regex("ETag", std::regex_constants::icase);
|
|
206
|
-
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
|
|
207
|
-
static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
|
|
208
|
-
std::string header(buffer, n_items);
|
|
209
|
-
std::smatch match;
|
|
210
|
-
if (std::regex_match(header, match, header_regex)) {
|
|
211
|
-
const std::string & key = match[1];
|
|
212
|
-
const std::string & value = match[2];
|
|
213
|
-
if (std::regex_match(key, match, etag_regex)) {
|
|
214
|
-
headers->etag = value;
|
|
215
|
-
} else if (std::regex_match(key, match, last_modified_regex)) {
|
|
216
|
-
headers->last_modified = value;
|
|
217
|
-
} else if (std::regex_match(key, match, accept_ranges_regex)) {
|
|
218
|
-
headers->accept_ranges = value;
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
return n_items;
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
|
|
226
|
-
return std::fwrite(data, size, nmemb, static_cast<FILE *>(fd));
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
// helper function to hide password in URL
|
|
230
|
-
static std::string llama_download_hide_password_in_url(const std::string & url) {
|
|
231
|
-
// Use regex to match and replace the user[:password]@ pattern in URLs
|
|
232
|
-
// Pattern: scheme://[user[:password]@]host[...]
|
|
233
|
-
static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]*://)(?:[^/@]+@)?.*$)");
|
|
234
|
-
std::smatch match;
|
|
235
|
-
|
|
236
|
-
if (std::regex_match(url, match, url_regex)) {
|
|
237
|
-
// match[1] = scheme (e.g., "https://")
|
|
238
|
-
// match[2] = user[:password]@ part
|
|
239
|
-
// match[3] = rest of URL (host and path)
|
|
240
|
-
return match[1].str() + "********@" + match[3].str();
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
return url; // No credentials found or malformed URL
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
|
|
247
|
-
// Set the URL, allow to follow http redirection
|
|
248
|
-
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
|
249
|
-
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
|
250
|
-
|
|
251
|
-
# if defined(_WIN32)
|
|
252
|
-
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
|
|
253
|
-
// operating system. Currently implemented under MS-Windows.
|
|
254
|
-
curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
255
|
-
# endif
|
|
256
|
-
|
|
257
|
-
curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
|
|
258
|
-
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
|
|
259
|
-
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
static void common_curl_easy_setopt_get(CURL * curl) {
|
|
263
|
-
curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
|
|
264
|
-
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
|
|
265
|
-
|
|
266
|
-
// display download progress
|
|
267
|
-
curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
|
|
271
|
-
if (std::filesystem::exists(path_temporary)) {
|
|
272
|
-
const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary));
|
|
273
|
-
LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
|
|
274
|
-
const std::string range_str = partial_size + "-";
|
|
275
|
-
curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
// Always open file in append mode could be resuming
|
|
279
|
-
std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "ab"));
|
|
280
|
-
if (!outfile) {
|
|
281
|
-
LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
|
|
282
|
-
return false;
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
common_curl_easy_setopt_get(curl);
|
|
286
|
-
curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
|
|
287
|
-
|
|
288
|
-
return common_curl_perf(curl) == CURLE_OK;
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
static bool common_download_head(CURL * curl,
|
|
292
|
-
curl_slist_ptr & http_headers,
|
|
293
|
-
const std::string & url,
|
|
294
|
-
const std::string & bearer_token) {
|
|
295
|
-
if (!curl) {
|
|
296
|
-
LOG_ERR("%s: error initializing libcurl\n", __func__);
|
|
297
|
-
return false;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
301
|
-
// Check if hf-token or bearer-token was specified
|
|
302
|
-
if (!bearer_token.empty()) {
|
|
303
|
-
std::string auth_header = "Authorization: Bearer " + bearer_token;
|
|
304
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
308
|
-
common_curl_easy_setopt_head(curl, url);
|
|
309
|
-
return common_curl_perf(curl) == CURLE_OK;
|
|
310
|
-
}
|
|
311
|
-
|
|
312
|
-
// download one single file from remote URL to local path
|
|
313
|
-
// returns status code or -1 on error
|
|
314
|
-
static int common_download_file_single_online(const std::string & url,
|
|
315
|
-
const std::string & path,
|
|
316
|
-
const std::string & bearer_token,
|
|
317
|
-
const common_header_list & custom_headers) {
|
|
318
|
-
static const int max_attempts = 3;
|
|
319
|
-
static const int retry_delay_seconds = 2;
|
|
320
|
-
|
|
321
|
-
for (int i = 0; i < max_attempts; ++i) {
|
|
322
|
-
std::string etag;
|
|
323
|
-
|
|
324
|
-
// Check if the file already exists locally
|
|
325
|
-
const auto file_exists = std::filesystem::exists(path);
|
|
326
|
-
if (file_exists) {
|
|
327
|
-
etag = read_etag(path);
|
|
328
|
-
} else {
|
|
329
|
-
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
bool head_request_ok = false;
|
|
333
|
-
bool should_download = !file_exists; // by default, we should download if the file does not exist
|
|
334
|
-
|
|
335
|
-
// Initialize libcurl
|
|
336
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
337
|
-
common_load_model_from_url_headers headers;
|
|
338
|
-
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
|
|
339
|
-
curl_slist_ptr http_headers;
|
|
340
|
-
|
|
341
|
-
for (const auto & h : custom_headers) {
|
|
342
|
-
std::string s = h.first + ": " + h.second;
|
|
343
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, s.c_str());
|
|
344
|
-
}
|
|
345
|
-
const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
|
|
346
|
-
if (!was_perform_successful) {
|
|
347
|
-
head_request_ok = false;
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
long http_code = 0;
|
|
351
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
352
|
-
if (http_code == 200) {
|
|
353
|
-
head_request_ok = true;
|
|
354
|
-
} else {
|
|
355
|
-
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
|
|
356
|
-
head_request_ok = false;
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
// if head_request_ok is false, we don't have the etag or last-modified headers
|
|
360
|
-
// we leave should_download as-is, which is true if the file does not exist
|
|
361
|
-
bool should_download_from_scratch = false;
|
|
362
|
-
if (head_request_ok) {
|
|
363
|
-
// check if ETag or Last-Modified headers are different
|
|
364
|
-
// if it is, we need to download the file again
|
|
365
|
-
if (!etag.empty() && etag != headers.etag) {
|
|
366
|
-
LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
|
|
367
|
-
headers.etag.c_str());
|
|
368
|
-
should_download = true;
|
|
369
|
-
should_download_from_scratch = true;
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
|
|
374
|
-
if (should_download) {
|
|
375
|
-
if (file_exists &&
|
|
376
|
-
!accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
|
|
377
|
-
LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
|
378
|
-
if (remove(path.c_str()) != 0) {
|
|
379
|
-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
380
|
-
return -1;
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
const std::string path_temporary = path + ".downloadInProgress";
|
|
385
|
-
if (should_download_from_scratch) {
|
|
386
|
-
if (std::filesystem::exists(path_temporary)) {
|
|
387
|
-
if (remove(path_temporary.c_str()) != 0) {
|
|
388
|
-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
|
|
389
|
-
return -1;
|
|
390
|
-
}
|
|
391
|
-
}
|
|
392
|
-
|
|
393
|
-
if (std::filesystem::exists(path)) {
|
|
394
|
-
if (remove(path.c_str()) != 0) {
|
|
395
|
-
LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
|
|
396
|
-
return -1;
|
|
397
|
-
}
|
|
398
|
-
}
|
|
399
|
-
}
|
|
400
|
-
if (head_request_ok) {
|
|
401
|
-
write_etag(path, headers.etag);
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
// start the download
|
|
405
|
-
LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
|
|
406
|
-
__func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
|
|
407
|
-
headers.etag.c_str(), headers.last_modified.c_str());
|
|
408
|
-
const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
|
|
409
|
-
if (!was_pull_successful) {
|
|
410
|
-
if (i + 1 < max_attempts) {
|
|
411
|
-
const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
|
|
412
|
-
LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
|
|
413
|
-
std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
|
|
414
|
-
} else {
|
|
415
|
-
LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
|
|
416
|
-
}
|
|
417
|
-
|
|
418
|
-
continue;
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
long http_code = 0;
|
|
422
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
|
|
423
|
-
|
|
424
|
-
int status = static_cast<int>(http_code);
|
|
425
|
-
if (!is_http_status_ok(http_code)) {
|
|
426
|
-
LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
|
|
427
|
-
return status; // TODO: maybe only return on certain codes
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
|
431
|
-
LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
|
432
|
-
return -1;
|
|
433
|
-
}
|
|
434
|
-
|
|
435
|
-
return static_cast<int>(http_code);
|
|
436
|
-
} else {
|
|
437
|
-
LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
|
|
438
|
-
|
|
439
|
-
return 304; // Not Modified - fake cached response
|
|
440
|
-
}
|
|
441
|
-
}
|
|
442
|
-
|
|
443
|
-
return -1; // max attempts reached
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
|
|
447
|
-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
|
|
448
|
-
curl_slist_ptr http_headers;
|
|
449
|
-
std::vector<char> res_buffer;
|
|
450
|
-
|
|
451
|
-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
452
|
-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
453
|
-
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
454
|
-
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
|
|
455
|
-
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
456
|
-
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
457
|
-
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
458
|
-
data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
|
|
459
|
-
return size * nmemb;
|
|
460
|
-
};
|
|
461
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
|
|
462
|
-
curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
|
|
463
|
-
#if defined(_WIN32)
|
|
464
|
-
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
|
|
465
|
-
#endif
|
|
466
|
-
if (params.timeout > 0) {
|
|
467
|
-
curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
|
|
468
|
-
}
|
|
469
|
-
if (params.max_size > 0) {
|
|
470
|
-
curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
|
|
471
|
-
}
|
|
472
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
|
|
473
|
-
|
|
474
|
-
for (const auto & header : params.headers) {
|
|
475
|
-
std::string header_ = header.first + ": " + header.second;
|
|
476
|
-
http_headers.ptr = curl_slist_append(http_headers.ptr, header_.c_str());
|
|
477
|
-
}
|
|
478
|
-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
|
|
479
|
-
|
|
480
|
-
CURLcode res = curl_easy_perform(curl.get());
|
|
481
|
-
|
|
482
|
-
if (res != CURLE_OK) {
|
|
483
|
-
std::string error_msg = curl_easy_strerror(res);
|
|
484
|
-
throw std::runtime_error("error: cannot make GET request: " + error_msg);
|
|
161
|
+
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
|
|
162
|
+
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
|
|
163
|
+
std::string tag = parts.size() > 1 ? parts.back() : "latest";
|
|
164
|
+
std::string hf_repo = parts[0];
|
|
165
|
+
if (string_split<std::string>(hf_repo, '/').size() != 2) {
|
|
166
|
+
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
|
485
167
|
}
|
|
486
|
-
|
|
487
|
-
long res_code;
|
|
488
|
-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
|
|
489
|
-
|
|
490
|
-
return { res_code, std::move(res_buffer) };
|
|
168
|
+
return {hf_repo, tag};
|
|
491
169
|
}
|
|
492
170
|
|
|
493
|
-
#elif defined(LLAMA_USE_HTTPLIB)
|
|
171
|
+
#if defined(LLAMA_USE_HTTPLIB)
|
|
494
172
|
|
|
495
173
|
class ProgressBar {
|
|
496
174
|
static inline std::mutex mutex;
|
|
@@ -787,10 +465,6 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
|
|
|
787
465
|
return { res->status, std::move(buf) };
|
|
788
466
|
}
|
|
789
467
|
|
|
790
|
-
#endif // LLAMA_USE_CURL
|
|
791
|
-
|
|
792
|
-
#if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
|
|
793
|
-
|
|
794
468
|
int common_download_file_single(const std::string & url,
|
|
795
469
|
const std::string & path,
|
|
796
470
|
const std::string & bearer_token,
|
|
@@ -922,12 +596,8 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
|
|
|
922
596
|
const std::string & bearer_token,
|
|
923
597
|
bool offline,
|
|
924
598
|
const common_header_list & custom_headers) {
|
|
925
|
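-
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
|
|
926
|
-
std::string tag = parts.size() > 1 ? parts.back() : "latest";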
|
|
927
|
-
std::string hf_repo = parts[0];
|
|
928
|
-
if (string_split<std::string>(hf_repo, '/').size() != 2) {
|
|
929
|
-
throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
|
|
930
|
-
}
|
|
599
|
+
// the returned hf_repo is without tag
|
|
600
|
+
auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);
|
|
931
601
|
|
|
932
602
|
std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
|
|
933
603
|
|
|
@@ -1145,7 +815,7 @@ int common_download_file_single(const std::string &,
|
|
|
1145
815
|
throw std::runtime_error("download functionality is not enabled in this build");
|
|
1146
816
|
}
|
|
1147
817
|
|
|
1148
|
-
#endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
|
|
818
|
+
#endif // defined(LLAMA_USE_HTTPLIB)
|
|
1149
819
|
|
|
1150
820
|
std::vector<common_cached_model_info> common_list_cached_models() {
|
|
1151
821
|
std::vector<common_cached_model_info> models;
|
|
@@ -17,6 +17,12 @@ struct common_remote_params {
|
|
|
17
17
|
// get remote file content, returns <http_code, raw_response_body>
|
|
18
18
|
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
|
|
19
19
|
|
|
20
|
+
// split HF repo with tag into <repo, tag>
|
|
21
|
+
// for example: "user/model:tag" -> <"user/model", "tag">
|
|
22
|
+
// if tag is not present, default to "latest"
|
|
23
|
+
// example: "user/model" -> <"user/model", "latest">
|
|
24
|
+
std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
|
|
25
|
+
|
|
20
26
|
struct common_cached_model_info {
|
|
21
27
|
std::string manifest_path;
|
|
22
28
|
std::string user;
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
#include "value.h"
|
|
2
|
+
#include "runtime.h"
|
|
3
|
+
#include "caps.h"
|
|
4
|
+
|
|
5
|
+
// note: the json dependency is only for defining input in a convenient way
|
|
6
|
+
// we can remove it in the future when we figure out a better way to define inputs using jinja::value
|
|
7
|
+
#include <nlohmann/json.hpp>
|
|
8
|
+
|
|
9
|
+
#include <functional>
|
|
10
|
+
#include <sstream>
|
|
11
|
+
|
|
12
|
+
#define FILENAME "jinja-caps"
|
|
13
|
+
|
|
14
|
+
using json = nlohmann::ordered_json;
|
|
15
|
+
|
|
16
|
+
namespace jinja {
|
|
17
|
+
|
|
18
|
+
using caps_json_fn = std::function<json()>;
|
|
19
|
+
using caps_analyze_fn = std::function<void(bool, value &, value &)>;
|
|
20
|
+
|
|
21
|
+
static void caps_try_execute(jinja::program & prog,
|
|
22
|
+
const caps_json_fn & messages_fn,
|
|
23
|
+
const caps_json_fn & tools_fn,
|
|
24
|
+
const caps_analyze_fn & analyze_fn) {
|
|
25
|
+
context ctx;
|
|
26
|
+
ctx.is_get_stats = true;
|
|
27
|
+
jinja::global_from_json(ctx, json{
|
|
28
|
+
{"messages", messages_fn()},
|
|
29
|
+
{"tools", tools_fn()},
|
|
30
|
+
{"bos_token", ""},
|
|
31
|
+
{"eos_token", ""},
|
|
32
|
+
{"add_generation_prompt", true}
|
|
33
|
+
}, true);
|
|
34
|
+
|
|
35
|
+
auto messages = ctx.get_val("messages");
|
|
36
|
+
auto tools = ctx.get_val("tools");
|
|
37
|
+
|
|
38
|
+
bool success = false;
|
|
39
|
+
try {
|
|
40
|
+
jinja::runtime runtime(ctx);
|
|
41
|
+
runtime.execute(prog);
|
|
42
|
+
success = true;
|
|
43
|
+
} catch (const std::exception & e) {
|
|
44
|
+
JJ_DEBUG("Exception during execution: %s", e.what());
|
|
45
|
+
// ignore exceptions during capability analysis
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
analyze_fn(success, messages, tools);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// for debugging only
|
|
52
|
+
static void caps_print_stats(value & v, const std::string & path) {
|
|
53
|
+
std::string ops;
|
|
54
|
+
for (const auto & name : v->stats.ops) {
|
|
55
|
+
ops += name + " ";
|
|
56
|
+
}
|
|
57
|
+
JJ_DEBUG("Value %s, type: %s %s, ops: %s",
|
|
58
|
+
path.c_str(),
|
|
59
|
+
v->type().c_str(),
|
|
60
|
+
v->stats.used ? "(used)" : "",
|
|
61
|
+
ops.c_str());
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
std::string caps::to_string() const {
|
|
65
|
+
std::ostringstream ss;
|
|
66
|
+
ss << "Caps(\n";
|
|
67
|
+
ss << " requires_typed_content=" << requires_typed_content << "\n";
|
|
68
|
+
ss << " supports_tools=" << supports_tools << "\n";
|
|
69
|
+
ss << " supports_tool_calls=" << supports_tool_calls << "\n";
|
|
70
|
+
ss << " supports_parallel_tool_calls=" << supports_parallel_tool_calls << "\n";
|
|
71
|
+
ss << " supports_system_role=" << supports_system_role << "\n";
|
|
72
|
+
ss << ")";
|
|
73
|
+
return ss.str();
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
caps caps_get(jinja::program & prog) {
|
|
77
|
+
caps result;
|
|
78
|
+
|
|
79
|
+
static const auto has_op = [](value & v, const std::string & op_name) {
|
|
80
|
+
return v->stats.ops.find(op_name) != v->stats.ops.end();
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
// case: typed content requirement
|
|
84
|
+
caps_try_execute(
|
|
85
|
+
prog,
|
|
86
|
+
[&]() {
|
|
87
|
+
// messages
|
|
88
|
+
return json::array({
|
|
89
|
+
{
|
|
90
|
+
{"role", "user"},
|
|
91
|
+
{"content", "content"}
|
|
92
|
+
}
|
|
93
|
+
});
|
|
94
|
+
},
|
|
95
|
+
[&]() {
|
|
96
|
+
// tools
|
|
97
|
+
return json{nullptr};
|
|
98
|
+
},
|
|
99
|
+
[&](bool, value & messages, value &) {
|
|
100
|
+
auto & content = messages->at(0)->at("content");
|
|
101
|
+
caps_print_stats(content, "messages[0].content");
|
|
102
|
+
if (has_op(content, "selectattr") || has_op(content, "array_access")) {
|
|
103
|
+
// accessed as an array
|
|
104
|
+
result.requires_typed_content = true;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
);
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
// case: system prompt support
|
|
111
|
+
caps_try_execute(
|
|
112
|
+
prog,
|
|
113
|
+
[&]() {
|
|
114
|
+
// messages
|
|
115
|
+
return json::array({
|
|
116
|
+
{
|
|
117
|
+
{"role", "system"},
|
|
118
|
+
{"content", "System message"}
|
|
119
|
+
},
|
|
120
|
+
{
|
|
121
|
+
{"role", "user"},
|
|
122
|
+
{"content", "User message"}
|
|
123
|
+
},
|
|
124
|
+
});
|
|
125
|
+
},
|
|
126
|
+
[&]() {
|
|
127
|
+
// tools
|
|
128
|
+
return json::array();
|
|
129
|
+
},
|
|
130
|
+
[&](bool, value & messages, value &) {
|
|
131
|
+
auto & content = messages->at(0)->at("content");
|
|
132
|
+
caps_print_stats(content, "messages[0].content");
|
|
133
|
+
if (!content->stats.used) {
|
|
134
|
+
result.supports_system_role = false;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
);
|
|
138
|
+
|
|
139
|
+
// case: tools support
|
|
140
|
+
caps_try_execute(
|
|
141
|
+
prog,
|
|
142
|
+
[&]() {
|
|
143
|
+
// messages
|
|
144
|
+
return json::array({
|
|
145
|
+
{
|
|
146
|
+
{"role", "user"},
|
|
147
|
+
{"content", "User message"},
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
{"role", "assistant"},
|
|
151
|
+
{"content", "Assistant message"},
|
|
152
|
+
{"tool_calls", json::array({
|
|
153
|
+
{
|
|
154
|
+
{"id", "call1"},
|
|
155
|
+
{"type", "function"},
|
|
156
|
+
{"function", {
|
|
157
|
+
{"name", "tool1"},
|
|
158
|
+
{"arguments", {
|
|
159
|
+
{"arg", "value"}
|
|
160
|
+
}}
|
|
161
|
+
}}
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
{"id", "call2"},
|
|
165
|
+
{"type", "function"},
|
|
166
|
+
{"function", {
|
|
167
|
+
{"name", "tool2"},
|
|
168
|
+
{"arguments", {
|
|
169
|
+
{"arg", "value"}
|
|
170
|
+
}}
|
|
171
|
+
}}
|
|
172
|
+
}
|
|
173
|
+
})}
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
{"role", "user"},
|
|
177
|
+
{"content", "User message"},
|
|
178
|
+
},
|
|
179
|
+
});
|
|
180
|
+
},
|
|
181
|
+
[&]() {
|
|
182
|
+
// tools
|
|
183
|
+
return json::array({
|
|
184
|
+
{
|
|
185
|
+
{"name", "tool"},
|
|
186
|
+
{"type", "function"},
|
|
187
|
+
{"function", {
|
|
188
|
+
{"name", "tool"},
|
|
189
|
+
{"description", "Tool description"},
|
|
190
|
+
{"parameters", {
|
|
191
|
+
{"type", "object"},
|
|
192
|
+
{"properties", {
|
|
193
|
+
{"arg", {
|
|
194
|
+
{"type", "string"},
|
|
195
|
+
{"description", "Arg description"},
|
|
196
|
+
}},
|
|
197
|
+
}},
|
|
198
|
+
{"required", json::array({ "arg" })},
|
|
199
|
+
}},
|
|
200
|
+
}},
|
|
201
|
+
},
|
|
202
|
+
});
|
|
203
|
+
},
|
|
204
|
+
[&](bool success, value & messages, value & tools) {
|
|
205
|
+
if (!success) {
|
|
206
|
+
result.supports_tool_calls = false;
|
|
207
|
+
result.supports_tools = false;
|
|
208
|
+
return;
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
auto & tool_name = tools->at(0)->at("function")->at("name");
|
|
212
|
+
caps_print_stats(tool_name, "tools[0].function.name");
|
|
213
|
+
if (!tool_name->stats.used) {
|
|
214
|
+
result.supports_tools = false;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
auto & tool_calls = messages->at(1)->at("tool_calls");;
|
|
218
|
+
caps_print_stats(tool_calls, "messages[1].tool_calls");
|
|
219
|
+
if (!tool_calls->stats.used) {
|
|
220
|
+
result.supports_tool_calls = false;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// check for second tool call usage
|
|
224
|
+
auto & tool_call_1 = tool_calls->at(1)->at("function");
|
|
225
|
+
caps_print_stats(tool_call_1, "messages[1].tool_calls[1].function");
|
|
226
|
+
if (!tool_call_1->stats.used) {
|
|
227
|
+
result.supports_parallel_tool_calls = false;
|
|
228
|
+
}
|
|
229
|
+
}
|
|
230
|
+
);
|
|
231
|
+
|
|
232
|
+
JJ_DEBUG("%s\n", result.to_string().c_str());
|
|
233
|
+
|
|
234
|
+
return result;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
} // namespace jinja
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "runtime.h"
|
|
4
|
+
|
|
5
|
+
#include <string>
|
|
6
|
+
|
|
7
|
+
namespace jinja {
|
|
8
|
+
|
|
9
|
+
struct caps {
|
|
10
|
+
bool supports_tools = true;
|
|
11
|
+
bool supports_tool_calls = true;
|
|
12
|
+
bool supports_system_role = true;
|
|
13
|
+
bool supports_parallel_tool_calls = true;
|
|
14
|
+
|
|
15
|
+
bool requires_typed_content = false; // default: use string content
|
|
16
|
+
|
|
17
|
+
// for debugging
|
|
18
|
+
std::string to_string() const;
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
caps caps_get(jinja::program & prog);
|
|
22
|
+
void debug_print_caps(const caps & c);
|
|
23
|
+
|
|
24
|
+
} // namespace jinja
|