@fugood/llama.node 1.4.14 → 1.5.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66):
  1. package/lib/binding.ts +13 -6
  2. package/lib/index.js +2 -2
  3. package/lib/index.ts +8 -3
  4. package/package.json +15 -15
  5. package/scripts/llama.cpp.patch +77 -65
  6. package/src/LlamaContext.cpp +31 -34
  7. package/src/llama.cpp/CMakeLists.txt +24 -8
  8. package/src/llama.cpp/common/CMakeLists.txt +15 -34
  9. package/src/llama.cpp/common/arg.cpp +59 -10
  10. package/src/llama.cpp/common/chat-parser.cpp +115 -0
  11. package/src/llama.cpp/common/chat.cpp +356 -34
  12. package/src/llama.cpp/common/chat.h +17 -13
  13. package/src/llama.cpp/common/common.cpp +0 -1
  14. package/src/llama.cpp/common/common.h +30 -25
  15. package/src/llama.cpp/common/debug.cpp +165 -0
  16. package/src/llama.cpp/common/debug.h +43 -0
  17. package/src/llama.cpp/common/download.cpp +12 -342
  18. package/src/llama.cpp/common/download.h +6 -0
  19. package/src/llama.cpp/common/jinja/caps.cpp +237 -0
  20. package/src/llama.cpp/common/jinja/caps.h +24 -0
  21. package/src/llama.cpp/common/jinja/lexer.cpp +341 -0
  22. package/src/llama.cpp/common/jinja/lexer.h +157 -0
  23. package/src/llama.cpp/common/jinja/parser.cpp +591 -0
  24. package/src/llama.cpp/common/jinja/parser.h +21 -0
  25. package/src/llama.cpp/common/jinja/runtime.cpp +865 -0
  26. package/src/llama.cpp/common/jinja/runtime.h +628 -0
  27. package/src/llama.cpp/common/jinja/string.cpp +207 -0
  28. package/src/llama.cpp/common/jinja/string.h +58 -0
  29. package/src/llama.cpp/common/jinja/utils.h +49 -0
  30. package/src/llama.cpp/common/jinja/value.cpp +1221 -0
  31. package/src/llama.cpp/common/jinja/value.h +464 -0
  32. package/src/llama.cpp/common/preset.cpp +12 -2
  33. package/src/llama.cpp/common/sampling.cpp +52 -19
  34. package/src/llama.cpp/ggml/include/ggml.h +39 -7
  35. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -0
  36. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +63 -37
  37. package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +31 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +18 -0
  39. package/src/llama.cpp/include/llama-cpp.h +3 -1
  40. package/src/llama.cpp/include/llama.h +29 -2
  41. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  42. package/src/llama.cpp/src/llama-adapter.cpp +7 -13
  43. package/src/llama.cpp/src/llama-adapter.h +1 -3
  44. package/src/llama.cpp/src/llama-arch.cpp +35 -0
  45. package/src/llama.cpp/src/llama-arch.h +1 -0
  46. package/src/llama.cpp/src/llama-chat.cpp +20 -0
  47. package/src/llama.cpp/src/llama-chat.h +1 -0
  48. package/src/llama.cpp/src/llama-context.cpp +232 -144
  49. package/src/llama.cpp/src/llama-context.h +10 -0
  50. package/src/llama.cpp/src/llama-cparams.h +2 -0
  51. package/src/llama.cpp/src/llama-graph.cpp +31 -43
  52. package/src/llama.cpp/src/llama-hparams.cpp +0 -36
  53. package/src/llama.cpp/src/llama-hparams.h +38 -1
  54. package/src/llama.cpp/src/llama-kv-cache.cpp +201 -59
  55. package/src/llama.cpp/src/llama-kv-cache.h +0 -2
  56. package/src/llama.cpp/src/llama-mmap.cpp +13 -6
  57. package/src/llama.cpp/src/llama-model-loader.cpp +21 -7
  58. package/src/llama.cpp/src/llama-model.cpp +215 -97
  59. package/src/llama.cpp/src/llama-model.h +3 -2
  60. package/src/llama.cpp/src/llama-sampling.cpp +170 -13
  61. package/src/llama.cpp/src/llama-vocab.cpp +37 -24
  62. package/src/llama.cpp/src/llama-vocab.h +1 -0
  63. package/src/llama.cpp/src/models/exaone-moe.cpp +146 -0
  64. package/src/llama.cpp/src/models/gemma3n-iswa.cpp +13 -3
  65. package/src/llama.cpp/src/models/models.h +13 -2
  66. package/src/llama.cpp/src/models/qwen3next.cpp +198 -182
@@ -19,10 +19,7 @@
19
19
  #include <thread>
20
20
  #include <vector>
21
21
 
22
- #if defined(LLAMA_USE_CURL)
23
- #include <curl/curl.h>
24
- #include <curl/easy.h>
25
- #elif defined(LLAMA_USE_HTTPLIB)
22
+ #if defined(LLAMA_USE_HTTPLIB)
26
23
  #include "http.h"
27
24
  #endif
28
25
 
@@ -161,336 +158,17 @@ static bool is_http_status_ok(int status) {
161
158
  return status >= 200 && status < 400;
162
159
  }
163
160
 
164
- #ifdef LLAMA_USE_CURL
165
-
166
- //
167
- // CURL utils
168
- //
169
-
170
- using curl_ptr = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>;
171
-
172
- // cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one
173
- struct curl_slist_ptr {
174
- struct curl_slist * ptr = nullptr;
175
- ~curl_slist_ptr() {
176
- if (ptr) {
177
- curl_slist_free_all(ptr);
178
- }
179
- }
180
- };
181
-
182
- static CURLcode common_curl_perf(CURL * curl) {
183
- CURLcode res = curl_easy_perform(curl);
184
- if (res != CURLE_OK) {
185
- LOG_ERR("%s: curl_easy_perform() failed\n", __func__);
186
- }
187
-
188
- return res;
189
- }
190
-
191
- // Send a HEAD request to retrieve the etag and last-modified headers
192
- struct common_load_model_from_url_headers {
193
- std::string etag;
194
- std::string last_modified;
195
- std::string accept_ranges;
196
- };
197
-
198
- struct FILE_deleter {
199
- void operator()(FILE * f) const { fclose(f); }
200
- };
201
-
202
- static size_t common_header_callback(char * buffer, size_t, size_t n_items, void * userdata) {
203
- common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
204
- static std::regex header_regex("([^:]+): (.*)\r\n");
205
- static std::regex etag_regex("ETag", std::regex_constants::icase);
206
- static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
207
- static std::regex accept_ranges_regex("Accept-Ranges", std::regex_constants::icase);
208
- std::string header(buffer, n_items);
209
- std::smatch match;
210
- if (std::regex_match(header, match, header_regex)) {
211
- const std::string & key = match[1];
212
- const std::string & value = match[2];
213
- if (std::regex_match(key, match, etag_regex)) {
214
- headers->etag = value;
215
- } else if (std::regex_match(key, match, last_modified_regex)) {
216
- headers->last_modified = value;
217
- } else if (std::regex_match(key, match, accept_ranges_regex)) {
218
- headers->accept_ranges = value;
219
- }
220
- }
221
-
222
- return n_items;
223
- }
224
-
225
- static size_t common_write_callback(void * data, size_t size, size_t nmemb, void * fd) {
226
- return std::fwrite(data, size, nmemb, static_cast<FILE *>(fd));
227
- }
228
-
229
- // helper function to hide password in URL
230
- static std::string llama_download_hide_password_in_url(const std::string & url) {
231
- // Use regex to match and replace the user[:password]@ pattern in URLs
232
- // Pattern: scheme://[user[:password]@]host[...]
233
- static const std::regex url_regex(R"(^(?:[A-Za-z][A-Za-z0-9+.-]://)(?:[^/@]+@)?.$)");
234
- std::smatch match;
235
-
236
- if (std::regex_match(url, match, url_regex)) {
237
- // match[1] = scheme (e.g., "https://")
238
- // match[2] = user[:password]@ part
239
- // match[3] = rest of URL (host and path)
240
- return match[1].str() + "********@" + match[3].str();
241
- }
242
-
243
- return url; // No credentials found or malformed URL
244
- }
245
-
246
- static void common_curl_easy_setopt_head(CURL * curl, const std::string & url) {
247
- // Set the URL, allow to follow http redirection
248
- curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
249
- curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
250
-
251
- # if defined(_WIN32)
252
- // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
253
- // operating system. Currently implemented under MS-Windows.
254
- curl_easy_setopt(curl, CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
255
- # endif
256
-
257
- curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
258
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1L); // hide head request progress
259
- curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, common_header_callback);
260
- }
261
-
262
- static void common_curl_easy_setopt_get(CURL * curl) {
263
- curl_easy_setopt(curl, CURLOPT_NOBODY, 0L);
264
- curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, common_write_callback);
265
-
266
- // display download progress
267
- curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0L);
268
- }
269
-
270
- static bool common_pull_file(CURL * curl, const std::string & path_temporary) {
271
- if (std::filesystem::exists(path_temporary)) {
272
- const std::string partial_size = std::to_string(std::filesystem::file_size(path_temporary));
273
- LOG_INF("%s: server supports range requests, resuming download from byte %s\n", __func__, partial_size.c_str());
274
- const std::string range_str = partial_size + "-";
275
- curl_easy_setopt(curl, CURLOPT_RANGE, range_str.c_str());
276
- }
277
-
278
- // Always open file in append mode could be resuming
279
- std::unique_ptr<FILE, FILE_deleter> outfile(fopen(path_temporary.c_str(), "ab"));
280
- if (!outfile) {
281
- LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path_temporary.c_str());
282
- return false;
283
- }
284
-
285
- common_curl_easy_setopt_get(curl);
286
- curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile.get());
287
-
288
- return common_curl_perf(curl) == CURLE_OK;
289
- }
290
-
291
- static bool common_download_head(CURL * curl,
292
- curl_slist_ptr & http_headers,
293
- const std::string & url,
294
- const std::string & bearer_token) {
295
- if (!curl) {
296
- LOG_ERR("%s: error initializing libcurl\n", __func__);
297
- return false;
298
- }
299
-
300
- http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
301
- // Check if hf-token or bearer-token was specified
302
- if (!bearer_token.empty()) {
303
- std::string auth_header = "Authorization: Bearer " + bearer_token;
304
- http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
305
- }
306
-
307
- curl_easy_setopt(curl, CURLOPT_HTTPHEADER, http_headers.ptr);
308
- common_curl_easy_setopt_head(curl, url);
309
- return common_curl_perf(curl) == CURLE_OK;
310
- }
311
-
312
- // download one single file from remote URL to local path
313
- // returns status code or -1 on error
314
- static int common_download_file_single_online(const std::string & url,
315
- const std::string & path,
316
- const std::string & bearer_token,
317
- const common_header_list & custom_headers) {
318
- static const int max_attempts = 3;
319
- static const int retry_delay_seconds = 2;
320
-
321
- for (int i = 0; i < max_attempts; ++i) {
322
- std::string etag;
323
-
324
- // Check if the file already exists locally
325
- const auto file_exists = std::filesystem::exists(path);
326
- if (file_exists) {
327
- etag = read_etag(path);
328
- } else {
329
- LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
330
- }
331
-
332
- bool head_request_ok = false;
333
- bool should_download = !file_exists; // by default, we should download if the file does not exist
334
-
335
- // Initialize libcurl
336
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
337
- common_load_model_from_url_headers headers;
338
- curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
339
- curl_slist_ptr http_headers;
340
-
341
- for (const auto & h : custom_headers) {
342
- std::string s = h.first + ": " + h.second;
343
- http_headers.ptr = curl_slist_append(http_headers.ptr, s.c_str());
344
- }
345
- const bool was_perform_successful = common_download_head(curl.get(), http_headers, url, bearer_token);
346
- if (!was_perform_successful) {
347
- head_request_ok = false;
348
- }
349
-
350
- long http_code = 0;
351
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
352
- if (http_code == 200) {
353
- head_request_ok = true;
354
- } else {
355
- LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
356
- head_request_ok = false;
357
- }
358
-
359
- // if head_request_ok is false, we don't have the etag or last-modified headers
360
- // we leave should_download as-is, which is true if the file does not exist
361
- bool should_download_from_scratch = false;
362
- if (head_request_ok) {
363
- // check if ETag or Last-Modified headers are different
364
- // if it is, we need to download the file again
365
- if (!etag.empty() && etag != headers.etag) {
366
- LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(),
367
- headers.etag.c_str());
368
- should_download = true;
369
- should_download_from_scratch = true;
370
- }
371
- }
372
-
373
- const bool accept_ranges_supported = !headers.accept_ranges.empty() && headers.accept_ranges != "none";
374
- if (should_download) {
375
- if (file_exists &&
376
- !accept_ranges_supported) { // Resumable downloads not supported, delete and start again.
377
- LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
378
- if (remove(path.c_str()) != 0) {
379
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
380
- return -1;
381
- }
382
- }
383
-
384
- const std::string path_temporary = path + ".downloadInProgress";
385
- if (should_download_from_scratch) {
386
- if (std::filesystem::exists(path_temporary)) {
387
- if (remove(path_temporary.c_str()) != 0) {
388
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path_temporary.c_str());
389
- return -1;
390
- }
391
- }
392
-
393
- if (std::filesystem::exists(path)) {
394
- if (remove(path.c_str()) != 0) {
395
- LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str());
396
- return -1;
397
- }
398
- }
399
- }
400
- if (head_request_ok) {
401
- write_etag(path, headers.etag);
402
- }
403
-
404
- // start the download
405
- LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n",
406
- __func__, llama_download_hide_password_in_url(url).c_str(), path_temporary.c_str(),
407
- headers.etag.c_str(), headers.last_modified.c_str());
408
- const bool was_pull_successful = common_pull_file(curl.get(), path_temporary);
409
- if (!was_pull_successful) {
410
- if (i + 1 < max_attempts) {
411
- const int exponential_backoff_delay = std::pow(retry_delay_seconds, i) * 1000;
412
- LOG_WRN("%s: retrying after %d milliseconds...\n", __func__, exponential_backoff_delay);
413
- std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay));
414
- } else {
415
- LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts);
416
- }
417
-
418
- continue;
419
- }
420
-
421
- long http_code = 0;
422
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
423
-
424
- int status = static_cast<int>(http_code);
425
- if (!is_http_status_ok(http_code)) {
426
- LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code);
427
- return status; // TODO: maybe only return on certain codes
428
- }
429
-
430
- if (rename(path_temporary.c_str(), path.c_str()) != 0) {
431
- LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
432
- return -1;
433
- }
434
-
435
- return static_cast<int>(http_code);
436
- } else {
437
- LOG_INF("%s: using cached file: %s\n", __func__, path.c_str());
438
-
439
- return 304; // Not Modified - fake cached response
440
- }
441
- }
442
-
443
- return -1; // max attempts reached
444
- }
445
-
446
- std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params) {
447
- curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
448
- curl_slist_ptr http_headers;
449
- std::vector<char> res_buffer;
450
-
451
- curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
452
- curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
453
- curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
454
- curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
455
- typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
456
- auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
457
- auto data_vec = static_cast<std::vector<char> *>(data);
458
- data_vec->insert(data_vec->end(), (char *)ptr, (char *)ptr + size * nmemb);
459
- return size * nmemb;
460
- };
461
- curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast<CURLOPT_WRITEFUNCTION_PTR>(write_callback));
462
- curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_buffer);
463
- #if defined(_WIN32)
464
- curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
465
- #endif
466
- if (params.timeout > 0) {
467
- curl_easy_setopt(curl.get(), CURLOPT_TIMEOUT, params.timeout);
468
- }
469
- if (params.max_size > 0) {
470
- curl_easy_setopt(curl.get(), CURLOPT_MAXFILESIZE, params.max_size);
471
- }
472
- http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
473
-
474
- for (const auto & header : params.headers) {
475
- std::string header_ = header.first + ": " + header.second;
476
- http_headers.ptr = curl_slist_append(http_headers.ptr, header_.c_str());
477
- }
478
- curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
479
-
480
- CURLcode res = curl_easy_perform(curl.get());
481
-
482
- if (res != CURLE_OK) {
483
- std::string error_msg = curl_easy_strerror(res);
484
- throw std::runtime_error("error: cannot make GET request: " + error_msg);
161
+ std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag) {
162
+ auto parts = string_split<std::string>(hf_repo_with_tag, ':');
163
+ std::string tag = parts.size() > 1 ? parts.back() : "latest";
164
+ std::string hf_repo = parts[0];
165
+ if (string_split<std::string>(hf_repo, '/').size() != 2) {
166
+ throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
485
167
  }
486
-
487
- long res_code;
488
- curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code);
489
-
490
- return { res_code, std::move(res_buffer) };
168
+ return {hf_repo, tag};
491
169
  }
492
170
 
493
- #elif defined(LLAMA_USE_HTTPLIB)
171
+ #if defined(LLAMA_USE_HTTPLIB)
494
172
 
495
173
  class ProgressBar {
496
174
  static inline std::mutex mutex;
@@ -787,10 +465,6 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string
787
465
  return { res->status, std::move(buf) };
788
466
  }
789
467
 
790
- #endif // LLAMA_USE_CURL
791
-
792
- #if defined(LLAMA_USE_CURL) || defined(LLAMA_USE_HTTPLIB)
793
-
794
468
  int common_download_file_single(const std::string & url,
795
469
  const std::string & path,
796
470
  const std::string & bearer_token,
@@ -922,12 +596,8 @@ common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag,
922
596
  const std::string & bearer_token,
923
597
  bool offline,
924
598
  const common_header_list & custom_headers) {
925
- auto parts = string_split<std::string>(hf_repo_with_tag, ':');
926
- std::string tag = parts.size() > 1 ? parts.back() : "latest";
927
- std::string hf_repo = parts[0];
928
- if (string_split<std::string>(hf_repo, '/').size() != 2) {
929
- throw std::invalid_argument("error: invalid HF repo format, expected <user>/<model>[:quant]\n");
930
- }
599
+ // the returned hf_repo is without tag
600
+ auto [hf_repo, tag] = common_download_split_repo_tag(hf_repo_with_tag);
931
601
 
932
602
  std::string url = get_model_endpoint() + "v2/" + hf_repo + "/manifests/" + tag;
933
603
 
@@ -1145,7 +815,7 @@ int common_download_file_single(const std::string &,
1145
815
  throw std::runtime_error("download functionality is not enabled in this build");
1146
816
  }
1147
817
 
1148
- #endif // LLAMA_USE_CURL || LLAMA_USE_HTTPLIB
818
+ #endif // defined(LLAMA_USE_HTTPLIB)
1149
819
 
1150
820
  std::vector<common_cached_model_info> common_list_cached_models() {
1151
821
  std::vector<common_cached_model_info> models;
@@ -17,6 +17,12 @@ struct common_remote_params {
17
17
  // get remote file content, returns <http_code, raw_response_body>
18
18
  std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
19
19
 
20
+ // split HF repo with tag into <repo, tag>
21
+ // for example: "user/model:tag" -> <"user/model", "tag">
22
+ // if tag is not present, default to "latest"
23
+ // example: "user/model" -> <"user/model", "latest">
24
+ std::pair<std::string, std::string> common_download_split_repo_tag(const std::string & hf_repo_with_tag);
25
+
20
26
  struct common_cached_model_info {
21
27
  std::string manifest_path;
22
28
  std::string user;
@@ -0,0 +1,237 @@
1
+ #include "value.h"
2
+ #include "runtime.h"
3
+ #include "caps.h"
4
+
5
+ // note: the json dependency is only for defining input in a convenient way
6
+ // we can remove it in the future when we figure out a better way to define inputs using jinja::value
7
+ #include <nlohmann/json.hpp>
8
+
9
+ #include <functional>
10
+ #include <sstream>
11
+
12
+ #define FILENAME "jinja-caps"
13
+
14
+ using json = nlohmann::ordered_json;
15
+
16
+ namespace jinja {
17
+
18
+ using caps_json_fn = std::function<json()>;
19
+ using caps_analyze_fn = std::function<void(bool, value &, value &)>;
20
+
21
+ static void caps_try_execute(jinja::program & prog,
22
+ const caps_json_fn & messages_fn,
23
+ const caps_json_fn & tools_fn,
24
+ const caps_analyze_fn & analyze_fn) {
25
+ context ctx;
26
+ ctx.is_get_stats = true;
27
+ jinja::global_from_json(ctx, json{
28
+ {"messages", messages_fn()},
29
+ {"tools", tools_fn()},
30
+ {"bos_token", ""},
31
+ {"eos_token", ""},
32
+ {"add_generation_prompt", true}
33
+ }, true);
34
+
35
+ auto messages = ctx.get_val("messages");
36
+ auto tools = ctx.get_val("tools");
37
+
38
+ bool success = false;
39
+ try {
40
+ jinja::runtime runtime(ctx);
41
+ runtime.execute(prog);
42
+ success = true;
43
+ } catch (const std::exception & e) {
44
+ JJ_DEBUG("Exception during execution: %s", e.what());
45
+ // ignore exceptions during capability analysis
46
+ }
47
+
48
+ analyze_fn(success, messages, tools);
49
+ }
50
+
51
+ // for debugging only
52
+ static void caps_print_stats(value & v, const std::string & path) {
53
+ std::string ops;
54
+ for (const auto & name : v->stats.ops) {
55
+ ops += name + " ";
56
+ }
57
+ JJ_DEBUG("Value %s, type: %s %s, ops: %s",
58
+ path.c_str(),
59
+ v->type().c_str(),
60
+ v->stats.used ? "(used)" : "",
61
+ ops.c_str());
62
+ }
63
+
64
+ std::string caps::to_string() const {
65
+ std::ostringstream ss;
66
+ ss << "Caps(\n";
67
+ ss << " requires_typed_content=" << requires_typed_content << "\n";
68
+ ss << " supports_tools=" << supports_tools << "\n";
69
+ ss << " supports_tool_calls=" << supports_tool_calls << "\n";
70
+ ss << " supports_parallel_tool_calls=" << supports_parallel_tool_calls << "\n";
71
+ ss << " supports_system_role=" << supports_system_role << "\n";
72
+ ss << ")";
73
+ return ss.str();
74
+ }
75
+
76
+ caps caps_get(jinja::program & prog) {
77
+ caps result;
78
+
79
+ static const auto has_op = [](value & v, const std::string & op_name) {
80
+ return v->stats.ops.find(op_name) != v->stats.ops.end();
81
+ };
82
+
83
+ // case: typed content requirement
84
+ caps_try_execute(
85
+ prog,
86
+ [&]() {
87
+ // messages
88
+ return json::array({
89
+ {
90
+ {"role", "user"},
91
+ {"content", "content"}
92
+ }
93
+ });
94
+ },
95
+ [&]() {
96
+ // tools
97
+ return json{nullptr};
98
+ },
99
+ [&](bool, value & messages, value &) {
100
+ auto & content = messages->at(0)->at("content");
101
+ caps_print_stats(content, "messages[0].content");
102
+ if (has_op(content, "selectattr") || has_op(content, "array_access")) {
103
+ // accessed as an array
104
+ result.requires_typed_content = true;
105
+ }
106
+ }
107
+ );
108
+
109
+
110
+ // case: system prompt support
111
+ caps_try_execute(
112
+ prog,
113
+ [&]() {
114
+ // messages
115
+ return json::array({
116
+ {
117
+ {"role", "system"},
118
+ {"content", "System message"}
119
+ },
120
+ {
121
+ {"role", "user"},
122
+ {"content", "User message"}
123
+ },
124
+ });
125
+ },
126
+ [&]() {
127
+ // tools
128
+ return json::array();
129
+ },
130
+ [&](bool, value & messages, value &) {
131
+ auto & content = messages->at(0)->at("content");
132
+ caps_print_stats(content, "messages[0].content");
133
+ if (!content->stats.used) {
134
+ result.supports_system_role = false;
135
+ }
136
+ }
137
+ );
138
+
139
+ // case: tools support
140
+ caps_try_execute(
141
+ prog,
142
+ [&]() {
143
+ // messages
144
+ return json::array({
145
+ {
146
+ {"role", "user"},
147
+ {"content", "User message"},
148
+ },
149
+ {
150
+ {"role", "assistant"},
151
+ {"content", "Assistant message"},
152
+ {"tool_calls", json::array({
153
+ {
154
+ {"id", "call1"},
155
+ {"type", "function"},
156
+ {"function", {
157
+ {"name", "tool1"},
158
+ {"arguments", {
159
+ {"arg", "value"}
160
+ }}
161
+ }}
162
+ },
163
+ {
164
+ {"id", "call2"},
165
+ {"type", "function"},
166
+ {"function", {
167
+ {"name", "tool2"},
168
+ {"arguments", {
169
+ {"arg", "value"}
170
+ }}
171
+ }}
172
+ }
173
+ })}
174
+ },
175
+ {
176
+ {"role", "user"},
177
+ {"content", "User message"},
178
+ },
179
+ });
180
+ },
181
+ [&]() {
182
+ // tools
183
+ return json::array({
184
+ {
185
+ {"name", "tool"},
186
+ {"type", "function"},
187
+ {"function", {
188
+ {"name", "tool"},
189
+ {"description", "Tool description"},
190
+ {"parameters", {
191
+ {"type", "object"},
192
+ {"properties", {
193
+ {"arg", {
194
+ {"type", "string"},
195
+ {"description", "Arg description"},
196
+ }},
197
+ }},
198
+ {"required", json::array({ "arg" })},
199
+ }},
200
+ }},
201
+ },
202
+ });
203
+ },
204
+ [&](bool success, value & messages, value & tools) {
205
+ if (!success) {
206
+ result.supports_tool_calls = false;
207
+ result.supports_tools = false;
208
+ return;
209
+ }
210
+
211
+ auto & tool_name = tools->at(0)->at("function")->at("name");
212
+ caps_print_stats(tool_name, "tools[0].function.name");
213
+ if (!tool_name->stats.used) {
214
+ result.supports_tools = false;
215
+ }
216
+
217
+ auto & tool_calls = messages->at(1)->at("tool_calls");;
218
+ caps_print_stats(tool_calls, "messages[1].tool_calls");
219
+ if (!tool_calls->stats.used) {
220
+ result.supports_tool_calls = false;
221
+ }
222
+
223
+ // check for second tool call usage
224
+ auto & tool_call_1 = tool_calls->at(1)->at("function");
225
+ caps_print_stats(tool_call_1, "messages[1].tool_calls[1].function");
226
+ if (!tool_call_1->stats.used) {
227
+ result.supports_parallel_tool_calls = false;
228
+ }
229
+ }
230
+ );
231
+
232
+ JJ_DEBUG("%s\n", result.to_string().c_str());
233
+
234
+ return result;
235
+ }
236
+
237
+ } // namespace jinja
@@ -0,0 +1,24 @@
1
+ #pragma once
2
+
3
+ #include "runtime.h"
4
+
5
+ #include <string>
6
+
7
+ namespace jinja {
8
+
9
+ struct caps { // capability flags of a chat template, as filled in by caps_get()
10
+ bool supports_tools = true; // cleared when the tools probe throws or tools[0].function.name is not marked used
11
+ bool supports_tool_calls = true; // cleared when the tools probe throws or messages[1].tool_calls is not marked used
12
+ bool supports_system_role = true; // cleared when the system message's content is not marked used
13
+ bool supports_parallel_tool_calls = true; // cleared when the second tool call's function is not marked used
14
+
15
+ bool requires_typed_content = false; // default: use string content; set when content sees a selectattr / array_access op
16
+
17
+ // for debugging: multi-line "Caps(...)" dump listing every flag
18
+ std::string to_string() const;
19
+ };
20
+
21
+ caps caps_get(jinja::program & prog);
22
+ void debug_print_caps(const caps & c);
23
+
24
+ } // namespace jinja