@fugood/llama.node 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/CMakeLists.txt +1 -1
  2. package/lib/binding.js +3 -0
  3. package/lib/binding.ts +10 -0
  4. package/lib/index.js +9 -0
  5. package/lib/index.ts +10 -0
  6. package/package.json +15 -15
  7. package/scripts/llama.cpp.patch +25 -11
  8. package/src/LlamaContext.cpp +24 -0
  9. package/src/LlamaContext.h +3 -0
  10. package/src/llama.cpp/CMakeLists.txt +21 -6
  11. package/src/llama.cpp/common/CMakeLists.txt +6 -0
  12. package/src/llama.cpp/common/arg.cpp +83 -22
  13. package/src/llama.cpp/common/chat-parser.cpp +40 -0
  14. package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
  15. package/src/llama.cpp/common/chat-peg-parser.h +105 -0
  16. package/src/llama.cpp/common/chat.cpp +40 -29
  17. package/src/llama.cpp/common/chat.h +10 -1
  18. package/src/llama.cpp/common/common.cpp +70 -7
  19. package/src/llama.cpp/common/common.h +23 -5
  20. package/src/llama.cpp/common/download.cpp +18 -8
  21. package/src/llama.cpp/common/download.h +3 -1
  22. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  23. package/src/llama.cpp/common/log.cpp +18 -27
  24. package/src/llama.cpp/common/log.h +19 -12
  25. package/src/llama.cpp/common/peg-parser.cpp +1712 -0
  26. package/src/llama.cpp/common/peg-parser.h +459 -0
  27. package/src/llama.cpp/common/unicode.cpp +64 -0
  28. package/src/llama.cpp/common/unicode.h +22 -0
  29. package/src/llama.cpp/ggml/CMakeLists.txt +52 -48
  30. package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -2
  31. package/src/llama.cpp/ggml/include/ggml-zendnn.h +22 -0
  32. package/src/llama.cpp/ggml/include/ggml.h +29 -2
  33. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -4
  34. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
  35. package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +0 -2
  36. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
  37. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
  38. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +51 -125
  39. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
  40. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +98 -12
  41. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  42. package/src/llama.cpp/src/llama-arch.cpp +30 -1
  43. package/src/llama.cpp/src/llama-arch.h +3 -0
  44. package/src/llama.cpp/src/llama-graph.cpp +3 -6
  45. package/src/llama.cpp/src/llama-hparams.h +2 -2
  46. package/src/llama.cpp/src/llama-impl.h +1 -1
  47. package/src/llama.cpp/src/llama-mmap.cpp +1 -1
  48. package/src/llama.cpp/src/llama-model.cpp +54 -6
  49. package/src/llama.cpp/src/llama-quant.cpp +0 -29
  50. package/src/llama.cpp/src/llama-vocab.cpp +1 -2
  51. package/src/llama.cpp/src/models/deepseek2.cpp +18 -0
  52. package/src/llama.cpp/src/models/mistral3.cpp +160 -0
  53. package/src/llama.cpp/src/models/models.h +4 -0
  54. package/src/llama.cpp/src/unicode.cpp +2 -2
@@ -24,6 +24,7 @@
24
24
  #include "http.h"
25
25
  #endif
26
26
 
27
+ #ifndef __EMSCRIPTEN__
27
28
  #ifdef __linux__
28
29
  #include <linux/limits.h>
29
30
  #elif defined(_WIN32)
@@ -35,6 +36,8 @@
35
36
  #else
36
37
  #include <sys/syslimits.h>
37
38
  #endif
39
+ #endif
40
+
38
41
  #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
39
42
 
40
43
  // isatty
@@ -430,7 +433,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
430
433
  curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
431
434
  curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
432
435
  curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
433
- curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 1L);
436
+ curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
434
437
  typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
435
438
  auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
436
439
  auto data_vec = static_cast<std::vector<char> *>(data);
@@ -517,16 +520,18 @@ static bool common_pull_file(httplib::Client & cli,
517
520
  headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
518
521
  }
519
522
 
520
- std::atomic<size_t> downloaded{existing_size};
523
+ const char * func = __func__; // avoid __func__ inside a lambda
524
+ size_t downloaded = existing_size;
525
+ size_t progress_step = 0;
521
526
 
522
527
  auto res = cli.Get(resolve_path, headers,
523
528
  [&](const httplib::Response &response) {
524
529
  if (existing_size > 0 && response.status != 206) {
525
- LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", __func__, response.status);
530
+ LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", func, response.status);
526
531
  return false;
527
532
  }
528
533
  if (existing_size == 0 && response.status != 200) {
529
- LOG_WRN("%s: download received non-successful status code: %d\n", __func__, response.status);
534
+ LOG_WRN("%s: download received non-successful status code: %d\n", func, response.status);
530
535
  return false;
531
536
  }
532
537
  if (total_size == 0 && response.has_header("Content-Length")) {
@@ -534,7 +539,7 @@ static bool common_pull_file(httplib::Client & cli,
534
539
  size_t content_length = std::stoull(response.get_header_value("Content-Length"));
535
540
  total_size = existing_size + content_length;
536
541
  } catch (const std::exception &e) {
537
- LOG_WRN("%s: invalid Content-Length header: %s\n", __func__, e.what());
542
+ LOG_WRN("%s: invalid Content-Length header: %s\n", func, e.what());
538
543
  }
539
544
  }
540
545
  return true;
@@ -542,11 +547,16 @@ static bool common_pull_file(httplib::Client & cli,
542
547
  [&](const char *data, size_t len) {
543
548
  ofs.write(data, len);
544
549
  if (!ofs) {
545
- LOG_ERR("%s: error writing to file: %s\n", __func__, path_tmp.c_str());
550
+ LOG_ERR("%s: error writing to file: %s\n", func, path_tmp.c_str());
546
551
  return false;
547
552
  }
548
553
  downloaded += len;
549
- print_progress(downloaded, total_size);
554
+ progress_step += len;
555
+
556
+ if (progress_step >= total_size / 1000 || downloaded == total_size) {
557
+ print_progress(downloaded, total_size);
558
+ progress_step = 0;
559
+ }
550
560
  return true;
551
561
  },
552
562
  nullptr
@@ -1047,7 +1057,7 @@ std::string common_docker_resolve_model(const std::string &) {
1047
1057
  std::vector<common_cached_model_info> common_list_cached_models() {
1048
1058
  std::vector<common_cached_model_info> models;
1049
1059
  const std::string cache_dir = fs_get_cache_directory();
1050
- const std::vector<common_file_info> files = fs_list_files(cache_dir);
1060
+ const std::vector<common_file_info> files = fs_list(cache_dir, false);
1051
1061
  for (const auto & file : files) {
1052
1062
  if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
1053
1063
  common_cached_model_info model_info;
@@ -14,8 +14,10 @@ struct common_cached_model_info {
14
14
  std::string model;
15
15
  std::string tag;
16
16
  size_t size = 0; // GGUF size in bytes
17
+ // return string representation like "user/model:tag"
18
+ // if tag is "latest", it will be omitted
17
19
  std::string to_string() const {
18
- return user + "/" + model + ":" + tag;
20
+ return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
19
21
  }
20
22
  };
21
23
 
@@ -974,7 +974,7 @@ public:
974
974
 
975
975
  void check_errors() {
976
976
  if (!_errors.empty()) {
977
- throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
977
+ throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
978
978
  }
979
979
  if (!_warnings.empty()) {
980
980
  fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
@@ -1,3 +1,4 @@
1
+ #include "common.h"
1
2
  #include "log.h"
2
3
 
3
4
  #include <chrono>
@@ -26,30 +27,6 @@ void common_log_set_verbosity_thold(int verbosity) {
26
27
  common_log_verbosity_thold = verbosity;
27
28
  }
28
29
 
29
- // Auto-detect if colors should be enabled based on terminal and environment
30
- static bool common_log_should_use_colors_auto() {
31
- // Check NO_COLOR environment variable (https://no-color.org/)
32
- if (const char * no_color = std::getenv("NO_COLOR")) {
33
- if (no_color[0] != '\0') {
34
- return false;
35
- }
36
- }
37
-
38
- // Check TERM environment variable
39
- if (const char * term = std::getenv("TERM")) {
40
- if (std::strcmp(term, "dumb") == 0) {
41
- return false;
42
- }
43
- }
44
-
45
- // Check if stdout and stderr are connected to a terminal
46
- // We check both because log messages can go to either
47
- bool stdout_is_tty = isatty(fileno(stdout));
48
- bool stderr_is_tty = isatty(fileno(stderr));
49
-
50
- return stdout_is_tty || stderr_is_tty;
51
- }
52
-
53
30
  static int64_t t_us() {
54
31
  return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
55
32
  }
@@ -391,7 +368,7 @@ struct common_log * common_log_main() {
391
368
  static std::once_flag init_flag;
392
369
  std::call_once(init_flag, [&]() {
393
370
  // Set default to auto-detect colors
394
- log.set_colors(common_log_should_use_colors_auto());
371
+ log.set_colors(tty_can_use_colors());
395
372
  });
396
373
 
397
374
  return &log;
@@ -422,7 +399,7 @@ void common_log_set_file(struct common_log * log, const char * file) {
422
399
 
423
400
  void common_log_set_colors(struct common_log * log, log_colors colors) {
424
401
  if (colors == LOG_COLORS_AUTO) {
425
- log->set_colors(common_log_should_use_colors_auto());
402
+ log->set_colors(tty_can_use_colors());
426
403
  return;
427
404
  }
428
405
 
@@ -443,8 +420,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
443
420
  log->set_timestamps(timestamps);
444
421
  }
445
422
 
423
+ static int common_get_verbosity(enum ggml_log_level level) {
424
+ switch (level) {
425
+ case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
426
+ case GGML_LOG_LEVEL_INFO: return LOG_LEVEL_INFO;
427
+ case GGML_LOG_LEVEL_WARN: return LOG_LEVEL_WARN;
428
+ case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
429
+ case GGML_LOG_LEVEL_CONT: return LOG_LEVEL_INFO; // same as INFO
430
+ case GGML_LOG_LEVEL_NONE:
431
+ default:
432
+ return LOG_LEVEL_OUTPUT;
433
+ }
434
+ }
435
+
446
436
  void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
447
- if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) {
437
+ auto verbosity = common_get_verbosity(level);
438
+ if (verbosity <= common_log_verbosity_thold) {
448
439
  common_log_add(common_log_main(), level, "%s", text);
449
440
  }
450
441
  }
@@ -21,8 +21,14 @@
21
21
  # define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
22
22
  #endif
23
23
 
24
- #define LOG_DEFAULT_DEBUG 1
25
- #define LOG_DEFAULT_LLAMA 0
24
+ #define LOG_LEVEL_DEBUG 4
25
+ #define LOG_LEVEL_INFO 3
26
+ #define LOG_LEVEL_WARN 2
27
+ #define LOG_LEVEL_ERROR 1
28
+ #define LOG_LEVEL_OUTPUT 0 // output data from tools
29
+
30
+ #define LOG_DEFAULT_DEBUG LOG_LEVEL_DEBUG
31
+ #define LOG_DEFAULT_LLAMA LOG_LEVEL_INFO
26
32
 
27
33
  enum log_colors {
28
34
  LOG_COLORS_AUTO = -1,
@@ -67,10 +73,11 @@ void common_log_add(struct common_log * log, enum ggml_log_level level, const ch
67
73
  // 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
68
74
  // 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
69
75
  //
70
- // I - info (stdout, V = 0)
71
- // W - warning (stderr, V = 0)
72
- // E - error (stderr, V = 0)
73
76
  // D - debug (stderr, V = LOG_DEFAULT_DEBUG)
77
+ // I - info (stdout, V = LOG_LEVEL_INFO)
78
+ // W - warning (stderr, V = LOG_LEVEL_WARN)
79
+ // E - error (stderr, V = LOG_LEVEL_ERROR)
80
+ // O - output (stdout, V = LOG_LEVEL_OUTPUT)
74
81
  //
75
82
 
76
83
  void common_log_set_file (struct common_log * log, const char * file); // not thread-safe
@@ -95,14 +102,14 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps); // w
95
102
  } \
96
103
  } while (0)
97
104
 
98
- #define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, 0, __VA_ARGS__)
99
- #define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
105
+ #define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, LOG_LEVEL_OUTPUT, __VA_ARGS__)
106
+ #define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
100
107
 
101
- #define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, 0, __VA_ARGS__)
102
- #define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__)
103
- #define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
104
- #define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
105
- #define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, 0, __VA_ARGS__)
108
+ #define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_LEVEL_DEBUG, __VA_ARGS__)
109
+ #define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, LOG_LEVEL_INFO, __VA_ARGS__)
110
+ #define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, LOG_LEVEL_WARN, __VA_ARGS__)
111
+ #define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, LOG_LEVEL_ERROR, __VA_ARGS__)
112
+ #define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, LOG_LEVEL_INFO, __VA_ARGS__) // same as INFO
106
113
 
107
114
  #define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
108
115
  #define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)