@fugood/llama.node 1.4.2 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +1 -1
- package/lib/binding.js +3 -0
- package/lib/binding.ts +10 -0
- package/lib/index.js +9 -0
- package/lib/index.ts +10 -0
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +25 -11
- package/src/LlamaContext.cpp +24 -0
- package/src/LlamaContext.h +3 -0
- package/src/llama.cpp/CMakeLists.txt +21 -6
- package/src/llama.cpp/common/CMakeLists.txt +6 -0
- package/src/llama.cpp/common/arg.cpp +83 -22
- package/src/llama.cpp/common/chat-parser.cpp +40 -0
- package/src/llama.cpp/common/chat-peg-parser.cpp +110 -0
- package/src/llama.cpp/common/chat-peg-parser.h +105 -0
- package/src/llama.cpp/common/chat.cpp +40 -29
- package/src/llama.cpp/common/chat.h +10 -1
- package/src/llama.cpp/common/common.cpp +70 -7
- package/src/llama.cpp/common/common.h +23 -5
- package/src/llama.cpp/common/download.cpp +18 -8
- package/src/llama.cpp/common/download.h +3 -1
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
- package/src/llama.cpp/common/log.cpp +18 -27
- package/src/llama.cpp/common/log.h +19 -12
- package/src/llama.cpp/common/peg-parser.cpp +1712 -0
- package/src/llama.cpp/common/peg-parser.h +459 -0
- package/src/llama.cpp/common/unicode.cpp +64 -0
- package/src/llama.cpp/common/unicode.h +22 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +52 -48
- package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -2
- package/src/llama.cpp/ggml/include/ggml-zendnn.h +22 -0
- package/src/llama.cpp/ggml/include/ggml.h +29 -2
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -4
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +10 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +51 -125
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +98 -12
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-arch.cpp +30 -1
- package/src/llama.cpp/src/llama-arch.h +3 -0
- package/src/llama.cpp/src/llama-graph.cpp +3 -6
- package/src/llama.cpp/src/llama-hparams.h +2 -2
- package/src/llama.cpp/src/llama-impl.h +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +1 -1
- package/src/llama.cpp/src/llama-model.cpp +54 -6
- package/src/llama.cpp/src/llama-quant.cpp +0 -29
- package/src/llama.cpp/src/llama-vocab.cpp +1 -2
- package/src/llama.cpp/src/models/deepseek2.cpp +18 -0
- package/src/llama.cpp/src/models/mistral3.cpp +160 -0
- package/src/llama.cpp/src/models/models.h +4 -0
- package/src/llama.cpp/src/unicode.cpp +2 -2
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
#include "http.h"
|
|
25
25
|
#endif
|
|
26
26
|
|
|
27
|
+
#ifndef __EMSCRIPTEN__
|
|
27
28
|
#ifdef __linux__
|
|
28
29
|
#include <linux/limits.h>
|
|
29
30
|
#elif defined(_WIN32)
|
|
@@ -35,6 +36,8 @@
|
|
|
35
36
|
#else
|
|
36
37
|
#include <sys/syslimits.h>
|
|
37
38
|
#endif
|
|
39
|
+
#endif
|
|
40
|
+
|
|
38
41
|
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
|
39
42
|
|
|
40
43
|
// isatty
|
|
@@ -430,7 +433,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
|
|
|
430
433
|
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
|
|
431
434
|
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
|
|
432
435
|
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
|
|
433
|
-
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE,
|
|
436
|
+
curl_easy_setopt(curl.get(), CURLOPT_VERBOSE, 0L);
|
|
434
437
|
typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data);
|
|
435
438
|
auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t {
|
|
436
439
|
auto data_vec = static_cast<std::vector<char> *>(data);
|
|
@@ -517,16 +520,18 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
517
520
|
headers.emplace("Range", "bytes=" + std::to_string(existing_size) + "-");
|
|
518
521
|
}
|
|
519
522
|
|
|
520
|
-
|
|
523
|
+
const char * func = __func__; // avoid __func__ inside a lambda
|
|
524
|
+
size_t downloaded = existing_size;
|
|
525
|
+
size_t progress_step = 0;
|
|
521
526
|
|
|
522
527
|
auto res = cli.Get(resolve_path, headers,
|
|
523
528
|
[&](const httplib::Response &response) {
|
|
524
529
|
if (existing_size > 0 && response.status != 206) {
|
|
525
|
-
LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n",
|
|
530
|
+
LOG_WRN("%s: server did not respond with 206 Partial Content for a resume request. Status: %d\n", func, response.status);
|
|
526
531
|
return false;
|
|
527
532
|
}
|
|
528
533
|
if (existing_size == 0 && response.status != 200) {
|
|
529
|
-
LOG_WRN("%s: download received non-successful status code: %d\n",
|
|
534
|
+
LOG_WRN("%s: download received non-successful status code: %d\n", func, response.status);
|
|
530
535
|
return false;
|
|
531
536
|
}
|
|
532
537
|
if (total_size == 0 && response.has_header("Content-Length")) {
|
|
@@ -534,7 +539,7 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
534
539
|
size_t content_length = std::stoull(response.get_header_value("Content-Length"));
|
|
535
540
|
total_size = existing_size + content_length;
|
|
536
541
|
} catch (const std::exception &e) {
|
|
537
|
-
LOG_WRN("%s: invalid Content-Length header: %s\n",
|
|
542
|
+
LOG_WRN("%s: invalid Content-Length header: %s\n", func, e.what());
|
|
538
543
|
}
|
|
539
544
|
}
|
|
540
545
|
return true;
|
|
@@ -542,11 +547,16 @@ static bool common_pull_file(httplib::Client & cli,
|
|
|
542
547
|
[&](const char *data, size_t len) {
|
|
543
548
|
ofs.write(data, len);
|
|
544
549
|
if (!ofs) {
|
|
545
|
-
LOG_ERR("%s: error writing to file: %s\n",
|
|
550
|
+
LOG_ERR("%s: error writing to file: %s\n", func, path_tmp.c_str());
|
|
546
551
|
return false;
|
|
547
552
|
}
|
|
548
553
|
downloaded += len;
|
|
549
|
-
|
|
554
|
+
progress_step += len;
|
|
555
|
+
|
|
556
|
+
if (progress_step >= total_size / 1000 || downloaded == total_size) {
|
|
557
|
+
print_progress(downloaded, total_size);
|
|
558
|
+
progress_step = 0;
|
|
559
|
+
}
|
|
550
560
|
return true;
|
|
551
561
|
},
|
|
552
562
|
nullptr
|
|
@@ -1047,7 +1057,7 @@ std::string common_docker_resolve_model(const std::string &) {
|
|
|
1047
1057
|
std::vector<common_cached_model_info> common_list_cached_models() {
|
|
1048
1058
|
std::vector<common_cached_model_info> models;
|
|
1049
1059
|
const std::string cache_dir = fs_get_cache_directory();
|
|
1050
|
-
const std::vector<common_file_info> files =
|
|
1060
|
+
const std::vector<common_file_info> files = fs_list(cache_dir, false);
|
|
1051
1061
|
for (const auto & file : files) {
|
|
1052
1062
|
if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) {
|
|
1053
1063
|
common_cached_model_info model_info;
|
|
@@ -14,8 +14,10 @@ struct common_cached_model_info {
|
|
|
14
14
|
std::string model;
|
|
15
15
|
std::string tag;
|
|
16
16
|
size_t size = 0; // GGUF size in bytes
|
|
17
|
+
// return string representation like "user/model:tag"
|
|
18
|
+
// if tag is "latest", it will be omitted
|
|
17
19
|
std::string to_string() const {
|
|
18
|
-
return user + "/" + model + ":" + tag;
|
|
20
|
+
return user + "/" + model + (tag == "latest" ? "" : ":" + tag);
|
|
19
21
|
}
|
|
20
22
|
};
|
|
21
23
|
|
|
@@ -974,7 +974,7 @@ public:
|
|
|
974
974
|
|
|
975
975
|
void check_errors() {
|
|
976
976
|
if (!_errors.empty()) {
|
|
977
|
-
throw std::
|
|
977
|
+
throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
|
|
978
978
|
}
|
|
979
979
|
if (!_warnings.empty()) {
|
|
980
980
|
fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
#include "common.h"
|
|
1
2
|
#include "log.h"
|
|
2
3
|
|
|
3
4
|
#include <chrono>
|
|
@@ -26,30 +27,6 @@ void common_log_set_verbosity_thold(int verbosity) {
|
|
|
26
27
|
common_log_verbosity_thold = verbosity;
|
|
27
28
|
}
|
|
28
29
|
|
|
29
|
-
// Auto-detect if colors should be enabled based on terminal and environment
|
|
30
|
-
static bool common_log_should_use_colors_auto() {
|
|
31
|
-
// Check NO_COLOR environment variable (https://no-color.org/)
|
|
32
|
-
if (const char * no_color = std::getenv("NO_COLOR")) {
|
|
33
|
-
if (no_color[0] != '\0') {
|
|
34
|
-
return false;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
// Check TERM environment variable
|
|
39
|
-
if (const char * term = std::getenv("TERM")) {
|
|
40
|
-
if (std::strcmp(term, "dumb") == 0) {
|
|
41
|
-
return false;
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
// Check if stdout and stderr are connected to a terminal
|
|
46
|
-
// We check both because log messages can go to either
|
|
47
|
-
bool stdout_is_tty = isatty(fileno(stdout));
|
|
48
|
-
bool stderr_is_tty = isatty(fileno(stderr));
|
|
49
|
-
|
|
50
|
-
return stdout_is_tty || stderr_is_tty;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
30
|
static int64_t t_us() {
|
|
54
31
|
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
|
|
55
32
|
}
|
|
@@ -391,7 +368,7 @@ struct common_log * common_log_main() {
|
|
|
391
368
|
static std::once_flag init_flag;
|
|
392
369
|
std::call_once(init_flag, [&]() {
|
|
393
370
|
// Set default to auto-detect colors
|
|
394
|
-
log.set_colors(
|
|
371
|
+
log.set_colors(tty_can_use_colors());
|
|
395
372
|
});
|
|
396
373
|
|
|
397
374
|
return &log;
|
|
@@ -422,7 +399,7 @@ void common_log_set_file(struct common_log * log, const char * file) {
|
|
|
422
399
|
|
|
423
400
|
void common_log_set_colors(struct common_log * log, log_colors colors) {
|
|
424
401
|
if (colors == LOG_COLORS_AUTO) {
|
|
425
|
-
log->set_colors(
|
|
402
|
+
log->set_colors(tty_can_use_colors());
|
|
426
403
|
return;
|
|
427
404
|
}
|
|
428
405
|
|
|
@@ -443,8 +420,22 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps) {
|
|
|
443
420
|
log->set_timestamps(timestamps);
|
|
444
421
|
}
|
|
445
422
|
|
|
423
|
+
static int common_get_verbosity(enum ggml_log_level level) {
|
|
424
|
+
switch (level) {
|
|
425
|
+
case GGML_LOG_LEVEL_DEBUG: return LOG_LEVEL_DEBUG;
|
|
426
|
+
case GGML_LOG_LEVEL_INFO: return LOG_LEVEL_INFO;
|
|
427
|
+
case GGML_LOG_LEVEL_WARN: return LOG_LEVEL_WARN;
|
|
428
|
+
case GGML_LOG_LEVEL_ERROR: return LOG_LEVEL_ERROR;
|
|
429
|
+
case GGML_LOG_LEVEL_CONT: return LOG_LEVEL_INFO; // same as INFO
|
|
430
|
+
case GGML_LOG_LEVEL_NONE:
|
|
431
|
+
default:
|
|
432
|
+
return LOG_LEVEL_OUTPUT;
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
|
|
446
436
|
void common_log_default_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
|
|
447
|
-
|
|
437
|
+
auto verbosity = common_get_verbosity(level);
|
|
438
|
+
if (verbosity <= common_log_verbosity_thold) {
|
|
448
439
|
common_log_add(common_log_main(), level, "%s", text);
|
|
449
440
|
}
|
|
450
441
|
}
|
|
@@ -21,8 +21,14 @@
|
|
|
21
21
|
# define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
|
22
22
|
#endif
|
|
23
23
|
|
|
24
|
-
#define
|
|
25
|
-
#define
|
|
24
|
+
#define LOG_LEVEL_DEBUG 4
|
|
25
|
+
#define LOG_LEVEL_INFO 3
|
|
26
|
+
#define LOG_LEVEL_WARN 2
|
|
27
|
+
#define LOG_LEVEL_ERROR 1
|
|
28
|
+
#define LOG_LEVEL_OUTPUT 0 // output data from tools
|
|
29
|
+
|
|
30
|
+
#define LOG_DEFAULT_DEBUG LOG_LEVEL_DEBUG
|
|
31
|
+
#define LOG_DEFAULT_LLAMA LOG_LEVEL_INFO
|
|
26
32
|
|
|
27
33
|
enum log_colors {
|
|
28
34
|
LOG_COLORS_AUTO = -1,
|
|
@@ -67,10 +73,11 @@ void common_log_add(struct common_log * log, enum ggml_log_level level, const ch
|
|
|
67
73
|
// 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
|
|
68
74
|
// 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
|
|
69
75
|
//
|
|
70
|
-
// I - info (stdout, V = 0)
|
|
71
|
-
// W - warning (stderr, V = 0)
|
|
72
|
-
// E - error (stderr, V = 0)
|
|
73
76
|
// D - debug (stderr, V = LOG_DEFAULT_DEBUG)
|
|
77
|
+
// I - info (stdout, V = LOG_LEVEL_INFO)
|
|
78
|
+
// W - warning (stderr, V = LOG_LEVEL_WARN)
|
|
79
|
+
// E - error (stderr, V = LOG_LEVEL_ERROR)
|
|
80
|
+
// O - output (stdout, V = LOG_LEVEL_OUTPUT)
|
|
74
81
|
//
|
|
75
82
|
|
|
76
83
|
void common_log_set_file (struct common_log * log, const char * file); // not thread-safe
|
|
@@ -95,14 +102,14 @@ void common_log_set_timestamps(struct common_log * log, bool timestamps); // w
|
|
|
95
102
|
} \
|
|
96
103
|
} while (0)
|
|
97
104
|
|
|
98
|
-
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE,
|
|
99
|
-
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity,
|
|
105
|
+
#define LOG(...) LOG_TMPL(GGML_LOG_LEVEL_NONE, LOG_LEVEL_OUTPUT, __VA_ARGS__)
|
|
106
|
+
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
|
|
100
107
|
|
|
101
|
-
#define
|
|
102
|
-
#define
|
|
103
|
-
#define
|
|
104
|
-
#define
|
|
105
|
-
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT,
|
|
108
|
+
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_LEVEL_DEBUG, __VA_ARGS__)
|
|
109
|
+
#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO, LOG_LEVEL_INFO, __VA_ARGS__)
|
|
110
|
+
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, LOG_LEVEL_WARN, __VA_ARGS__)
|
|
111
|
+
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, LOG_LEVEL_ERROR, __VA_ARGS__)
|
|
112
|
+
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, LOG_LEVEL_INFO, __VA_ARGS__) // same as INFO
|
|
106
113
|
|
|
107
114
|
#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__)
|
|
108
115
|
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__)
|