@fugood/llama.node 1.4.3 → 1.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +8 -0
- package/lib/index.js +9 -0
- package/lib/index.ts +10 -0
- package/package.json +15 -15
- package/src/LlamaContext.cpp +24 -0
- package/src/LlamaContext.h +3 -0
- package/src/llama.cpp/common/arg.cpp +19 -7
- package/src/llama.cpp/common/common.cpp +46 -2
- package/src/llama.cpp/common/common.h +7 -0
- package/src/llama.cpp/common/log.cpp +3 -26
- package/src/llama.cpp/ggml/CMakeLists.txt +4 -0
- package/src/llama.cpp/ggml/include/ggml-rpc.h +1 -2
- package/src/llama.cpp/ggml/include/ggml-zendnn.h +22 -0
- package/src/llama.cpp/ggml/include/ggml.h +22 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm-ppc.h +333 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +51 -125
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +6 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +38 -11
- package/src/llama.cpp/src/llama-model.cpp +4 -0
- package/src/llama.cpp/src/llama-quant.cpp +0 -29
- package/src/llama.cpp/src/models/deepseek2.cpp +18 -0
- package/src/llama.cpp/src/unicode.cpp +2 -2
package/lib/binding.ts
CHANGED
@@ -565,6 +565,14 @@ export interface LlamaContext {
    */
   cancelRequest(requestId: number): void

+  /**
+   * Clear the KV and recurrent caches.
+   * This is faster than recreating the context and useful for preventing
+   * cache contamination between chat sessions.
+   * @param clearData If true, also clears the cache data (default: false)
+   */
+  clearCache(clearData?: boolean): void
+
   // static
   loadModelInfo(path: string, skip: string[]): Promise<GGUFModelInfo>
   toggleNativeLog(
package/lib/index.js
CHANGED
@@ -195,6 +195,15 @@ class LlamaContextWrapper {
     decodeAudioTokens(tokens) {
         return this.ctx.decodeAudioTokens(tokens);
     }
+    /**
+     * Clear the KV and recurrent caches.
+     * This is faster than recreating the context and useful for preventing
+     * cache contamination between chat sessions.
+     * @param clearData If true, also clears the cache data (default: false)
+     */
+    clearCache(clearData) {
+        this.ctx.clearCache(clearData);
+    }
 }
 const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
package/lib/index.ts
CHANGED
@@ -299,6 +299,16 @@ class LlamaContextWrapper {
   decodeAudioTokens(tokens: number[] | Int32Array): Promise<Float32Array> {
     return this.ctx.decodeAudioTokens(tokens)
   }
+
+  /**
+   * Clear the KV and recurrent caches.
+   * This is faster than recreating the context and useful for preventing
+   * cache contamination between chat sessions.
+   * @param clearData If true, also clears the cache data (default: false)
+   */
+  clearCache(clearData?: boolean): void {
+    this.ctx.clearCache(clearData)
+  }
 }

 export const loadModel = async (
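For reference, a minimal usage sketch of the new method from the package's TypeScript API (the loadModel options and the chat calls around it are placeholders, not part of this diff):

import { loadModel } from '@fugood/llama.node'

const context = await loadModel({ model: './model.gguf' }) // placeholder options

// ... first chat session ...

// Reset the KV and recurrent caches before starting an unrelated session,
// instead of recreating the whole context.
context.clearCache()

// Pass true to also clear the cache data (default: false).
context.clearCache(true)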
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.3",
+  "version": "1.4.4",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.4.3",
-    "@fugood/node-llama-darwin-x64": "1.4.3",
-    "@fugood/node-llama-linux-arm64": "1.4.3",
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.3",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.3",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.3",
-    "@fugood/node-llama-linux-x64": "1.4.3",
-    "@fugood/node-llama-linux-x64-cuda": "1.4.3",
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.3",
-    "@fugood/node-llama-win32-arm64": "1.4.3",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.3",
-    "@fugood/node-llama-win32-x64": "1.4.3",
-    "@fugood/node-llama-win32-x64-cuda": "1.4.3",
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.3"
+    "@fugood/node-llama-darwin-arm64": "1.4.4",
+    "@fugood/node-llama-darwin-x64": "1.4.4",
+    "@fugood/node-llama-linux-arm64": "1.4.4",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.4",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.4",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.4",
+    "@fugood/node-llama-linux-x64": "1.4.4",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.4",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.4",
+    "@fugood/node-llama-win32-arm64": "1.4.4",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.4",
+    "@fugood/node-llama-win32-x64": "1.4.4",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.4",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.4"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/src/LlamaContext.cpp
CHANGED
@@ -200,6 +200,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
           static_cast<napi_property_attributes>(napi_enumerable)),
       InstanceMethod<&LlamaContext::CancelRequest>(
           "cancelRequest",
+          static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::ClearCache>(
+          "clearCache",
           static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
@@ -1505,3 +1508,24 @@ Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
   worker->Queue();
   return worker->Promise();
 }
+
+// clearCache(clearData?: boolean): void
+void LlamaContext::ClearCache(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  if (!_rn_ctx) {
+    Napi::TypeError::New(env, "Context is disposed").ThrowAsJavaScriptException();
+    return;
+  }
+  if (_rn_ctx->completion != nullptr && _rn_ctx->completion->is_predicting) {
+    Napi::TypeError::New(env, "Cannot clear cache while completion is in progress")
+        .ThrowAsJavaScriptException();
+    return;
+  }
+
+  bool clear_data = false;
+  if (info.Length() >= 1 && info[0].IsBoolean()) {
+    clear_data = info[0].ToBoolean().Value();
+  }
+
+  _rn_ctx->clearCache(clear_data);
+}
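As the guards above imply, clearCache throws if the context has been disposed or while a completion is still predicting. A small defensive sketch from the JavaScript side (context is the object returned by loadModel in the earlier sketch):

try {
  context.clearCache()
} catch (err) {
  // Thrown when the context is disposed or a completion is still in progress;
  // cancel or await the pending request before clearing the cache.
  console.warn('clearCache skipped:', err)
}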
package/src/LlamaContext.h
CHANGED
@@ -69,6 +69,9 @@ private:
   Napi::Value QueueRerank(const Napi::CallbackInfo &info);
   void CancelRequest(const Napi::CallbackInfo &info);

+  // Cache management
+  void ClearCache(const Napi::CallbackInfo &info);
+
   std::string _info;
   std::vector<std::string> _used_devices;
   Napi::Object _meta;
package/src/llama.cpp/common/arg.cpp
CHANGED
@@ -427,7 +427,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context

     // model is required (except for server)
     // TODO @ngxson : maybe show a list of available models in CLI in this case
-    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) {
+    if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER && !params.usage) {
         throw std::invalid_argument("error: --model is required\n");
     }

@@ -708,6 +708,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         params.use_jinja = true;
     }

+    params.use_color = tty_can_use_colors();
+
     // load dynamic backends
     ggml_backend_load_all();

@@ -790,10 +792,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
-        {"-co", "--color"},
-
-
-
+        {"-co", "--color"}, "[on|off|auto]",
+        "Colorize output to distinguish prompt and user input from generations ('on', 'off', or 'auto', default: 'auto')\n"
+        "'auto' enables colors when output is to a terminal",
+        [](common_params & params, const std::string & value) {
+            if (is_truthy(value)) {
+                params.use_color = true;
+            } else if (is_falsey(value)) {
+                params.use_color = false;
+            } else if (is_autoy(value)) {
+                params.use_color = tty_can_use_colors();
+            } else {
+                throw std::invalid_argument(
+                    string_format("error: unknown value for --color: '%s'\n", value.c_str()));
+            }
         }
     ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
     add_opt(common_arg(
@@ -1022,7 +1034,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
             } else {
                 throw std::runtime_error(
-                    string_format("error:
+                    string_format("error: unknown value for --flash-attn: '%s'\n", value.c_str()));
             }
         }).set_env("LLAMA_ARG_FLASH_ATTN"));
     add_opt(common_arg(
@@ -2696,7 +2708,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 common_log_set_colors(common_log_main(), LOG_COLORS_AUTO);
             } else {
                 throw std::invalid_argument(
-                    string_format("error:
+                    string_format("error: unknown value for --log-colors: '%s'\n", value.c_str()));
             }
         }
     ).set_env("LLAMA_LOG_COLORS"));
package/src/llama.cpp/common/common.cpp
CHANGED
@@ -786,11 +786,29 @@ bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
 #include <iostream>


+#ifdef _WIN32
+static std::wstring utf8_to_wstring(const std::string & str) {
+    if (str.empty()) {
+        return std::wstring();
+    }
+
+    int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
+
+    if (size <= 0) {
+        return std::wstring();
+    }
+
+    std::wstring wstr(size, 0);
+    MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
+
+    return wstr;
+}
+#endif
+
 // returns true if successful, false otherwise
 bool fs_create_directory_with_parents(const std::string & path) {
 #ifdef _WIN32
-    std::
-    std::wstring wpath = converter.from_bytes(path);
+    std::wstring wpath = utf8_to_wstring(path);

     // if the path already exists, check whether it's a directory
     const DWORD attributes = GetFileAttributesW(wpath.c_str());
@@ -964,6 +982,32 @@ std::vector<common_file_info> fs_list(const std::string & path, bool include_dir
     return files;
 }

+//
+// TTY utils
+//
+
+bool tty_can_use_colors() {
+    // Check NO_COLOR environment variable (https://no-color.org/)
+    if (const char * no_color = std::getenv("NO_COLOR")) {
+        if (no_color[0] != '\0') {
+            return false;
+        }
+    }
+
+    // Check TERM environment variable
+    if (const char * term = std::getenv("TERM")) {
+        if (std::strcmp(term, "dumb") == 0) {
+            return false;
+        }
+    }
+
+    // Check if stdout and stderr are connected to a terminal
+    // We check both because log messages can go to either
+    bool stdout_is_tty = isatty(fileno(stdout));
+    bool stderr_is_tty = isatty(fileno(stderr));
+
+    return stdout_is_tty || stderr_is_tty;
+}

 //
 // Model utils
package/src/llama.cpp/common/common.h
CHANGED
@@ -656,6 +656,13 @@ struct common_file_info {
 };
 std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);

+//
+// TTY utils
+//
+
+// Auto-detect if colors can be enabled based on terminal and environment
+bool tty_can_use_colors();
+
 //
 // Model utils
 //
package/src/llama.cpp/common/log.cpp
CHANGED
@@ -1,3 +1,4 @@
+#include "common.h"
 #include "log.h"

 #include <chrono>
@@ -26,30 +27,6 @@ void common_log_set_verbosity_thold(int verbosity) {
     common_log_verbosity_thold = verbosity;
 }

-// Auto-detect if colors should be enabled based on terminal and environment
-static bool common_log_should_use_colors_auto() {
-    // Check NO_COLOR environment variable (https://no-color.org/)
-    if (const char * no_color = std::getenv("NO_COLOR")) {
-        if (no_color[0] != '\0') {
-            return false;
-        }
-    }
-
-    // Check TERM environment variable
-    if (const char * term = std::getenv("TERM")) {
-        if (std::strcmp(term, "dumb") == 0) {
-            return false;
-        }
-    }
-
-    // Check if stdout and stderr are connected to a terminal
-    // We check both because log messages can go to either
-    bool stdout_is_tty = isatty(fileno(stdout));
-    bool stderr_is_tty = isatty(fileno(stderr));
-
-    return stdout_is_tty || stderr_is_tty;
-}
-
 static int64_t t_us() {
     return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
 }
@@ -391,7 +368,7 @@ struct common_log * common_log_main() {
     static std::once_flag init_flag;
     std::call_once(init_flag, [&]() {
         // Set default to auto-detect colors
-        log.set_colors(
+        log.set_colors(tty_can_use_colors());
     });

     return &log;
@@ -422,7 +399,7 @@ void common_log_set_file(struct common_log * log, const char * file) {

 void common_log_set_colors(struct common_log * log, log_colors colors) {
     if (colors == LOG_COLORS_AUTO) {
-        log->set_colors(
+        log->set_colors(tty_can_use_colors());
         return;
     }

package/src/llama.cpp/ggml/CMakeLists.txt
CHANGED
@@ -253,6 +253,9 @@ option(GGML_HEXAGON "ggml: enable Hexagon backend"
 # toolchain for vulkan-shaders-gen
 set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")

+option(GGML_ZENDNN "ggml: use ZenDNN" OFF)
+option(ZENDNN_ROOT "ggml: path to ZenDNN installation" "")
+
 # extra artifacts
 option(GGML_BUILD_TESTS "ggml: build tests" ${GGML_STANDALONE})
 option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})
@@ -314,6 +317,7 @@ set(GGML_PUBLIC_HEADERS
     include/ggml-sycl.h
     include/ggml-vulkan.h
     include/ggml-webgpu.h
+    include/ggml-zendnn.h
     include/gguf.h)

 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
package/src/llama.cpp/ggml/include/ggml-rpc.h
CHANGED
@@ -1,6 +1,5 @@
 #pragma once

-#include "ggml.h"
 #include "ggml-backend.h"

 #ifdef __cplusplus
@@ -8,7 +7,7 @@ extern "C" {
 #endif

 #define RPC_PROTO_MAJOR_VERSION 3
-#define RPC_PROTO_MINOR_VERSION
+#define RPC_PROTO_MINOR_VERSION 6
 #define RPC_PROTO_PATCH_VERSION 0
 #define GGML_RPC_MAX_SERVERS 16

package/src/llama.cpp/ggml/include/ggml-zendnn.h
ADDED
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "ggml-backend.h"
+#include "ggml.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// backend API
+GGML_BACKEND_API ggml_backend_t ggml_backend_zendnn_init(void);
+
+GGML_BACKEND_API bool ggml_backend_is_zendnn(ggml_backend_t backend);
+
+// number of threads used for zendnn operations
+GGML_BACKEND_API void ggml_backend_zendnn_set_n_threads(ggml_backend_t backend_zendnn, int n_threads);
+
+GGML_BACKEND_API ggml_backend_reg_t ggml_backend_zendnn_reg(void);
+
+#ifdef __cplusplus
+}
+#endif
package/src/llama.cpp/ggml/include/ggml.h
CHANGED
@@ -2196,6 +2196,15 @@ extern "C" {
             int                   p2,
             int                   p3);

+    // pad each dimension with values on the other side of the torus (looping around)
+    GGML_API struct ggml_tensor * ggml_pad_circular(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   p0,
+            int                   p1,
+            int                   p2,
+            int                   p3);
+
     GGML_API struct ggml_tensor * ggml_pad_ext(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -2209,6 +2218,19 @@ extern "C" {
             int                   rp3
             );

+    // pad each dimension with values on the other side of the torus (looping around)
+    GGML_API struct ggml_tensor * ggml_pad_ext_circular(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            int                   lp0,
+            int                   rp0,
+            int                   lp1,
+            int                   rp1,
+            int                   lp2,
+            int                   rp2,
+            int                   lp3,
+            int                   rp3);
+
     // pad each dimension with reflection: [a, b, c, d] -> [b, a, b, c, d, c]
     GGML_API struct ggml_tensor * ggml_pad_reflect_1d(
             struct ggml_context * ctx,
package/src/llama.cpp/ggml/src/CMakeLists.txt
CHANGED
@@ -440,6 +440,7 @@ ggml_add_backend(WebGPU)
 ggml_add_backend(zDNN)
 ggml_add_backend(OpenCL)
 ggml_add_backend(Hexagon)
+ggml_add_backend(ZenDNN)

 foreach (target ggml-base ggml)
     target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)
package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp
CHANGED
@@ -505,7 +505,6 @@ void ggml_gemv_q4_K_8x4_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo
     constexpr int blocklen = 8;

     assert(n % qk == 0);
-    assert(nr % 4 == 0);
     assert(nc % ncols_interleaved == 0);

     UNUSED(nb);
@@ -645,7 +644,6 @@ void ggml_gemv_q4_K_8x8_q8_K(int n,
     constexpr int blocklen = 8;

     assert(n % qk == 0);
-    assert(nr % 4 == 0);
     assert(nc % ncols_interleaved == 0);

     UNUSED(nb);