@fugood/llama.node 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.js +7 -17
- package/package.json +16 -16
- package/scripts/llama.cpp.patch +51 -2
- package/src/LlamaContext.cpp +8 -3
package/lib/binding.js
CHANGED
@@ -15,23 +15,13 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
 }) : function(o, v) {
     o["default"] = v;
 });
-var __importStar = (this && this.__importStar) || (function () {
-    var ownKeys = function(o) {
-        ownKeys = Object.getOwnPropertyNames || function (o) {
-            var ar = [];
-            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
-            return ar;
-        };
-        return ownKeys(o);
-    };
-    return function (mod) {
-        if (mod && mod.__esModule) return mod;
-        var result = {};
-        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
-        __setModuleDefault(result, mod);
-        return result;
-    };
-})();
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
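Both the removed and the added helper implement the same `import * as` interop for CommonJS modules; 1.4.2 simply ships the older `for...in`-based emit instead of the `ownKeys`-based one. A minimal TypeScript sketch of what the retained helper does (the fakeCjsModule name is purely illustrative, not part of the package):

    // Illustrative only: mimics the __importStar/__setModuleDefault pair kept in 1.4.2.
    const fakeCjsModule: Record<string, unknown> = { loadNative: () => "ok", default: 42 };

    function importStar(mod: any) {
      if (mod && mod.__esModule) return mod;            // already an ES module namespace
      const result: any = {};
      if (mod != null)
        for (const k in mod)
          if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k))
            result[k] = mod[k];                         // copy own named exports
      result.default = mod;                             // __setModuleDefault equivalent
      return result;
    }

    console.log(Object.keys(importStar(fakeCjsModule))); // ["loadNative", "default"]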
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.0",
+  "version": "1.4.2",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-
-    "@fugood/node-llama-
-    "@fugood/node-llama-linux-
-    "@fugood/node-llama-linux-arm64-
-    "@fugood/node-llama-linux-arm64": "1.4.0",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.0",
-    "@fugood/node-llama-linux-
-    "@fugood/node-llama-
-    "@fugood/node-llama-
-    "@fugood/node-llama-win32-
-    "@fugood/node-llama-win32-arm64": "1.4.0",
-    "@fugood/node-llama-win32-
-    "@fugood/node-llama-
-    "@fugood/node-llama-
+    "@fugood/node-llama-darwin-arm64": "1.4.2",
+    "@fugood/node-llama-darwin-x64": "1.4.2",
+    "@fugood/node-llama-linux-arm64": "1.4.2",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.2",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.2",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.2",
+    "@fugood/node-llama-linux-x64": "1.4.2",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.2",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.2",
+    "@fugood/node-llama-win32-arm64": "1.4.2",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.2",
+    "@fugood/node-llama-win32-x64": "1.4.2",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.2",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.2"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -100,7 +100,7 @@
     "jest": "^29.7.0",
     "node-addon-api": "^8.0.0",
     "node-wav": "^0.0.2",
-    "release-it": "^
+    "release-it": "^19.0.6",
     "rimraf": "^6.0.1",
     "typescript": "^5.4.5",
     "wait-for-expect": "^3.0.2"
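Every optionalDependencies entry is a prebuilt native package for one platform/arch/backend combination, pinned to the same version as the main package; npm keeps only the ones whose platform constraints match the host. A hypothetical sketch of how such a name could be derived at runtime (the variant-suffix handling is an assumption for illustration, not the package's actual loader code):

    // Hypothetical: derive a @fugood/node-llama-* package name for the current host.
    import { arch, platform } from "node:os";

    function prebuiltPackageName(variant?: "cuda" | "vulkan" | "snapdragon"): string {
      const base = `@fugood/node-llama-${platform()}-${arch()}`;
      return variant ? `${base}-${variant}` : base;
    }

    console.log(prebuiltPackageName());         // e.g. "@fugood/node-llama-linux-x64"
    console.log(prebuiltPackageName("vulkan")); // e.g. "@fugood/node-llama-linux-x64-vulkan"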
package/scripts/llama.cpp.patch
CHANGED
@@ -122,10 +122,59 @@ index 7e53a57b7..a328d4db4 100644
      check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
      if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
 diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-index 72a82a891..
+index 72a82a891..1b681f4dd 100644
 --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
 +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-@@ -
+@@ -3216,11 +3216,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+     GGML_UNUSED(dev);
+ }
+ 
++
++// ~2GB per session for now
++#define GGML_HEXAGON_SESSION_MEMORY_DEFAULT (2ULL * 1024 * 1024 * 1024)
++// Max to 3.5GB
++#define GGML_HEXAGON_SESSION_MEMORY_MAX (3ULL * 1024 * 1024 * 1024 + 512ULL * 1024 * 1024)
++
+ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+-    // ~2GB per session for now
+-    *free = 2ULL * 1024 * 1024 * 1024;
+-    *total = *free;
++    const char * str_mem = getenv("GGML_HEXAGON_SESSION_MEMORY");
++    if (str_mem) {
++        *free = std::stoull(str_mem);
++        if (*free < GGML_HEXAGON_SESSION_MEMORY_DEFAULT) {
++            *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
++        } else if (*free > GGML_HEXAGON_SESSION_MEMORY_MAX) {
++            *free = GGML_HEXAGON_SESSION_MEMORY_MAX;
++        }
++    } else {
++        *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
++    }
+ 
++    *total = *free;
+     GGML_UNUSED(dev);
+ }
+ 
+@@ -3401,10 +3416,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+         }
+     }
+ 
++#if defined(__ANDROID__)
+     if(opt_arch < 75) {
+         opt_ndev = 1;
+-        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
++        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75 for Android.\n");
++    }
++#else
++    if(opt_arch < 73) {
++        opt_ndev = 1;
++        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v73 for Linux and Windows.\n");
+     }
++#endif
+ 
+     GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
+ 
+@@ -3417,6 +3439,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
      } catch (std::exception const &exc) {
          GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
          devices[i].context = nullptr;
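The ggml-hexagon change replaces the hard-coded ~2 GB session memory report with a value read from the GGML_HEXAGON_SESSION_MEMORY environment variable, clamped between the 2 GB default and a 3.5 GB ceiling, and forces ndev to 1 below v75 only on Android (below v73 on Linux and Windows). A small TypeScript sketch of the clamp and of setting the variable from Node before the addon is loaded (setting it this way is an assumption; any process-level mechanism works, and non-numeric input handling is simplified here):

    // Mirrors the patch's clamp for GGML_HEXAGON_SESSION_MEMORY (bytes).
    const SESSION_MEMORY_DEFAULT = 2 * 1024 ** 3;                // 2 GB
    const SESSION_MEMORY_MAX = 3 * 1024 ** 3 + 512 * 1024 ** 2;  // 3.5 GB

    function effectiveSessionMemory(raw: string | undefined): number {
      if (!raw) return SESSION_MEMORY_DEFAULT;
      const requested = Number(raw);
      if (!Number.isFinite(requested) || requested < SESSION_MEMORY_DEFAULT) return SESSION_MEMORY_DEFAULT;
      return Math.min(requested, SESSION_MEMORY_MAX);
    }

    // Request 3 GB per Hexagon session; must be set before the native addon reads it.
    process.env.GGML_HEXAGON_SESSION_MEMORY = String(3 * 1024 ** 3);
    console.log(effectiveSessionMemory(process.env.GGML_HEXAGON_SESSION_MEMORY)); // 3221225472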
package/src/LlamaContext.cpp
CHANGED
@@ -321,15 +321,20 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   // Parse devices array
   if (options.Has("devices") && options.Get("devices").IsArray()) {
+    std::vector<ggml_backend_dev_t> devs;
     auto devices_array = options.Get("devices").As<Napi::Array>();
     for (size_t i = 0; i < devices_array.Length(); i++) {
       auto device_name = devices_array.Get(i).ToString().Utf8Value();
       auto * dev = ggml_backend_dev_by_name(device_name.c_str());
       if (dev) {
-
+        devs.push_back(dev);
       }
       // Skip invalid device names silently
     }
+    if (!devs.empty()) {
+      params.devices = devs;
+      params.devices.push_back(nullptr); // nullptr terminator required by llama.cpp
+    }
   }
 
   std::vector<common_adapter_lora_info> lora;
@@ -652,7 +657,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto params =
       has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);
 
-  if (get_option<bool>(params, "jinja",
+  if (get_option<bool>(params, "jinja", true)) {
     std::string json_schema_str = "";
     if (!is_nil(params.Get("response_format"))) {
       auto response_format = params.Get("response_format").As<Napi::Object>();
@@ -907,7 +912,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
     auto chat_template = get_option<std::string>(options, "chat_template", "");
-    auto jinja = get_option<bool>(options, "jinja",
+    auto jinja = get_option<bool>(options, "jinja", true);
     if (jinja) {
       auto tools_str =
           !is_nil(options.Get("tools"))
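Two behavioral changes stand out in LlamaContext.cpp: the devices option is now collected into a local vector and only applied, with the nullptr terminator llama.cpp expects, when at least one name resolves; and jinja chat templating now defaults to true in both GetFormattedChat and Completion. A hypothetical usage sketch from the JavaScript side (the loadModel entry point, the model option, and the completion/release call shapes are assumptions; devices and jinja are the options actually touched by this diff):

    // Hypothetical usage against @fugood/llama.node 1.4.2; API names are assumed.
    import { loadModel } from "@fugood/llama.node";

    async function main() {
      const context = await loadModel({
        model: "./model.gguf",   // assumed option name
        devices: ["Vulkan0"],    // unknown names are skipped silently; if none
                                 // resolve, llama.cpp's default device list is used
      });

      // As of 1.4.2 this is equivalent to passing { jinja: true } explicitly.
      const result = await context.completion({
        messages: [{ role: "user", content: "Hello" }],
      });
      console.log(result);

      await context.release?.(); // assumed cleanup method
    }

    main().catch(console.error);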
|