@fugood/llama.node 1.4.0 → 1.4.2

This diff compares the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
package/lib/binding.js CHANGED
@@ -15,23 +15,13 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
 }) : function(o, v) {
     o["default"] = v;
 });
-var __importStar = (this && this.__importStar) || (function () {
-    var ownKeys = function(o) {
-        ownKeys = Object.getOwnPropertyNames || function (o) {
-            var ar = [];
-            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
-            return ar;
-        };
-        return ownKeys(o);
-    };
-    return function (mod) {
-        if (mod && mod.__esModule) return mod;
-        var result = {};
-        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
-        __setModuleDefault(result, mod);
-        return result;
-    };
-})();
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
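binding.js is compiler-generated output, and the only change is the `__importStar` helper reverting to an older, compact form of the TypeScript emit (most likely a different compiler version in the build chain); the two forms are functionally equivalent for ordinary modules. A minimal TypeScript sketch of what the helper does, illustrative only — the real emit installs live getter bindings via `__createBinding` rather than copying values:

```ts
// Sketch of __importStar's behavior: wrap a CommonJS exports object into an
// ES-module-style namespace object (this backs `import * as ns from "mod"`
// when compiling to CommonJS).
function importStar(mod: Record<string, unknown> | null): Record<string, unknown> {
  if (mod && mod.__esModule) return mod;  // already an ES module namespace
  const result: Record<string, unknown> = {};
  if (mod != null) {
    for (const k of Object.keys(mod)) {   // own enumerable keys only
      if (k !== "default") result[k] = mod[k];
    }
  }
  result.default = mod;                    // expose the original module as `default`
  return result;
}
```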
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.0",
+  "version": "1.4.2",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.4.0",
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.0",
-    "@fugood/node-llama-linux-x64-cuda": "1.4.0",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.0",
-    "@fugood/node-llama-linux-arm64": "1.4.0",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.0",
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.0",
-    "@fugood/node-llama-win32-x64": "1.4.0",
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.0",
-    "@fugood/node-llama-win32-x64-cuda": "1.4.0",
-    "@fugood/node-llama-win32-arm64": "1.4.0",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.0",
-    "@fugood/node-llama-darwin-x64": "1.4.0",
-    "@fugood/node-llama-darwin-arm64": "1.4.0"
+    "@fugood/node-llama-darwin-arm64": "1.4.2",
+    "@fugood/node-llama-darwin-x64": "1.4.2",
+    "@fugood/node-llama-linux-arm64": "1.4.2",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.2",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.2",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.2",
+    "@fugood/node-llama-linux-x64": "1.4.2",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.2",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.2",
+    "@fugood/node-llama-win32-arm64": "1.4.2",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.2",
+    "@fugood/node-llama-win32-x64": "1.4.2",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.2",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.2"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -100,7 +100,7 @@
     "jest": "^29.7.0",
     "node-addon-api": "^8.0.0",
     "node-wav": "^0.0.2",
-    "release-it": "^17.7.0",
+    "release-it": "^19.0.6",
     "rimraf": "^6.0.1",
     "typescript": "^5.4.5",
     "wait-for-expect": "^3.0.2"
@@ -122,10 +122,59 @@ index 7e53a57b7..a328d4db4 100644
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
 diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-index 72a82a891..7869ad323 100644
+index 72a82a891..1b681f4dd 100644
 --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
 +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-@@ -3417,6 +3417,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+@@ -3216,11 +3216,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
+     GGML_UNUSED(dev);
+ }
+
++
++// ~2GB per session for now
++#define GGML_HEXAGON_SESSION_MEMORY_DEFAULT (2ULL * 1024 * 1024 * 1024)
++// Max to 3.5GB
++#define GGML_HEXAGON_SESSION_MEMORY_MAX (3ULL * 1024 * 1024 * 1024 + 512ULL * 1024 * 1024)
++
+ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
+-    // ~2GB per session for now
+-    *free = 2ULL * 1024 * 1024 * 1024;
+-    *total = *free;
++    const char * str_mem = getenv("GGML_HEXAGON_SESSION_MEMORY");
++    if (str_mem) {
++        *free = std::stoull(str_mem);
++        if (*free < GGML_HEXAGON_SESSION_MEMORY_DEFAULT) {
++            *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
++        } else if (*free > GGML_HEXAGON_SESSION_MEMORY_MAX) {
++            *free = GGML_HEXAGON_SESSION_MEMORY_MAX;
++        }
++    } else {
++        *free = GGML_HEXAGON_SESSION_MEMORY_DEFAULT;
++    }
+
++    *total = *free;
+     GGML_UNUSED(dev);
+ }
+
+@@ -3401,10 +3416,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+         }
+     }
+
++#if defined(__ANDROID__)
+     if(opt_arch < 75) {
+         opt_ndev = 1;
+-        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75.\n");
++        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v75 for Android.\n");
++    }
++#else
++    if(opt_arch < 73) {
++        opt_ndev = 1;
++        GGML_LOG_WARN("ggml-hex: forcing ndev to 1 for SoCs archs lower than v73 for Linux and Windows.\n");
+     }
++#endif
+
+     GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
+
+@@ -3417,6 +3439,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
  } catch (std::exception const &exc) {
      GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
      devices[i].context = nullptr;
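This hunk edits a bundled llama.cpp patch file (the header naming the patched file is truncated in this diff) and carries two behavioral changes: the Hexagon session memory reported to the scheduler can now be overridden with the GGML_HEXAGON_SESSION_MEMORY environment variable, clamped between the 2 GiB default and a 3.5 GiB ceiling, and the single-device fallback now distinguishes Android (arch below v75) from Linux/Windows (arch below v73). A sketch of driving the override from Node; the only requirement is that the variable is set before the Hexagon backend initializes:

```ts
// Ask the Hexagon backend to report a 3 GiB session budget. Per the patched
// ggml_backend_hexagon_device_get_memory above, values below 2 GiB or above
// 3.5 GiB are clamped on the native side, and the value is read via getenv
// when the backend queries device memory.
const THREE_GIB = 3n * 1024n * 1024n * 1024n;
process.env.GGML_HEXAGON_SESSION_MEMORY = THREE_GIB.toString();

// ...then load @fugood/llama.node so the native side sees the setting.
```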
@@ -321,15 +321,20 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
 
   // Parse devices array
   if (options.Has("devices") && options.Get("devices").IsArray()) {
+    std::vector<ggml_backend_dev_t> devs;
     auto devices_array = options.Get("devices").As<Napi::Array>();
     for (size_t i = 0; i < devices_array.Length(); i++) {
       auto device_name = devices_array.Get(i).ToString().Utf8Value();
       auto * dev = ggml_backend_dev_by_name(device_name.c_str());
       if (dev) {
-        params.devices.push_back(dev);
+        devs.push_back(dev);
       }
       // Skip invalid device names silently
     }
+    if (!devs.empty()) {
+      params.devices = devs;
+      params.devices.push_back(nullptr); // nullptr terminator required by llama.cpp
+    }
   }
 
   std::vector<common_adapter_lora_info> lora;
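The devices fix collects resolved devices locally and, only when at least one name was valid, assigns them to params.devices with a trailing nullptr, matching llama.cpp's expectation of a nullptr-terminated device list (previously an all-invalid list could leave an unterminated vector). From JavaScript the option is simply an array of backend device names; a hedged sketch of the option shape — the surrounding API names are assumptions, not verified against the package:

```ts
// Hypothetical option shape for creating a context. `devices` takes ggml
// backend device names (e.g. "Vulkan0", "CUDA0"); which names exist depends
// on the compiled backends, and invalid names are silently skipped, with the
// nullptr terminator appended on the native side.
interface ContextOptions {
  model: string
  devices?: string[]
}

const options: ContextOptions = {
  model: './model.gguf',  // path is illustrative
  devices: ['Vulkan0'],   // example backend device name
}
```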
@@ -652,7 +657,7 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
   auto params =
       has_params ? info[2].As<Napi::Object>() : Napi::Object::New(env);
 
-  if (get_option<bool>(params, "jinja", false)) {
+  if (get_option<bool>(params, "jinja", true)) {
     std::string json_schema_str = "";
     if (!is_nil(params.Get("response_format"))) {
       auto response_format = params.Get("response_format").As<Napi::Object>();
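This flips the default of the `jinja` option from false to true for chat formatting, so the Jinja template path (the one that honors tools and response_format) is now used unless explicitly disabled; Completion below receives the same flip, with a usage sketch after that hunk.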
@@ -907,7 +912,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   if (options.Has("messages") && options.Get("messages").IsArray()) {
     auto messages = options.Get("messages").As<Napi::Array>();
     auto chat_template = get_option<std::string>(options, "chat_template", "");
-    auto jinja = get_option<bool>(options, "jinja", false);
+    auto jinja = get_option<bool>(options, "jinja", true);
     if (jinja) {
       auto tools_str =
           !is_nil(options.Get("tools"))
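Callers who relied on the old default can opt back out per call. A hedged sketch — the completion call shape is illustrative, so consult the package's TypeScript definitions for the exact API:

```ts
// Hypothetical call shape: `ctx` stands in for a context object created via
// the package's API (name and creation path assumed, not verified).
async function completeWithLegacyFormatting(ctx: {
  completion(opts: object): Promise<unknown>
}) {
  return ctx.completion({
    messages: [{ role: 'user', content: 'Hello!' }],
    jinja: false, // opt out of the new jinja-by-default behavior in 1.4.2
  })
}
```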