@fugood/llama.node 1.3.5 → 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CMakeLists.txt CHANGED
@@ -44,7 +44,8 @@ else()
  endif()

  if (TO_PACKAGE)
- set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/packages/node-llama-${PLATFORM}-${ARCH}${VARIANT})
+ set(PACKAGE_NAME "node-llama-${PLATFORM}-${ARCH}${VARIANT}")
+ set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/packages/${PACKAGE_NAME})
  else()
  set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/build/Release)
  endif()
@@ -188,6 +189,13 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
  set(CMAKE_JS_LIB win_dynamic_load)
  endif()

+ if (TO_PACKAGE AND GGML_HEXAGON)
+   set(NODE_RPATH "node_modules/@fugood/${PACKAGE_NAME}")
+   set(ELECTRON_ASAR_RPATH "resources/app.asar.unpacked/node_modules/@fugood/${PACKAGE_NAME}")
+   set(ELECTRON_RES_RPATH "resources/node_modules/@fugood/${PACKAGE_NAME}")
+   set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath,${NODE_RPATH} -Wl,-rpath,${ELECTRON_ASAR_RPATH} -Wl,-rpath,${ELECTRON_RES_RPATH}")
+ endif()
+
  add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
  set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
  target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common mtmd ${CMAKE_THREAD_LIBS_INIT})
@@ -237,3 +245,15 @@ if (GGML_CLBLAST AND TO_PACKAGE)
  )
  endif()
  endif()
+
+ if (GGML_HEXAGON)
+   get_target_property(HTP_LIBS_DIR ggml-hexagon BINARY_DIR)
+   add_custom_command(
+     TARGET copy_assets
+     COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v73.so ${PLATFORM_BINARY_DIR}
+     COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v75.so ${PLATFORM_BINARY_DIR}
+     COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v79.so ${PLATFORM_BINARY_DIR}
+     COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v81.so ${PLATFORM_BINARY_DIR}
+     COMMENT "Copying HTP libraries to bin folder"
+   )
+ endif()
package/lib/binding.ts CHANGED
@@ -25,6 +25,12 @@ export type LlamaModelOptions = {
  n_ctx?: number
  n_batch?: number
  n_ubatch?: number
+ /**
+  * CPU affinity mask
+  * Example: '0xfc'
+  */
+ cpu_mask?: string
+ cpu_strict?: boolean
  /**
   * Number of parallel sequences to support (sets n_seq_max).
   * This determines the maximum number of parallel slots that can be used.
@@ -574,7 +580,7 @@ export interface Module {
  LlamaContext: LlamaContext
  }

- export type LibVariant = 'default' | 'vulkan' | 'cuda'
+ export type LibVariant = 'default' | 'vulkan' | 'cuda' | 'snapdragon'

  const getPlatformPackageName = (variant?: LibVariant): string => {
  const platform = process.platform
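
The new cpu_mask / cpu_strict fields and the 'snapdragon' LibVariant above surface through the existing loadModel API. A minimal usage sketch (the model path is hypothetical; option names follow the typings above):

import { loadModel } from '@fugood/llama.node'

const main = async () => {
  const ctx = await loadModel({
    model: './models/example.gguf', // hypothetical path
    lib_variant: 'snapdragon',      // new LibVariant value
    cpu_mask: '0xfc',               // hex CPU affinity mask (maps to cpuparams.cpumask)
    cpu_strict: true,               // maps to cpuparams.strict_cpu
  })
  return ctx
}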
package/lib/index.js CHANGED
@@ -201,7 +201,28 @@ const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, fun
  const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
  (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
  refreshNativeLogSetup();
- const nativeCtx = new mods[variant].LlamaContext(options, onProgress);
+ const { devices } = options;
+ let filteredDevs = [];
+ if (Array.isArray(devices)) {
+   filteredDevs = [...devices];
+   // Handle HTP* to use all HTP devices on Hexagon
+   if (variant === 'snapdragon' && devices.includes('HTP*')) {
+     const backendDevices = yield (0, exports.getBackendDevicesInfo)(variant);
+     const htpDevices = backendDevices
+       .filter((d) => d.deviceName.startsWith('HTP'))
+       .map((d) => d.deviceName);
+     filteredDevs = filteredDevs.reduce((acc, dev) => {
+       if (dev.startsWith('HTP*')) {
+         acc.push(...htpDevices);
+       }
+       else if (!dev.startsWith('HTP')) {
+         acc.push(dev);
+       }
+       return acc;
+     }, []);
+   }
+ }
+ const nativeCtx = new mods[variant].LlamaContext(Object.assign(Object.assign({}, options), { devices: filteredDevs.length > 0 ? filteredDevs : undefined }), onProgress);
  return new LlamaContextWrapper(nativeCtx);
  });
  exports.loadModel = loadModel;
package/lib/index.ts CHANGED
@@ -309,7 +309,35 @@ export const loadModel = async (
  mods[variant] ??= await loadModule(options.lib_variant)
  refreshNativeLogSetup()

- const nativeCtx = new mods[variant].LlamaContext(options, onProgress)
+ const { devices } = options
+ let filteredDevs: Array<string> = []
+ if (Array.isArray(devices)) {
+   filteredDevs = [...devices]
+
+   // Handle HTP* to use all HTP devices on Hexagon
+   if (variant === 'snapdragon' && devices.includes('HTP*')) {
+     const backendDevices = await getBackendDevicesInfo(variant)
+     const htpDevices = backendDevices
+       .filter((d) => d.deviceName.startsWith('HTP'))
+       .map((d) => d.deviceName)
+     filteredDevs = filteredDevs.reduce((acc, dev) => {
+       if (dev.startsWith('HTP*')) {
+         acc.push(...htpDevices)
+       } else if (!dev.startsWith('HTP')) {
+         acc.push(dev)
+       }
+       return acc
+     }, [] as Array<string>)
+   }
+ }
+
+ const nativeCtx = new mods[variant].LlamaContext(
+   {
+     ...options,
+     devices: filteredDevs.length > 0 ? filteredDevs : undefined,
+   },
+   onProgress,
+ )
  return new LlamaContextWrapper(nativeCtx)
  }

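The wildcard handling above lets a caller request every Hexagon NPU session without knowing how many the device exposes. A sketch, assuming getBackendDevicesInfo is exported from the package as used in lib/index.js, with a hypothetical model path:

import { loadModel, getBackendDevicesInfo } from '@fugood/llama.node'

const main = async () => {
  // Optional: see what 'HTP*' would expand to on this machine
  const infos = await getBackendDevicesInfo('snapdragon')
  console.log(infos.map((d) => d.deviceName)) // e.g. [ 'CPU', 'HTP0', ... ] (illustrative)

  // 'HTP*' is replaced with the concrete HTP device names before the native context is created
  return loadModel({
    model: './models/example.gguf', // hypothetical path
    lib_variant: 'snapdragon',
    devices: ['HTP*'],
  })
}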
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "@fugood/llama.node",
  "access": "public",
- "version": "1.3.5",
+ "version": "1.3.6",
  "description": "An another Node binding of llama.cpp",
  "main": "lib/index.js",
  "scripts": {
@@ -72,19 +72,20 @@
  "CMakeLists.txt"
  ],
  "optionalDependencies": {
- "@fugood/node-llama-linux-x64": "1.3.5",
- "@fugood/node-llama-linux-x64-vulkan": "1.3.5",
- "@fugood/node-llama-linux-x64-cuda": "1.3.5",
- "@fugood/node-llama-linux-arm64": "1.3.5",
- "@fugood/node-llama-linux-arm64-vulkan": "1.3.5",
- "@fugood/node-llama-linux-arm64-cuda": "1.3.5",
- "@fugood/node-llama-win32-x64": "1.3.5",
- "@fugood/node-llama-win32-x64-vulkan": "1.3.5",
- "@fugood/node-llama-win32-x64-cuda": "1.3.5",
- "@fugood/node-llama-win32-arm64": "1.3.5",
- "@fugood/node-llama-win32-arm64-vulkan": "1.3.5",
- "@fugood/node-llama-darwin-x64": "1.3.5",
- "@fugood/node-llama-darwin-arm64": "1.3.5"
+ "@fugood/node-llama-linux-x64": "1.3.6",
+ "@fugood/node-llama-linux-x64-vulkan": "1.3.6",
+ "@fugood/node-llama-linux-x64-cuda": "1.3.6",
+ "@fugood/node-llama-linux-arm64-snapdragon": "1.3.6",
+ "@fugood/node-llama-linux-arm64": "1.3.6",
+ "@fugood/node-llama-linux-arm64-vulkan": "1.3.6",
+ "@fugood/node-llama-linux-arm64-cuda": "1.3.6",
+ "@fugood/node-llama-win32-x64": "1.3.6",
+ "@fugood/node-llama-win32-x64-vulkan": "1.3.6",
+ "@fugood/node-llama-win32-x64-cuda": "1.3.6",
+ "@fugood/node-llama-win32-arm64": "1.3.6",
+ "@fugood/node-llama-win32-arm64-vulkan": "1.3.6",
+ "@fugood/node-llama-darwin-x64": "1.3.6",
+ "@fugood/node-llama-darwin-arm64": "1.3.6"
  },
  "devDependencies": {
  "@babel/preset-env": "^7.24.4",
@@ -121,6 +121,27 @@ index d0cab0bcb..48d532838 100644
  else()
  check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
  if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
+ diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
+ index cabd301ad..31eec134c 100644
+ --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
+ +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
+ @@ -9,6 +9,7 @@
+ #include <chrono>
+ #include <mutex>
+ #include <string>
+ +#include <stdexcept>
+
+ #ifdef _WIN32
+ # include <sal.h>
+ @@ -3682,6 +3683,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+ } catch (std::exception const &exc) {
+ GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
+ devices[i].context = nullptr;
+ + opt_ndev = i;
+ + break;
+ }
+ }
+ }
  diff --git a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
  index de01336cd..29b1a043d 100644
  --- a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
@@ -306,6 +306,16 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
  params.tensor_buft_overrides.push_back({nullptr, nullptr});
  }

+ auto cpu_mask = get_option<std::string>(options, "cpu_mask", "");
+ if (!cpu_mask.empty()) {
+   params.cpuparams.mask_valid = true;
+   if (!parse_cpu_mask(cpu_mask, params.cpuparams.cpumask)) {
+     Napi::TypeError::New(env, "Invalid cpu_mask").ThrowAsJavaScriptException();
+   }
+ }
+
+ params.cpuparams.strict_cpu = get_option<bool>(options, "cpu_strict", false);
+
  llama_backend_init();
  llama_numa_init(params.numa);
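
parse_cpu_mask here is the llama.cpp common helper; assuming its usual interpretation (bit i of the hex mask selects logical CPU i), the '0xfc' example from the typings pins work to cores 2-7. A small TypeScript sketch of that decoding:

// Decode a hex affinity mask into the logical CPU indices it selects.
const coresFromMask = (mask: string): number[] => {
  const bits = BigInt(mask)
  const cores: number[] = []
  for (let i = 0n; i < 64n; i += 1n) {
    if ((bits >> i) & 1n) cores.push(Number(i))
  }
  return cores
}

console.log(coresFromMask('0xfc')) // [ 2, 3, 4, 5, 6, 7 ]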