@fugood/llama.node 1.3.5 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +21 -1
- package/lib/binding.ts +7 -1
- package/lib/index.js +22 -1
- package/lib/index.ts +29 -1
- package/package.json +15 -14
- package/scripts/llama.cpp.patch +21 -0
- package/src/LlamaContext.cpp +10 -0
package/CMakeLists.txt
CHANGED
@@ -44,7 +44,8 @@ else()
 endif()
 
 if (TO_PACKAGE)
-  set(
+  set(PACKAGE_NAME "node-llama-${PLATFORM}-${ARCH}${VARIANT}")
+  set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/packages/${PACKAGE_NAME})
 else()
   set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/build/Release)
 endif()
@@ -188,6 +189,13 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
   set(CMAKE_JS_LIB win_dynamic_load)
 endif()
 
+if (TO_PACKAGE AND GGML_HEXAGON)
+  set(NODE_RPATH "node_modules/@fugood/${PACKAGE_NAME}")
+  set(ELECTRON_ASAR_RPATH "resources/app.asar.unpacked/node_modules/@fugood/${PACKAGE_NAME}")
+  set(ELECTRON_RES_RPATH "resources/node_modules/@fugood/${PACKAGE_NAME}")
+  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath,${NODE_RPATH} -Wl,-rpath,${ELECTRON_ASAR_RPATH} -Wl,-rpath,${ELECTRON_RES_RPATH}")
+endif()
+
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
 target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common mtmd ${CMAKE_THREAD_LIBS_INIT})
@@ -237,3 +245,15 @@ if (GGML_CLBLAST AND TO_PACKAGE)
   )
 endif()
 endif()
+
+if (GGML_HEXAGON)
+  get_target_property(HTP_LIBS_DIR ggml-hexagon BINARY_DIR)
+  add_custom_command(
+    TARGET copy_assets
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v73.so ${PLATFORM_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v75.so ${PLATFORM_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v79.so ${PLATFORM_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v81.so ${PLATFORM_BINARY_DIR}
+    COMMENT "Copying HTP libraries to bin folder"
+  )
+endif()
package/lib/binding.ts
CHANGED
@@ -25,6 +25,12 @@ export type LlamaModelOptions = {
   n_ctx?: number
   n_batch?: number
   n_ubatch?: number
+  /**
+   * CPU affinity mask
+   * Example: '0xfc'
+   */
+  cpu_mask?: string
+  cpu_strict?: boolean
   /**
    * Number of parallel sequences to support (sets n_seq_max).
    * This determines the maximum number of parallel slots that can be used.
@@ -574,7 +580,7 @@ export interface Module {
   LlamaContext: LlamaContext
 }
 
-export type LibVariant = 'default' | 'vulkan' | 'cuda'
+export type LibVariant = 'default' | 'vulkan' | 'cuda' | 'snapdragon'
 
 const getPlatformPackageName = (variant?: LibVariant): string => {
   const platform = process.platform
package/lib/index.js
CHANGED
@@ -201,7 +201,28 @@ const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, fun
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-    const
+    const { devices } = options;
+    let filteredDevs = [];
+    if (Array.isArray(devices)) {
+        filteredDevs = [...devices];
+        // Handle HTP* to use all HTP devices on Hexagon
+        if (variant === 'snapdragon' && devices.includes('HTP*')) {
+            const backendDevices = yield (0, exports.getBackendDevicesInfo)(variant);
+            const htpDevices = backendDevices
+                .filter((d) => d.deviceName.startsWith('HTP'))
+                .map((d) => d.deviceName);
+            filteredDevs = filteredDevs.reduce((acc, dev) => {
+                if (dev.startsWith('HTP*')) {
+                    acc.push(...htpDevices);
+                }
+                else if (!dev.startsWith('HTP')) {
+                    acc.push(dev);
+                }
+                return acc;
+            }, []);
+        }
+    }
+    const nativeCtx = new mods[variant].LlamaContext(Object.assign(Object.assign({}, options), { devices: filteredDevs.length > 0 ? filteredDevs : undefined }), onProgress);
     return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;
package/lib/index.ts
CHANGED
@@ -309,7 +309,35 @@ export const loadModel = async (
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
 
-  const
+  const { devices } = options
+  let filteredDevs: Array<string> = []
+  if (Array.isArray(devices)) {
+    filteredDevs = [...devices]
+
+    // Handle HTP* to use all HTP devices on Hexagon
+    if (variant === 'snapdragon' && devices.includes('HTP*')) {
+      const backendDevices = await getBackendDevicesInfo(variant)
+      const htpDevices = backendDevices
+        .filter((d) => d.deviceName.startsWith('HTP'))
+        .map((d) => d.deviceName)
+      filteredDevs = filteredDevs.reduce((acc, dev) => {
+        if (dev.startsWith('HTP*')) {
+          acc.push(...htpDevices)
+        } else if (!dev.startsWith('HTP')) {
+          acc.push(dev)
+        }
+        return acc
+      }, [] as Array<string>)
+    }
+  }
+
+  const nativeCtx = new mods[variant].LlamaContext(
+    {
+      ...options,
+      devices: filteredDevs.length > 0 ? filteredDevs : undefined,
+    },
+    onProgress,
+  )
   return new LlamaContextWrapper(nativeCtx)
 }
 
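
From the caller's side, the TypeScript source above means a device list can now use the wildcard instead of naming HTP devices individually — a sketch, again with a hypothetical model path:

  // 'HTP*' expands to every device whose name starts with 'HTP'; explicitly
  // listed HTP names are dropped in favor of the expansion.
  const ctx = await loadModel({
    model: './model.gguf', // hypothetical path
    lib_variant: 'snapdragon',
    devices: ['HTP*'],
  })
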
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.5",
+  "version": "1.3.6",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.5",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.5",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.5",
-    "@fugood/node-llama-linux-arm64": "1.3.5",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.5",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.5",
-    "@fugood/node-llama-win32-x64": "1.3.5",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.5",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.5",
-    "@fugood/node-llama-win32-arm64": "1.3.5",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.5",
-    "@fugood/node-llama-darwin-x64": "1.3.5",
-    "@fugood/node-llama-darwin-arm64": "1.3.5"
+    "@fugood/node-llama-linux-x64": "1.3.6",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.6",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.6",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.3.6",
+    "@fugood/node-llama-linux-arm64": "1.3.6",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.6",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.6",
+    "@fugood/node-llama-win32-x64": "1.3.6",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.6",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.6",
+    "@fugood/node-llama-win32-arm64": "1.3.6",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.6",
+    "@fugood/node-llama-darwin-x64": "1.3.6",
+    "@fugood/node-llama-darwin-arm64": "1.3.6"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED
@@ -121,6 +121,27 @@ index d0cab0bcb..48d532838 100644
 else()
     check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
     if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
+diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
+index cabd301ad..31eec134c 100644
+--- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
++++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
+@@ -9,6 +9,7 @@
+ #include <chrono>
+ #include <mutex>
+ #include <string>
++#include <stdexcept>
+ 
+ #ifdef _WIN32
+ # include <sal.h>
+@@ -3682,6 +3683,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+         } catch (std::exception const &exc) {
+             GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
+             devices[i].context = nullptr;
++            opt_ndev = i;
++            break;
+         }
+     }
+ }
 diff --git a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
 index de01336cd..29b1a043d 100644
 --- a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
package/src/LlamaContext.cpp
CHANGED
@@ -306,6 +306,16 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     params.tensor_buft_overrides.push_back({nullptr, nullptr});
   }
 
+  auto cpu_mask = get_option<std::string>(options, "cpu_mask", "");
+  if (!cpu_mask.empty()) {
+    params.cpuparams.mask_valid = true;
+    if (!parse_cpu_mask(cpu_mask, params.cpuparams.cpumask)) {
+      Napi::TypeError::New(env, "Invalid cpu_mask").ThrowAsJavaScriptException();
+    }
+  }
+
+  params.cpuparams.strict_cpu = get_option<bool>(options, "cpu_strict", false);
+
   llama_backend_init();
   llama_numa_init(params.numa);
 
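
The mask string handed to parse_cpu_mask follows llama.cpp's convention of a hex bitmask in which bit i selects CPU i — which is why the '0xfc' example in binding.ts means CPUs 2 through 7. A sketch of composing such a mask:

  // Build a cpu_mask for CPUs [2..7]: setting bits 2-7 yields 0xfc.
  const cpus = [2, 3, 4, 5, 6, 7]
  const bits = cpus.reduce((m, c) => m | (1n << BigInt(c)), 0n)
  const cpu_mask = `0x${bits.toString(16)}` // '0xfc'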