@fugood/llama.node 1.3.5 → 1.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +21 -1
- package/lib/binding.ts +7 -1
- package/lib/index.js +22 -1
- package/lib/index.ts +29 -1
- package/package.json +15 -14
- package/scripts/llama.cpp.patch +13 -0
- package/src/LlamaContext.cpp +10 -0
- package/src/llama.cpp/src/llama-model.cpp +4 -2
- package/src/llama.cpp/src/models/deepseek2.cpp +2 -1
package/CMakeLists.txt
CHANGED
@@ -44,7 +44,8 @@ else()
 endif()
 
 if (TO_PACKAGE)
-  set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/packages/node-llama-${PLATFORM}-${ARCH}${VARIANT})
+  set(PACKAGE_NAME "node-llama-${PLATFORM}-${ARCH}${VARIANT}")
+  set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/packages/${PACKAGE_NAME})
 else()
   set(PLATFORM_BINARY_DIR ${CMAKE_SOURCE_DIR}/build/Release)
 endif()
@@ -188,6 +189,13 @@ if (NOT MSVC AND CMAKE_SYSTEM_NAME STREQUAL "Windows")
   set(CMAKE_JS_LIB win_dynamic_load)
 endif()
 
+if (TO_PACKAGE AND GGML_HEXAGON)
+  set(NODE_RPATH "node_modules/@fugood/${PACKAGE_NAME}")
+  set(ELECTRON_ASAR_RPATH "resources/app.asar.unpacked/node_modules/@fugood/${PACKAGE_NAME}")
+  set(ELECTRON_RES_RPATH "resources/node_modules/@fugood/${PACKAGE_NAME}")
+  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-rpath,${NODE_RPATH} -Wl,-rpath,${ELECTRON_ASAR_RPATH} -Wl,-rpath,${ELECTRON_RES_RPATH}")
+endif()
+
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
 target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common mtmd ${CMAKE_THREAD_LIBS_INIT})
@@ -237,3 +245,15 @@ if (GGML_CLBLAST AND TO_PACKAGE)
   )
 endif()
 endif()
+
+if (GGML_HEXAGON)
+  get_target_property(HTP_LIBS_DIR ggml-hexagon BINARY_DIR)
+  add_custom_command(
+    TARGET copy_assets
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v73.so ${PLATFORM_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v75.so ${PLATFORM_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v79.so ${PLATFORM_BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E copy ${HTP_LIBS_DIR}/libggml-htp-v81.so ${PLATFORM_BINARY_DIR}
+    COMMENT "Copying HTP libraries to bin folder"
+  )
+endif()
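The new GGML_HEXAGON blocks embed Node- and Electron-relative rpath entries in the addon and copy the Hexagon HTP kernel libraries (v73-v81 SoC generations) into the platform package next to the .node binary. A minimal sketch of checking that layout at runtime, assuming the Linux arm64 Snapdragon package name produced by the PACKAGE_NAME rule above (paths are illustrative, not part of the diff):

// check-htp-libs.ts: sketch only; package dir assumed from the CMake rule above
import { existsSync } from 'node:fs'
import { join } from 'node:path'

// node-llama-${PLATFORM}-${ARCH}${VARIANT} → node-llama-linux-arm64-snapdragon
const pkgDir = join('node_modules', '@fugood', 'node-llama-linux-arm64-snapdragon')

for (const v of ['v73', 'v75', 'v79', 'v81']) {
  const lib = join(pkgDir, `libggml-htp-${v}.so`)
  console.log(`${lib}: ${existsSync(lib) ? 'present' : 'missing'}`)
}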
package/lib/binding.ts
CHANGED
@@ -25,6 +25,12 @@ export type LlamaModelOptions = {
   n_ctx?: number
   n_batch?: number
   n_ubatch?: number
+  /**
+   * CPU affinity mask
+   * Example: '0xfc'
+   */
+  cpu_mask?: string
+  cpu_strict?: boolean
   /**
    * Number of parallel sequences to support (sets n_seq_max).
    * This determines the maximum number of parallel slots that can be used.
@@ -574,7 +580,7 @@ export interface Module {
   LlamaContext: LlamaContext
 }
 
-export type LibVariant = 'default' | 'vulkan' | 'cuda'
+export type LibVariant = 'default' | 'vulkan' | 'cuda' | 'snapdragon'
 
 const getPlatformPackageName = (variant?: LibVariant): string => {
   const platform = process.platform
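The new cpu_mask/cpu_strict options feed llama.cpp's CPU params (see the LlamaContext.cpp change below), and 'snapdragon' joins the selectable library variants. A hedged usage sketch; option names come from the typings above, while the model path is illustrative:

import { loadModel } from '@fugood/llama.node'

const ctx = await loadModel({
  model: './model.gguf',     // illustrative path
  lib_variant: 'snapdragon', // new LibVariant member
  cpu_mask: '0xfc',          // bits 2..7 set → run on cores 2-7
  cpu_strict: true,          // keep threads strictly on the masked cores
})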
package/lib/index.js
CHANGED
@@ -201,7 +201,28 @@ const loadModel = (options, onProgress) => __awaiter(void 0, void 0, void 0, fun
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-    const nativeCtx = new mods[variant].LlamaContext(options, onProgress);
+    const { devices } = options;
+    let filteredDevs = [];
+    if (Array.isArray(devices)) {
+        filteredDevs = [...devices];
+        // Handle HTP* to use all HTP devices on Hexagon
+        if (variant === 'snapdragon' && devices.includes('HTP*')) {
+            const backendDevices = yield (0, exports.getBackendDevicesInfo)(variant);
+            const htpDevices = backendDevices
+                .filter((d) => d.deviceName.startsWith('HTP'))
+                .map((d) => d.deviceName);
+            filteredDevs = filteredDevs.reduce((acc, dev) => {
+                if (dev.startsWith('HTP*')) {
+                    acc.push(...htpDevices);
+                }
+                else if (!dev.startsWith('HTP')) {
+                    acc.push(dev);
+                }
+                return acc;
+            }, []);
+        }
+    }
+    const nativeCtx = new mods[variant].LlamaContext(Object.assign(Object.assign({}, options), { devices: filteredDevs.length > 0 ? filteredDevs : undefined }), onProgress);
     return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;
package/lib/index.ts
CHANGED
@@ -309,7 +309,35 @@ export const loadModel = async (
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
 
-  const nativeCtx = new mods[variant].LlamaContext(options, onProgress)
+  const { devices } = options
+  let filteredDevs: Array<string> = []
+  if (Array.isArray(devices)) {
+    filteredDevs = [...devices]
+
+    // Handle HTP* to use all HTP devices on Hexagon
+    if (variant === 'snapdragon' && devices.includes('HTP*')) {
+      const backendDevices = await getBackendDevicesInfo(variant)
+      const htpDevices = backendDevices
+        .filter((d) => d.deviceName.startsWith('HTP'))
+        .map((d) => d.deviceName)
+      filteredDevs = filteredDevs.reduce((acc, dev) => {
+        if (dev.startsWith('HTP*')) {
+          acc.push(...htpDevices)
+        } else if (!dev.startsWith('HTP')) {
+          acc.push(dev)
+        }
+        return acc
+      }, [] as Array<string>)
+    }
+  }
+
+  const nativeCtx = new mods[variant].LlamaContext(
+    {
+      ...options,
+      devices: filteredDevs.length > 0 ? filteredDevs : undefined,
+    },
+    onProgress,
+  )
   return new LlamaContextWrapper(nativeCtx)
 }
 
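Both the compiled output above and this TypeScript source add the same device pre-filtering: on the snapdragon variant, a literal 'HTP*' entry expands to every device whose reported deviceName starts with 'HTP', and explicit HTP names passed alongside the wildcard are dropped so devices are not duplicated. A sketch of calling it (model path and device names are illustrative):

import { loadModel, getBackendDevicesInfo } from '@fugood/llama.node'

// Inspect what the Hexagon backend reports, e.g. [{ deviceName: 'HTP0', ... }]
const infos = await getBackendDevicesInfo('snapdragon')
console.log(infos.map((d) => d.deviceName))

// 'HTP*' is rewritten to the full HTP device list before the native call
const ctx = await loadModel({
  model: './model.gguf',
  lib_variant: 'snapdragon',
  devices: ['HTP*'],
})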
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.5",
+  "version": "1.3.7",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.5",
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.5",
-    "@fugood/node-llama-linux-x64-cuda": "1.3.5",
-    "@fugood/node-llama-linux-arm64": "1.3.5",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.5",
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.5",
-    "@fugood/node-llama-win32-x64": "1.3.5",
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.5",
-    "@fugood/node-llama-win32-x64-cuda": "1.3.5",
-    "@fugood/node-llama-win32-arm64": "1.3.5",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.5",
-    "@fugood/node-llama-darwin-x64": "1.3.5",
-    "@fugood/node-llama-darwin-arm64": "1.3.5"
+    "@fugood/node-llama-linux-x64": "1.3.7",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.7",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.7",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.3.7",
+    "@fugood/node-llama-linux-arm64": "1.3.7",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.7",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.7",
+    "@fugood/node-llama-win32-x64": "1.3.7",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.7",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.7",
+    "@fugood/node-llama-win32-arm64": "1.3.7",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.7",
+    "@fugood/node-llama-darwin-x64": "1.3.7",
+    "@fugood/node-llama-darwin-arm64": "1.3.7"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
package/scripts/llama.cpp.patch
CHANGED
@@ -121,6 +121,19 @@ index d0cab0bcb..48d532838 100644
 else()
     check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
     if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
+diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
+index 72a82a891..7869ad323 100644
+--- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
++++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
+@@ -3417,6 +3417,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
+         } catch (std::exception const &exc) {
+             GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
+             devices[i].context = nullptr;
++            opt_ndev = i;
++            break;
+         }
+     }
+ }
 diff --git a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
 index de01336cd..29b1a043d 100644
 --- a/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt
package/src/LlamaContext.cpp
CHANGED
@@ -306,6 +306,16 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
     params.tensor_buft_overrides.push_back({nullptr, nullptr});
   }
 
+  auto cpu_mask = get_option<std::string>(options, "cpu_mask", "");
+  if (!cpu_mask.empty()) {
+    params.cpuparams.mask_valid = true;
+    if (!parse_cpu_mask(cpu_mask, params.cpuparams.cpumask)) {
+      Napi::TypeError::New(env, "Invalid cpu_mask").ThrowAsJavaScriptException();
+    }
+  }
+
+  params.cpuparams.strict_cpu = get_option<bool>(options, "cpu_strict", false);
+
   llama_backend_init();
   llama_numa_init(params.numa);
 
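Natively, cpu_mask is parsed with llama.cpp's parse_cpu_mask into params.cpuparams.cpumask, an invalid mask throws a TypeError, and cpu_strict maps to strict_cpu. The mask is a hex bitmask over logical cores; a small sketch of building one (helper name is hypothetical):

// Hypothetical helper: cpu_mask string from logical core indices.
// Limited to cores < 31 since this sketch uses 32-bit JS bitwise ops.
const coresToMask = (cores: number[]): string =>
  '0x' + cores.reduce((mask, core) => mask | (1 << core), 0).toString(16)

coresToMask([2, 3, 4, 5, 6, 7]) // → '0xfc', matching the example in binding.ts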
package/src/llama.cpp/src/llama-model.cpp
CHANGED
@@ -1593,7 +1593,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_DEEPSEEK2:
             {
-                bool is_lite = (hparams.n_layer == 27);
+                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
                 if (!is_lite) {
@@ -4581,7 +4582,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_DEEPSEEK2:
             {
-                const bool is_lite = (hparams.n_layer == 27);
+                // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+                const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
 
                 const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
 
package/src/llama.cpp/src/models/deepseek2.cpp
CHANGED
@@ -4,7 +4,8 @@
 
 llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
         llm_graph_context(params) {
-    bool is_lite = (hparams.n_layer == 27);
+    // lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
+    bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
 
     const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
 