@fugood/llama.node 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.js +1 -1
- package/lib/binding.ts +40 -14
- package/lib/index.js +4 -1
- package/lib/index.ts +13 -9
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +10 -10
- package/src/LlamaContext.cpp +36 -0
- package/src/LlamaContext.h +2 -0
- package/src/llama.cpp/common/CMakeLists.txt +2 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.cpp +861 -0
- package/src/llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
- package/src/llama.cpp/common/chat-parser.h +10 -0
- package/src/llama.cpp/common/chat.cpp +461 -87
- package/src/llama.cpp/common/chat.h +6 -0
- package/src/llama.cpp/common/common.cpp +8 -1
- package/src/llama.cpp/common/common.h +12 -5
- package/src/llama.cpp/common/json-partial.cpp +19 -2
- package/src/llama.cpp/common/json-schema-to-grammar.cpp +2 -0
- package/src/llama.cpp/common/json-schema-to-grammar.h +2 -0
- package/src/llama.cpp/common/sampling.cpp +60 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +31 -38
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/x86/repack.cpp +6 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +15 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +16 -14
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +49 -48
- package/src/llama.cpp/src/llama-grammar.cpp +17 -9
- package/src/llama.cpp/src/llama-impl.cpp +3 -3
- package/src/llama.cpp/src/llama-sampling.cpp +3 -6
- package/src/llama.cpp/src/llama-vocab.cpp +1 -0
package/lib/binding.js
CHANGED
```diff
@@ -51,7 +51,7 @@ const getPlatformPackageName = (variant) => {
 };
 const loadPlatformPackage = (packageName) => __awaiter(void 0, void 0, void 0, function* () {
     try {
-        return yield Promise.resolve(`${packageName}`).then(s => __importStar(require(s)));
+        return (yield Promise.resolve(`${packageName}`).then(s => __importStar(require(s))));
     }
     catch (error) {
         return null;
```
package/lib/binding.ts
CHANGED
```diff
@@ -1,9 +1,9 @@
 export type MessagePart = {
-  type: string
-  text?: string
+  type: string
+  text?: string
   image_url?: {
     url?: string
-  }
+  }
   input_audio?: {
     format: string
     data?: string
@@ -70,6 +70,12 @@ export type LlamaModelOptions = {
    * Number of layers to keep MoE weights on CPU
    */
   n_cpu_moe?: number
+  /**
+   * List of device names to use for offloading
+   * Device names can be obtained from getBackendDevicesInfo()
+   * Example: ['Metal', 'BLAS', 'CPU']
+   */
+  devices?: string[]
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
@@ -375,9 +381,13 @@ export type ToolCall = {
 }
 
 export interface LlamaContext {
-  new (
+  new (
+    options: LlamaModelOptions,
+    onProgress?: (progress: number) => void,
+  ): LlamaContext
   getSystemInfo(): string
   getModelInfo(): ModelInfo
+  getUsedDevices(): string[]
   getFormattedChat(
     messages: ChatMessage[],
     chat_template?: string,
```
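Taken together, the `devices` option and the new `getUsedDevices()` method let a caller choose offload targets by name and then confirm which backends the loaded model actually uses. A minimal sketch of that flow is below: `getBackendDevicesInfo`, `devices`, and `getUsedDevices` come from the typings above, while `loadModel` and the `model` / `n_gpu_layers` fields are assumed from the package's existing API and are not part of this diff.

```ts
import { loadModel, getBackendDevicesInfo } from '@fugood/llama.node'

const main = async () => {
  // Device names come from the backend registry (e.g. 'Metal', 'Vulkan0', 'CPU').
  const backends = await getBackendDevicesInfo()
  console.log('available backend devices:', backends)

  // `loadModel` and the `model` / `n_gpu_layers` fields are assumptions based on
  // the existing API; `devices` is the option added in this release.
  const ctx = await loadModel({
    model: './models/model.gguf', // hypothetical path
    n_gpu_layers: 99,
    devices: ['Metal', 'CPU'],
  })

  // New in 1.3.5: the device names the loaded model actually ended up on.
  console.log('used devices:', ctx.getUsedDevices())

  await ctx.release()
}

main()
```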
```diff
@@ -400,8 +410,15 @@ export interface LlamaContext {
   stopCompletion(): void
   tokenize(text: string, media_paths?: string[]): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
-  embedding(
-
+  embedding(
+    text: string,
+    params?: { embd_normalize?: number },
+  ): Promise<EmbeddingResult>
+  rerank(
+    query: string,
+    documents: string[],
+    params?: RerankParams,
+  ): Promise<RerankResult[]>
   saveSession(path: string): Promise<void>
   loadSession(path: string): Promise<void>
   release(): Promise<void>
```
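The reflowed `embedding` signature and the `rerank` method above can be exercised as in the sketch below. It only assumes an already-loaded context; the result field names (`embedding`, `index`, `score`) and the `RerankParams` shape are assumptions, not part of this diff.

```ts
// Works against any loaded context (see the previous sketch); the parameter
// type mirrors the signatures above, the result fields are assumed.
async function embedAndRerank(ctx: {
  embedding: (text: string, params?: { embd_normalize?: number }) => Promise<any>
  rerank: (query: string, documents: string[], params?: any) => Promise<any[]>
}) {
  const embed = await ctx.embedding('hello world', { embd_normalize: 2 })
  console.log('embedding length:', embed.embedding?.length)

  const ranked = await ctx.rerank('What is the capital of France?', [
    'Paris is the capital of France.',
    'Llamas are native to the Andes.',
  ])
  for (const r of ranked) console.log(r.index, r.score)
}
```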
```diff
@@ -440,7 +457,7 @@ export interface LlamaContext {
    * @param options Object containing path and optional n_batch
    * @returns boolean indicating if loading was successful
    */
-  initVocoder(options: { path: string
+  initVocoder(options: { path: string; n_batch?: number }): boolean
 
   /**
    * Unload the vocoder model
@@ -459,7 +476,10 @@ export interface LlamaContext {
    * @param text Text to complete
    * @returns Formatted audio completion
    */
-  getFormattedAudioCompletion(
+  getFormattedAudioCompletion(
+    speaker: string | null,
+    text: string,
+  ): {
     prompt: string
     grammar?: string
   }
@@ -476,7 +496,7 @@ export interface LlamaContext {
    * @param tokens Tokens to decode
    * @returns Promise resolving to decoded audio tokens
    */
-  decodeAudioTokens(tokens: number[]|Int32Array): Promise<Float32Array>
+  decodeAudioTokens(tokens: number[] | Int32Array): Promise<Float32Array>
 
   // Parallel decoding methods
 
@@ -485,7 +505,7 @@ export interface LlamaContext {
    * @param params Configuration for parallel mode
    * @returns boolean indicating if successful
    */
-  enableParallelMode(params: { n_parallel?: number
+  enableParallelMode(params: { n_parallel?: number; n_batch?: number }): boolean
 
   /**
    * Disable parallel decoding mode
```
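The four hunks above are mostly formatting (the vocoder and parallel-mode option objects are folded back onto one line), but they do pin down the option shapes for those helpers. A short sketch of the two calls, assuming `ctx` is an already-loaded context and the vocoder model path is hypothetical:

```ts
// Option shapes copied from the signatures above; `ctx` and the path are assumed.
const setupAudioAndParallel = (ctx: {
  initVocoder: (options: { path: string; n_batch?: number }) => boolean
  enableParallelMode: (params: { n_parallel?: number; n_batch?: number }) => boolean
}) => {
  const vocoderReady = ctx.initVocoder({ path: './models/vocoder.gguf', n_batch: 512 })
  const parallelReady = ctx.enableParallelMode({ n_parallel: 2, n_batch: 512 })
  console.log({ vocoderReady, parallelReady })
}
```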
```diff
@@ -563,9 +583,11 @@ const getPlatformPackageName = (variant?: LibVariant): string => {
   return `@fugood/node-llama-${platform}-${arch}${variantSuffix}`
 }
 
-const loadPlatformPackage = async (
+const loadPlatformPackage = async (
+  packageName: string,
+): Promise<Module | null> => {
   try {
-    return await import(packageName) as Module
+    return (await import(packageName)) as Module
   } catch (error) {
     return null
   }
@@ -579,7 +601,9 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
 
   module = await loadPlatformPackage(getPlatformPackageName())
   if (module) {
-    console.warn(
+    console.warn(
+      `Not found package for variant "${variant}", fallback to default`,
+    )
     return module
   }
 
@@ -588,7 +612,9 @@ export const loadModule = async (variant?: LibVariant): Promise<Module> => {
   return (await import('../build/Release/index.node')) as Module
 }
 
-export const isLibVariantAvailable = async (
+export const isLibVariantAvailable = async (
+  variant?: LibVariant,
+): Promise<boolean> => {
   if (variant && variant !== 'default') {
     const module = await loadPlatformPackage(getPlatformPackageName(variant))
     return module != null
```
package/lib/index.js
CHANGED
```diff
@@ -76,6 +76,9 @@ class LlamaContextWrapper {
     getModelInfo() {
         return this.ctx.getModelInfo();
     }
+    getUsedDevices() {
+        return this.ctx.getUsedDevices();
+    }
     isJinjaSupported() {
         const { minja } = this.ctx.getModelInfo().chatTemplates;
         return !!(minja === null || minja === void 0 ? void 0 : minja.toolUse) || !!(minja === null || minja === void 0 ? void 0 : minja.default);
@@ -85,7 +88,7 @@ class LlamaContextWrapper {
     }
     getFormattedChat(messages, template, params) {
         var _a;
-        const { messages: chat, has_media, media_paths
+        const { messages: chat, has_media, media_paths } = (0, utils_1.formatMediaChat)(messages);
         const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
         let tmpl;
         if (template)
```
package/lib/index.ts
CHANGED
```diff
@@ -94,6 +94,10 @@ class LlamaContextWrapper {
     return this.ctx.getModelInfo()
   }
 
+  getUsedDevices(): string[] {
+    return this.ctx.getUsedDevices()
+  }
+
   isJinjaSupported(): boolean {
     const { minja } = this.ctx.getModelInfo().chatTemplates
     return !!minja?.toolUse || !!minja?.default
@@ -118,11 +122,7 @@ class LlamaContextWrapper {
       chat_template_kwargs?: Record<string, string>
     },
   ): FormattedChatResult {
-    const {
-      messages: chat,
-      has_media,
-      media_paths,
-    } = formatMediaChat(messages)
+    const { messages: chat, has_media, media_paths } = formatMediaChat(messages)
 
     const useJinja = this.isJinjaSupported() && params?.jinja
     let tmpl
@@ -169,8 +169,9 @@ class LlamaContextWrapper {
     options: LlamaCompletionOptions,
     callback?: (token: LlamaCompletionToken) => void,
   ): Promise<LlamaCompletionResult> {
-    const { messages, media_paths = options.media_paths } =
-
+    const { messages, media_paths = options.media_paths } = formatMediaChat(
+      options.messages,
+    )
     return this.ctx.completion(
       {
         ...options,
@@ -196,7 +197,10 @@ class LlamaContextWrapper {
     return this.ctx.detokenize(tokens)
   }
 
-  embedding(
+  embedding(
+    text: string,
+    params?: { embd_normalize?: number },
+  ): Promise<EmbeddingResult> {
     return this.ctx.embedding(text, params)
   }
 
@@ -329,7 +333,7 @@ export const loadLlamaModelInfo = async (
 }
 
 export const getBackendDevicesInfo = async (
-  variant: LibVariant = 'default'
+  variant: LibVariant = 'default',
 ): Promise<import('./binding').BackendDeviceInfo[]> => {
   mods[variant] ??= await loadModule(variant)
   refreshNativeLogSetup()
```
package/package.json
CHANGED
```diff
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.3.
+  "version": "1.3.5",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,19 +72,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.3.
-    "@fugood/node-llama-linux-x64-vulkan": "1.3.
-    "@fugood/node-llama-linux-x64-cuda": "1.3.
-    "@fugood/node-llama-linux-arm64": "1.3.
-    "@fugood/node-llama-linux-arm64-vulkan": "1.3.
-    "@fugood/node-llama-linux-arm64-cuda": "1.3.
-    "@fugood/node-llama-win32-x64": "1.3.
-    "@fugood/node-llama-win32-x64-vulkan": "1.3.
-    "@fugood/node-llama-win32-x64-cuda": "1.3.
-    "@fugood/node-llama-win32-arm64": "1.3.
-    "@fugood/node-llama-win32-arm64-vulkan": "1.3.
-    "@fugood/node-llama-darwin-x64": "1.3.
-    "@fugood/node-llama-darwin-arm64": "1.3.
+    "@fugood/node-llama-linux-x64": "1.3.5",
+    "@fugood/node-llama-linux-x64-vulkan": "1.3.5",
+    "@fugood/node-llama-linux-x64-cuda": "1.3.5",
+    "@fugood/node-llama-linux-arm64": "1.3.5",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.3.5",
+    "@fugood/node-llama-linux-arm64-cuda": "1.3.5",
+    "@fugood/node-llama-win32-x64": "1.3.5",
+    "@fugood/node-llama-win32-x64-vulkan": "1.3.5",
+    "@fugood/node-llama-win32-x64-cuda": "1.3.5",
+    "@fugood/node-llama-win32-arm64": "1.3.5",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.3.5",
+    "@fugood/node-llama-darwin-x64": "1.3.5",
+    "@fugood/node-llama-darwin-arm64": "1.3.5"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
```
package/scripts/llama.cpp.patch
CHANGED
```diff
@@ -1,8 +1,8 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index 
+index bb168e835..cfc0e2c2e 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -
+@@ -143,9 +143,16 @@ if (LLAMA_LLGUIDANCE)
      set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
  endif ()
 
@@ -21,7 +21,7 @@ index 706fa32ee..248459903 100644
 
  #
 diff --git a/src/llama.cpp/common/chat.cpp b/src/llama.cpp/common/chat.cpp
-index 
+index 6fa05a604..87dfa7a8b 100644
 --- a/src/llama.cpp/common/chat.cpp
 +++ b/src/llama.cpp/common/chat.cpp
 @@ -6,9 +6,6 @@
@@ -51,7 +51,7 @@ index 938872e82..6364f173f 100644
  struct templates_params {
      json messages;
      json tools;
-@@ -
+@@ -817,7 +804,7 @@ static std::string apply(
          tmpl_inputs.extra_context.merge_patch(*additional_context);
      }
      // TODO: add flag to control date/time, if only for testing purposes.
@@ -61,7 +61,7 @@ index 938872e82..6364f173f 100644
      minja::chat_template_options tmpl_opts;
      // To avoid double BOS / EOS tokens, we're manually removing begining / trailing tokens
 diff --git a/src/llama.cpp/common/chat.h b/src/llama.cpp/common/chat.h
-index 
+index 754c411e2..71241a6cc 100644
 --- a/src/llama.cpp/common/chat.h
 +++ b/src/llama.cpp/common/chat.h
 @@ -9,7 +9,18 @@
@@ -85,10 +85,10 @@ index 50efb0d4e..f471a84c7 100644
  struct common_chat_tool_call {
      std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index 
+index f3cc55247..65398844f 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
-@@ -
+@@ -1162,6 +1162,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
      mparams.n_gpu_layers = params.n_gpu_layers;
  }
 
@@ -97,10 +97,10 @@ index 4dc95dcba..ea0ea86c0 100644
      mparams.split_mode = params.split_mode;
      mparams.tensor_split = params.tensor_split;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index 
+index de5b404dd..d30d252c9 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
-@@ -
+@@ -281,6 +281,7 @@ struct lr_opt {
      struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
 
      struct common_params {
@@ -109,7 +109,7 @@ index f42c083fa..c573cc812 100644
      int32_t n_ctx = 4096; // context size
      int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index 
+index d0cab0bcb..48d532838 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
```
package/src/LlamaContext.cpp
CHANGED
```diff
@@ -105,6 +105,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
       InstanceMethod<&LlamaContext::GetModelInfo>(
           "getModelInfo",
           static_cast<napi_property_attributes>(napi_enumerable)),
+      InstanceMethod<&LlamaContext::GetUsedDevices>(
+          "getUsedDevices",
+          static_cast<napi_property_attributes>(napi_enumerable)),
       InstanceMethod<&LlamaContext::GetFormattedChat>(
           "getFormattedChat",
           static_cast<napi_property_attributes>(napi_enumerable)),
@@ -306,6 +309,19 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   llama_backend_init();
   llama_numa_init(params.numa);
 
+  // Parse devices array
+  if (options.Has("devices") && options.Get("devices").IsArray()) {
+    auto devices_array = options.Get("devices").As<Napi::Array>();
+    for (size_t i = 0; i < devices_array.Length(); i++) {
+      auto device_name = devices_array.Get(i).ToString().Utf8Value();
+      auto * dev = ggml_backend_dev_by_name(device_name.c_str());
+      if (dev) {
+        params.devices.push_back(dev);
+      }
+      // Skip invalid device names silently
+    }
+  }
+
   std::vector<common_adapter_lora_info> lora;
   auto lora_path = get_option<std::string>(options, "lora", "");
   auto lora_scaled = get_option<float>(options, "lora_scaled", 1.0f);
@@ -378,6 +394,17 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
   _rn_ctx->attachThreadpoolsIfAvailable();
 
+  // Collect used devices from the loaded model
+  if (_rn_ctx->llama_init.model) {
+    const auto &model_devices = _rn_ctx->llama_init.model->devices;
+    for (auto dev : model_devices) {
+      const char *dev_name = ggml_backend_dev_name(dev);
+      if (dev_name != nullptr) {
+        _used_devices.push_back(std::string(dev_name));
+      }
+    }
+  }
+
   // Release progress callback after model is loaded
   if (has_progress_callback) {
     _progress_tsfn.Release();
@@ -583,6 +610,15 @@ Napi::Value LlamaContext::GetModelInfo(const Napi::CallbackInfo &info) {
   return details;
 }
 
+// getUsedDevices(): string[]
+Napi::Value LlamaContext::GetUsedDevices(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  Napi::Array devices = Napi::Array::New(env, _used_devices.size());
+  for (size_t i = 0; i < _used_devices.size(); i++) {
+    devices[i] = Napi::String::New(env, _used_devices[i]);
+  }
+  return devices;
+}
 
 
 // getFormattedChat(
```
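Because unknown device names are skipped silently on the native side, a caller can compare the requested list against `getUsedDevices()` after loading. A hedged sketch of that check follows; as before, `loadModel` and the `model` / `n_gpu_layers` fields are assumed from the existing API and are not part of this diff.

```ts
import { loadModel } from '@fugood/llama.node'

// Load a model on the requested devices and warn about any that were not used
// (a typo in the name, or the backend is unavailable on this machine).
const loadOnDevices = async (modelPath: string, wanted: string[]) => {
  const ctx = await loadModel({ model: modelPath, n_gpu_layers: 99, devices: wanted })
  const used = ctx.getUsedDevices()
  const missing = wanted.filter((name) => !used.includes(name))
  if (missing.length > 0) {
    console.warn('requested devices not in use:', missing)
  }
  return ctx
}
```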
package/src/LlamaContext.h
CHANGED
```diff
@@ -31,6 +31,7 @@ public:
 private:
   Napi::Value GetSystemInfo(const Napi::CallbackInfo &info);
   Napi::Value GetModelInfo(const Napi::CallbackInfo &info);
+  Napi::Value GetUsedDevices(const Napi::CallbackInfo &info);
   Napi::Value GetFormattedChat(const Napi::CallbackInfo &info);
   Napi::Value Completion(const Napi::CallbackInfo &info);
   void StopCompletion(const Napi::CallbackInfo &info);
@@ -69,6 +70,7 @@ private:
   void CancelRequest(const Napi::CallbackInfo &info);
 
   std::string _info;
+  std::vector<std::string> _used_devices;
   Napi::Object _meta;
   LlamaCompletionWorker *_wip = nullptr;
```