@fugood/llama.node 1.4.8 → 1.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/binding.ts +43 -0
- package/lib/parallel.js +26 -0
- package/lib/parallel.ts +33 -0
- package/package.json +15 -15
- package/scripts/llama.cpp.patch +12 -14
- package/src/LlamaCompletionWorker.cpp +3 -1
- package/src/LlamaCompletionWorker.h +2 -0
- package/src/LlamaContext.cpp +16 -1
- package/src/LlamaContext.h +3 -0
- package/src/llama.cpp/common/CMakeLists.txt +4 -4
- package/src/llama.cpp/common/arg.cpp +159 -42
- package/src/llama.cpp/common/arg.h +10 -1
- package/src/llama.cpp/common/common.cpp +1 -1
- package/src/llama.cpp/common/common.h +6 -2
- package/src/llama.cpp/common/preset.cpp +197 -5
- package/src/llama.cpp/common/preset.h +45 -3
- package/src/llama.cpp/common/sampling.cpp +51 -37
- package/src/llama.cpp/common/sampling.h +6 -3
- package/src/llama.cpp/common/speculative.cpp +1 -1
- package/src/llama.cpp/ggml/CMakeLists.txt +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +283 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +28 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +51 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +286 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.h +8 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +41 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +125 -22
- package/src/llama.cpp/src/llama-arch.cpp +1 -1
- package/src/llama.cpp/src/llama-mmap.cpp +123 -28
- package/src/llama.cpp/src/llama-mmap.h +5 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +56 -13
- package/src/llama.cpp/src/llama-model.cpp +7 -5
- package/src/llama.cpp/src/llama-sampling.cpp +16 -0
- package/src/llama.cpp/src/llama.cpp +22 -32
package/lib/binding.ts
CHANGED
```diff
@@ -120,6 +120,8 @@ export type LlamaCompletionOptions = {
   tool_choice?: string
   enable_thinking?: boolean
   thinking_forced_open?: boolean
+  /** Serialized PEG parser for chat output parsing (required for PEG format types) */
+  chat_parser?: string
   prompt?: string
   /**
    * Text to prefill the response with.
@@ -415,6 +417,8 @@ export type JinjaFormattedChatResult = {
   thinking_forced_open: boolean
   preserved_tokens: string[]
   additional_stops: string[]
+  /** Serialized PEG parser for chat output parsing (required for PEG format types) */
+  chat_parser: string
 }
 
 export type Tool = {
@@ -435,6 +439,24 @@ export type ToolCall = {
   id?: string
 }
 
+export type ParallelRequestStatus = {
+  request_id: number
+  type: 'completion' | 'embedding' | 'rerank'
+  state: 'queued' | 'processing_prompt' | 'generating' | 'done'
+  prompt_length: number
+  tokens_generated: number
+  prompt_ms: number
+  generation_ms: number
+  tokens_per_second: number
+}
+
+export type ParallelStatus = {
+  n_parallel: number
+  active_slots: number
+  queued_requests: number
+  requests: ParallelRequestStatus[]
+}
+
 export interface LlamaContext {
   new (
     options: LlamaModelOptions,
@@ -612,6 +634,27 @@ export interface LlamaContext {
    */
   cancelRequest(requestId: number): void
 
+  /**
+   * Get current parallel processing status (one-time snapshot)
+   * @returns Current parallel status
+   */
+  getParallelStatus(): ParallelStatus
+
+  /**
+   * Subscribe to parallel processing status changes
+   * @param callback Called whenever parallel status changes
+   * @returns Subscriber ID that can be used to unsubscribe
+   */
+  subscribeParallelStatus(
+    callback: (status: ParallelStatus) => void,
+  ): { subscriberId: number }
+
+  /**
+   * Unsubscribe from parallel processing status changes
+   * @param subscriberId Subscriber ID returned from subscribeParallelStatus
+   */
+  unsubscribeParallelStatus(subscriberId: number): void
+
   /**
    * Clear the KV and recurrent caches.
    * This is faster than recreating the context and useful for preventing
```

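Taken together, the binding.ts additions expose a one-time snapshot call plus a subscribe/unsubscribe pair keyed by `subscriberId`. Below is a minimal TypeScript sketch of how a caller might drive them; the relative import path and the way the `LlamaContext` instance is obtained are assumptions for illustration and are not part of this diff.

```ts
import type { LlamaContext, ParallelStatus } from './binding'

// Log a one-time snapshot, then watch for changes until the caller invokes
// the returned stop() function. The LlamaContext instance is assumed to be
// created elsewhere (its construction is not shown in this diff).
export function watchParallelStatus(ctx: LlamaContext): () => void {
  const snapshot: ParallelStatus = ctx.getParallelStatus()
  console.log(
    `slots ${snapshot.active_slots}/${snapshot.n_parallel}, queued ${snapshot.queued_requests}`,
  )

  const { subscriberId } = ctx.subscribeParallelStatus((status) => {
    for (const req of status.requests) {
      console.log(
        `#${req.request_id} [${req.type}] ${req.state} ` +
          `${req.tokens_generated} tok, ${req.tokens_per_second.toFixed(1)} tok/s`,
      )
    }
  })

  // Stop receiving updates.
  return () => ctx.unsubscribeParallelStatus(subscriberId)
}
```
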
package/lib/parallel.js
CHANGED
```diff
@@ -212,5 +212,31 @@ class LlamaParallelAPI {
   isEnabled() {
     return this.enabled;
   }
+  /**
+   * Get current parallel processing status (one-time snapshot)
+   * @returns Current parallel status
+   */
+  getStatus() {
+    if (!this.enabled) {
+      throw new Error('Parallel mode is not enabled. Call enable() first.');
+    }
+    return this.context.getParallelStatus();
+  }
+  /**
+   * Subscribe to parallel processing status changes
+   * @param callback Called whenever parallel status changes
+   * @returns Object with remove() method to unsubscribe
+   */
+  subscribeToStatus(callback) {
+    if (!this.enabled) {
+      throw new Error('Parallel mode is not enabled. Call enable() first.');
+    }
+    const { subscriberId } = this.context.subscribeParallelStatus(callback);
+    return {
+      remove: () => {
+        this.context.unsubscribeParallelStatus(subscriberId);
+      },
+    };
+  }
 }
 exports.LlamaParallelAPI = LlamaParallelAPI;
```

package/lib/parallel.ts
CHANGED
```diff
@@ -4,6 +4,7 @@ import type {
   LlamaCompletionOptions,
   LlamaCompletionToken,
   RerankParams,
+  ParallelStatus,
 } from './binding'
 import { formatMediaChat } from './utils'
 
@@ -278,4 +279,36 @@ export class LlamaParallelAPI {
   isEnabled(): boolean {
     return this.enabled
   }
+
+  /**
+   * Get current parallel processing status (one-time snapshot)
+   * @returns Current parallel status
+   */
+  getStatus(): ParallelStatus {
+    if (!this.enabled) {
+      throw new Error('Parallel mode is not enabled. Call enable() first.')
+    }
+    return this.context.getParallelStatus()
+  }
+
+  /**
+   * Subscribe to parallel processing status changes
+   * @param callback Called whenever parallel status changes
+   * @returns Object with remove() method to unsubscribe
+   */
+  subscribeToStatus(
+    callback: (status: ParallelStatus) => void,
+  ): { remove: () => void } {
+    if (!this.enabled) {
+      throw new Error('Parallel mode is not enabled. Call enable() first.')
+    }
+
+    const { subscriberId } = this.context.subscribeParallelStatus(callback)
+
+    return {
+      remove: () => {
+        this.context.unsubscribeParallelStatus(subscriberId)
+      },
+    }
+  }
 }
```

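The parallel.ts wrapper (and its compiled parallel.js counterpart) narrows the same API to `getStatus()` and `subscribeToStatus()`, which returns a handle with `remove()`. A short illustrative sketch follows; the import paths and the way the `LlamaParallelAPI` instance is constructed and enabled are assumptions outside this diff.

```ts
import type { ParallelStatus } from './binding'
import { LlamaParallelAPI } from './parallel'

// Both helpers throw if enable() has not been called on the API yet,
// matching the guard added in this release.
export function logQueueDepth(parallel: LlamaParallelAPI): void {
  const status: ParallelStatus = parallel.getStatus()
  console.log(`queued requests: ${status.queued_requests}`)
}

export function watchSlots(parallel: LlamaParallelAPI): () => void {
  const sub = parallel.subscribeToStatus((s) => {
    console.log(`active slots: ${s.active_slots}/${s.n_parallel}`)
  })
  // remove() unsubscribes via the binding's unsubscribeParallelStatus.
  return () => sub.remove()
}
```
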
package/package.json
CHANGED
```diff
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.4.8",
+  "version": "1.4.10",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -72,20 +72,20 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-darwin-arm64": "1.4.8",
-    "@fugood/node-llama-darwin-x64": "1.4.8",
-    "@fugood/node-llama-linux-arm64": "1.4.8",
-    "@fugood/node-llama-linux-arm64-cuda": "1.4.8",
-    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.8",
-    "@fugood/node-llama-linux-arm64-vulkan": "1.4.8",
-    "@fugood/node-llama-linux-x64": "1.4.8",
-    "@fugood/node-llama-linux-x64-cuda": "1.4.8",
-    "@fugood/node-llama-linux-x64-vulkan": "1.4.8",
-    "@fugood/node-llama-win32-arm64": "1.4.8",
-    "@fugood/node-llama-win32-arm64-vulkan": "1.4.8",
-    "@fugood/node-llama-win32-x64": "1.4.8",
-    "@fugood/node-llama-win32-x64-cuda": "1.4.8",
-    "@fugood/node-llama-win32-x64-vulkan": "1.4.8"
+    "@fugood/node-llama-darwin-arm64": "1.4.10",
+    "@fugood/node-llama-darwin-x64": "1.4.10",
+    "@fugood/node-llama-linux-arm64": "1.4.10",
+    "@fugood/node-llama-linux-arm64-cuda": "1.4.10",
+    "@fugood/node-llama-linux-arm64-snapdragon": "1.4.10",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.4.10",
+    "@fugood/node-llama-linux-x64": "1.4.10",
+    "@fugood/node-llama-linux-x64-cuda": "1.4.10",
+    "@fugood/node-llama-linux-x64-vulkan": "1.4.10",
+    "@fugood/node-llama-win32-arm64": "1.4.10",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.4.10",
+    "@fugood/node-llama-win32-x64": "1.4.10",
+    "@fugood/node-llama-win32-x64-cuda": "1.4.10",
+    "@fugood/node-llama-win32-x64-vulkan": "1.4.10"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
```

package/scripts/llama.cpp.patch
CHANGED
```diff
@@ -1,25 +1,23 @@
 diff --git a/src/llama.cpp/common/CMakeLists.txt b/src/llama.cpp/common/CMakeLists.txt
-index
+index f7b99159e..fa37fed19 100644
 --- a/src/llama.cpp/common/CMakeLists.txt
 +++ b/src/llama.cpp/common/CMakeLists.txt
-@@ -
+@@ -154,8 +154,14 @@ if (LLAMA_LLGUIDANCE)
     set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance ${LLGUIDANCE_PLATFORM_LIBS})
 endif ()
 
+-target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
 +# Add Windows socket libraries unconditionally on Windows
 +if (WIN32)
 +    set(LLAMA_COMMON_WIN_LIBS ws2_32)
 +else()
 +    set(LLAMA_COMMON_WIN_LIBS "")
 +endif()
-+
-target_include_directories(${TARGET} PUBLIC . ../vendor)
-target_compile_features (${TARGET} PUBLIC cxx_std_17)
--target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
-+target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
 
++target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
 
 #
+# copy the license files
 diff --git a/src/llama.cpp/common/chat-peg-parser.cpp b/src/llama.cpp/common/chat-peg-parser.cpp
 index 1bcba9cd8..b7cd68734 100644
 --- a/src/llama.cpp/common/chat-peg-parser.cpp
@@ -98,7 +96,7 @@ index 6085510a4..263076ce2 100644
 struct common_chat_tool_call {
     std::string name;
 diff --git a/src/llama.cpp/common/common.cpp b/src/llama.cpp/common/common.cpp
-index
+index d4e8c7405..af3dec813 100644
 --- a/src/llama.cpp/common/common.cpp
 +++ b/src/llama.cpp/common/common.cpp
 @@ -1343,6 +1343,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
@@ -110,7 +108,7 @@ index 5a8cf5248..8010a990e 100644
     mparams.split_mode = params.split_mode;
     mparams.tensor_split = params.tensor_split;
 diff --git a/src/llama.cpp/common/common.h b/src/llama.cpp/common/common.h
-index
+index 334372073..e912b593a 100644
 --- a/src/llama.cpp/common/common.h
 +++ b/src/llama.cpp/common/common.h
 @@ -307,6 +307,7 @@ struct lr_opt {
@@ -122,7 +120,7 @@ index d70744840..dea8c4546 100644
     int32_t n_ctx = 0; // context size, 0 == context the model was trained with
     int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
 diff --git a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
-index
+index 28fb7612e..63f7e1ca1 100644
 --- a/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 +++ b/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt
 @@ -106,7 +106,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
@@ -135,10 +133,10 @@ index fc31089f3..aa9befe4c 100644
     check_cxx_compiler_flag(-mfp16-format=ieee GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E)
     if (NOT "${GGML_COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "")
 diff --git a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-index
+index 6a00abacc..9e12459b6 100644
 --- a/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
 +++ b/src/llama.cpp/ggml/src/ggml-hexagon/ggml-hexagon.cpp
-@@ -
+@@ -3226,11 +3226,26 @@ static const char * ggml_backend_hexagon_device_get_description(ggml_backend_dev
     GGML_UNUSED(dev);
 }
 
@@ -168,7 +166,7 @@ index 514f086f6..792abaa58 100644
     GGML_UNUSED(dev);
 }
 
-@@ -
+@@ -3413,10 +3428,17 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
     }
 }
 
@@ -187,7 +185,7 @@ index 514f086f6..792abaa58 100644
 
     GGML_LOG_INFO("ggml-hex: Hexagon Arch version v%d\n", opt_arch);
 
-@@ -
+@@ -3429,6 +3451,8 @@ ggml_hexagon_registry::ggml_hexagon_registry(ggml_backend_reg_t reg) {
     } catch (std::exception const &exc) {
         GGML_LOG_ERROR("ggml-hex: failed to create device/session %zu\n", i);
         devices[i].context = nullptr;
```

package/src/LlamaCompletionWorker.cpp
CHANGED
```diff
@@ -37,6 +37,7 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     int32_t chat_format,
     bool thinking_forced_open,
     std::string reasoning_format,
+    const std::string &chat_parser,
     const std::vector<std::string> &media_paths,
     const std::vector<llama_token> &guide_tokens,
     bool has_vocoder,
@@ -46,6 +47,7 @@ LlamaCompletionWorker::LlamaCompletionWorker(
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
       _thinking_forced_open(thinking_forced_open),
       _reasoning_format(reasoning_format),
+      _chat_parser(chat_parser),
       _media_paths(media_paths), _guide_tokens(guide_tokens),
       _prefill_text(prefill_text),
       _has_vocoder(has_vocoder), _tts_type(tts_type_val) {
@@ -121,7 +123,7 @@ void LlamaCompletionWorker::Execute() {
   }
 
   // Begin completion with chat format and reasoning settings
-  completion->beginCompletion(_chat_format, common_reasoning_format_from_name(_reasoning_format), _thinking_forced_open);
+  completion->beginCompletion(_chat_format, common_reasoning_format_from_name(_reasoning_format), _thinking_forced_open, _chat_parser);
 
   // Main completion loop
   int token_count = 0;
```

package/src/LlamaCompletionWorker.h
CHANGED
```diff
@@ -23,6 +23,7 @@ public:
       int32_t chat_format,
       bool thinking_forced_open,
       std::string reasoning_format,
+      const std::string &chat_parser = "",
       const std::vector<std::string> &media_paths = {},
       const std::vector<llama_token> &guide_tokens = {},
       bool has_vocoder = false,
@@ -50,6 +51,7 @@ private:
   int32_t _chat_format;
   bool _thinking_forced_open;
   std::string _reasoning_format;
+  std::string _chat_parser;
   std::vector<std::string> _media_paths;
   std::vector<llama_token> _guide_tokens;
   std::string _prefill_text;
```

package/src/LlamaContext.cpp
CHANGED
```diff
@@ -201,6 +201,15 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
           InstanceMethod<&LlamaContext::CancelRequest>(
               "cancelRequest",
               static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::GetParallelStatus>(
+              "getParallelStatus",
+              static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::SubscribeParallelStatus>(
+              "subscribeParallelStatus",
+              static_cast<napi_property_attributes>(napi_enumerable)),
+          InstanceMethod<&LlamaContext::UnsubscribeParallelStatus>(
+              "unsubscribeParallelStatus",
+              static_cast<napi_property_attributes>(napi_enumerable)),
           InstanceMethod<&LlamaContext::ClearCache>(
               "clearCache",
               static_cast<napi_property_attributes>(napi_enumerable)),
@@ -250,6 +259,8 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   }
 
   common_params params;
+  params.fit_params = false;
+
   params.model.path = get_option<std::string>(options, "model", "");
   if (params.model.path.empty()) {
     Napi::TypeError::New(env, "Model is required").ThrowAsJavaScriptException();
@@ -760,6 +771,8 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
           i, Napi::String::New(env, chatParams.additional_stops[i].c_str()));
     }
     result.Set("additional_stops", additional_stops);
+    // chat_parser: string (serialized PEG parser for chat output parsing)
+    result.Set("chat_parser", chatParams.parser);
 
     return result;
   } else {
@@ -821,6 +834,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
   bool thinking_forced_open = get_option<bool>(options, "thinking_forced_open", false);
   std::string reasoning_format = get_option<std::string>(options, "reasoning_format", "none");
+  std::string chat_parser = get_option<std::string>(options, "chat_parser", "");
 
   common_params params = _rn_ctx->params;
   auto grammar_from_params = get_option<std::string>(options, "grammar", "");
@@ -959,6 +973,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
     chat_format = chatParams.format;
     thinking_forced_open = chatParams.thinking_forced_open;
+    chat_parser = chatParams.parser;
 
     for (const auto &token : chatParams.preserved_tokens) {
       auto ids =
@@ -1074,7 +1089,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
 
   auto *worker =
       new LlamaCompletionWorker(info, _rn_ctx, callback, params, stop_words,
-                                chat_format, thinking_forced_open, reasoning_format, media_paths, guide_tokens,
+                                chat_format, thinking_forced_open, reasoning_format, chat_parser, media_paths, guide_tokens,
                                 _rn_ctx->has_vocoder, _rn_ctx->tts_wrapper ? _rn_ctx->tts_wrapper->type : rnllama::UNKNOWN, prefill_text);
   worker->Queue();
   _wip = worker;
```

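These native changes thread an optional `chat_parser` option from JS through LlamaCompletionWorker into `beginCompletion`, and surface `chatParams.parser` as `chat_parser` on formatted-chat results. The sketch below shows one way a caller might forward it; the field names match the binding.ts types above, while the prompt handling and helper name are assumptions for illustration.

```ts
import type { JinjaFormattedChatResult, LlamaCompletionOptions } from './binding'

// Forward the serialized PEG parser from a formatted-chat result into the
// completion options. The rendered prompt is passed in separately here,
// since the prompt field of JinjaFormattedChatResult is not part of this diff.
export function toCompletionOptions(
  formatted: JinjaFormattedChatResult,
  prompt: string,
): LlamaCompletionOptions {
  return {
    prompt,
    thinking_forced_open: formatted.thinking_forced_open,
    // Required for PEG format types; if omitted, the native side falls back
    // to an empty string (the "chat_parser" default in LlamaContext.cpp).
    chat_parser: formatted.chat_parser,
  }
}
```
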
package/src/LlamaContext.h
CHANGED
```diff
@@ -68,6 +68,9 @@ private:
   Napi::Value QueueEmbedding(const Napi::CallbackInfo &info);
   Napi::Value QueueRerank(const Napi::CallbackInfo &info);
   void CancelRequest(const Napi::CallbackInfo &info);
+  Napi::Value GetParallelStatus(const Napi::CallbackInfo &info);
+  Napi::Value SubscribeParallelStatus(const Napi::CallbackInfo &info);
+  void UnsubscribeParallelStatus(const Napi::CallbackInfo &info);
 
   // Cache management
   void ClearCache(const Napi::CallbackInfo &info);
```

package/src/llama.cpp/common/CMakeLists.txt
CHANGED
```diff
@@ -85,6 +85,9 @@ add_library(${TARGET} STATIC
     unicode.h
     )
 
+target_include_directories(${TARGET} PUBLIC . ../vendor)
+target_compile_features (${TARGET} PUBLIC cxx_std_17)
+
 if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
@@ -158,10 +161,7 @@ else()
     set(LLAMA_COMMON_WIN_LIBS "")
 endif()
 
-
-target_compile_features (${TARGET} PUBLIC cxx_std_17)
-target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
-
+target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} ${LLAMA_COMMON_WIN_LIBS} PUBLIC llama Threads::Threads)
 
 #
 # copy the license files
```