@fugood/llama.node 1.0.6 → 1.1.1
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- package/CMakeLists.txt +3 -3
- package/lib/binding.ts +117 -32
- package/lib/index.js +7 -9
- package/lib/index.ts +34 -25
- package/package.json +17 -14
- package/src/LlamaCompletionWorker.cpp +24 -6
- package/src/LlamaContext.cpp +38 -8
- package/src/llama.cpp/common/arg.cpp +8 -1
- package/src/llama.cpp/common/common.h +4 -3
- package/src/llama.cpp/ggml/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/src/llama.cpp/include/llama.h +2 -0
- package/src/llama.cpp/src/llama-arch.cpp +6 -6
- package/src/llama.cpp/src/llama-chat.cpp +3 -4
- package/src/llama.cpp/src/llama-context.cpp +49 -14
- package/src/llama.cpp/src/llama-context.h +13 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +15 -0
- package/src/llama.cpp/src/llama-model.cpp +19 -2
- package/src/tts_utils.cpp +12 -0
- package/src/tts_utils.h +40 -1
package/CMakeLists.txt
CHANGED
@@ -73,9 +73,9 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O1 /Ob1 /bigobj")
   endif()
 else()
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -funroll-loops -flto")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -funroll-loops -flto")
-  set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -flto")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -funroll-loops -flto=auto")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -funroll-loops -flto=auto")
+  set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -flto=auto")
 endif()
 endif()
package/lib/binding.ts
CHANGED
@@ -1,6 +1,3 @@
-import * as path from 'path'
-
-
 export type MessagePart = {
   type: string,
   text?: string,

@@ -53,6 +50,11 @@ export type LlamaModelOptions = {
    * Enable context shifting to handle prompts larger than context size
    */
   ctx_shift?: boolean
+  /**
+   * Use a unified buffer across the input sequences when computing the attention.
+   * Try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix.
+   */
+  kv_unified?: boolean
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean

@@ -65,9 +67,9 @@ export type CompletionResponseFormat = {
   type: 'text' | 'json_object' | 'json_schema'
   json_schema?: {
     strict?: boolean
-    schema:
+    schema: Record<string, any>
   }
-  schema?:
+  schema?: Record<string, any> // for json_object type
 }

@@ -76,7 +78,7 @@ export type LlamaCompletionOptions = {
   reasoning_format?: string
   chat_template?: string
   response_format?: CompletionResponseFormat
-  tools?:
+  tools?: Tool[]
   parallel_tool_calls?: boolean
   tool_choice?: string
   enable_thinking?: boolean

@@ -107,7 +109,7 @@ export type LlamaCompletionOptions = {
   stop?: string[]
   grammar?: string
   grammar_lazy?: boolean
-  grammar_triggers?: { type: number;
+  grammar_triggers?: { type: number; value: string; token?: number }[]
   preserved_tokens?: string[]
   /**
   * Path(s) to media file(s) to process before generating text.

@@ -120,13 +122,14 @@ export type LlamaCompletionOptions = {
   * Guide tokens to use for audio completion.
   * Help prevent hallucinations by forcing the TTS to use the correct words.
   */
-  guide_tokens?: Int32Array
+  guide_tokens?: number[] | Int32Array
 }

 export type LlamaCompletionResult = {
   text: string
   reasoning_content?: string
   content?: string
+  chat_format: number
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean

@@ -169,21 +172,101 @@ export type RerankResult = {
   index: number
 }

+export type ModelInfo = {
+  desc: string
+  nEmbd: number
+  nParams: number
+  size: number
+  chatTemplates: {
+    llamaChat: boolean
+    minja: {
+      default: boolean
+      defaultCaps: {
+        tools: boolean
+        toolCalls: boolean
+        toolResponses: boolean
+        systemRole: boolean
+        parallelToolCalls: boolean
+        toolCallId: boolean
+      }
+      toolUse: boolean
+      toolUseCaps?: {
+        tools: boolean
+        toolCalls: boolean
+        toolResponses: boolean
+        systemRole: boolean
+        parallelToolCalls: boolean
+        toolCallId: boolean
+      }
+    }
+  }
+  metadata: Record<string, string>
+  isChatTemplateSupported: boolean
+}
+
+export type GGUFModelInfo = {
+  version?: number
+  alignment?: number
+  data_offset?: number
+  [key: string]: string | number | undefined
+}
+
+export type FormattedChatResult = {
+  type: 'jinja' | 'llama-chat'
+  prompt: string
+  has_media: boolean
+  media_paths?: Array<string>
+}
+
+export type JinjaFormattedChatResult = {
+  prompt: string
+  chat_format: number
+  grammar: string
+  grammea_lazy: boolean
+  grammar_triggers: Array<{
+    type: number
+    value: string
+    token: number
+  }>
+  thinking_forced_open: boolean
+  preserved_tokens: string[]
+  additional_stops: string[]
+}
+
+export type Tool = {
+  type: 'function'
+  function: {
+    name: string
+    description: string
+    parameters: Record<string, any>
+  }
+}
+
+export type ToolCall = {
+  type: 'function'
+  function: {
+    name: string
+    arguments: string
+  }
+  id?: string
+}
+
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
-  getModelInfo():
+  getModelInfo(): ModelInfo
   getFormattedChat(
     messages: ChatMessage[],
     chat_template?: string,
     params?: {
       jinja?: boolean
       response_format?: CompletionResponseFormat
-      tools?:
-      parallel_tool_calls?:
+      tools?: Tool[]
+      parallel_tool_calls?: boolean
       tool_choice?: string
+      enable_thinking?: boolean
     },
-  ):
+  ): JinjaFormattedChatResult | string
   completion(
     options: LlamaCompletionOptions,
     callback?: (token: LlamaCompletionToken) => void,

@@ -197,51 +280,50 @@ export interface LlamaContext {
   loadSession(path: string): Promise<void>
   release(): Promise<void>
   applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
-  removeLoraAdapters(
+  removeLoraAdapters(): void
   getLoadedLoraAdapters(): { path: string; scaled: number }[]
   /**
   * Initialize multimodal support with a mmproj file
-  * @param
-  * @returns
+  * @param options Object containing path and optional use_gpu flag
+  * @returns boolean indicating if initialization was successful
   */
-  initMultimodal(options: { path: string; use_gpu?: boolean }):
+  initMultimodal(options: { path: string; use_gpu?: boolean }): boolean

   /**
   * Check if multimodal support is enabled
-  * @returns
+  * @returns boolean indicating if multimodal is enabled
   */
-  isMultimodalEnabled():
+  isMultimodalEnabled(): boolean

   /**
   * Get multimodal support capabilities
-  * @returns
+  * @returns Object with vision and audio support
   */
-  getMultimodalSupport():
+  getMultimodalSupport(): {
     vision: boolean
     audio: boolean
-  }
+  }

   /**
   * Release multimodal support
   */
-  releaseMultimodal():
+  releaseMultimodal(): void

   /**
   * Load a vocoder model
-  * @param
-  * @returns
+  * @param options Object containing path and optional n_batch
+  * @returns boolean indicating if loading was successful
   */
-  initVocoder(options: { path: string, n_batch?: number }):
+  initVocoder(options: { path: string, n_batch?: number }): boolean

   /**
   * Unload the vocoder model
-  * @returns Promise resolving to true if unloading was successful
   */
-  releaseVocoder():
+  releaseVocoder(): void

   /**
   * Check if the vocoder model is enabled
-  * @returns
+  * @returns boolean indicating if the vocoder model is enabled
   */
   isVocoderEnabled(): boolean

@@ -251,7 +333,10 @@ export interface LlamaContext {
   * @param text Text to complete
   * @returns Formatted audio completion
   */
-  getFormattedAudioCompletion(speaker: string|null, text: string):
+  getFormattedAudioCompletion(speaker: string|null, text: string): {
+    prompt: string
+    grammar?: string
+  }

   /**
   * Get guide tokens for audio completion

@@ -263,12 +348,12 @@ export interface LlamaContext {
   /**
   * Decode audio tokens to audio data
   * @param tokens Tokens to decode
-  * @returns
+  * @returns Promise resolving to decoded audio tokens
   */
-  decodeAudioTokens(tokens: Int32Array): Promise<Float32Array>
+  decodeAudioTokens(tokens: number[]|Int32Array): Promise<Float32Array>

   // static
-  loadModelInfo(path: string, skip: string[]): Promise<
+  loadModelInfo(path: string, skip: string[]): Promise<GGUFModelInfo>
   toggleNativeLog(
     enable: boolean,
     callback: (level: string, text: string) => void,
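The newly concrete types above make previously untyped surface area checkable. A minimal sketch of how the new kv_unified option and the Tool shape might be used; the model field name and file path are assumptions for illustration, not taken from this diff:

import type { LlamaModelOptions, Tool } from '@fugood/llama.node'

// kv_unified is new in 1.1.x: a unified KV buffer across input sequences.
// Per the option docs above, consider disabling it when n_seq_max > 1 and
// the sequences do not share a large prefix.
const modelOptions: LlamaModelOptions = {
  model: './model.gguf', // hypothetical path; the field name is assumed, not shown in this diff
  ctx_shift: true,
  kv_unified: false,
}

// tools?: Tool[] replaces the previously untyped tools field.
const weatherTool: Tool = {
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get the current weather for a city',
    parameters: {
      type: 'object',
      properties: { city: { type: 'string' } },
      required: ['city'],
    },
  },
}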
package/lib/index.js
CHANGED
@@ -140,7 +140,7 @@ class LlamaContextWrapper {
         const jsonSchema = getJsonSchema(params === null || params === void 0 ? void 0 : params.response_format);
         const result = this.ctx.getFormattedChat(chat, tmpl, {
             jinja: useJinja,
-
+            response_format: params === null || params === void 0 ? void 0 : params.response_format,
             tools: params === null || params === void 0 ? void 0 : params.tools,
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,

@@ -155,10 +155,8 @@ class LlamaContextWrapper {
         };
     }
     const jinjaResult = result;
-
-
-    jinjaResult.media_paths = media_paths;
-    return jinjaResult;
+    return Object.assign({ type: 'jinja', has_media,
+        media_paths }, jinjaResult);
 }
 completion(options, callback) {
     const { messages, media_paths = options.media_paths } = this._formatMediaChat(options.messages);

@@ -196,8 +194,8 @@ class LlamaContextWrapper {
 applyLoraAdapters(adapters) {
     return this.ctx.applyLoraAdapters(adapters);
 }
-removeLoraAdapters(
-
+removeLoraAdapters() {
+    this.ctx.removeLoraAdapters();
 }
 getLoadedLoraAdapters() {
     return this.ctx.getLoadedLoraAdapters();

@@ -209,7 +207,7 @@ class LlamaContextWrapper {
     return this.ctx.isMultimodalEnabled();
 }
 releaseMultimodal() {
-
+    this.ctx.releaseMultimodal();
 }
 getMultimodalSupport() {
     return this.ctx.getMultimodalSupport();

@@ -218,7 +216,7 @@ class LlamaContextWrapper {
     return this.ctx.initVocoder(options);
 }
 releaseVocoder() {
-
+    this.ctx.releaseVocoder();
 }
 isVocoderEnabled() {
     return this.ctx.isVocoderEnabled();
package/lib/index.ts
CHANGED
@@ -12,6 +12,10 @@ import type {
   RerankParams,
   RerankResult,
   CompletionResponseFormat,
+  ModelInfo,
+  JinjaFormattedChatResult,
+  Tool,
+  GGUFModelInfo,
 } from './binding'

 export * from './binding'

@@ -72,9 +76,9 @@ export type FormattedChatResult = {
 }

 class LlamaContextWrapper {
-  ctx:
+  ctx: LlamaContext

-  constructor(nativeCtx:
+  constructor(nativeCtx: LlamaContext) {
     this.ctx = nativeCtx
   }

@@ -82,7 +86,7 @@ class LlamaContextWrapper {
     return this.ctx.getSystemInfo()
   }

-  getModelInfo():
+  getModelInfo(): ModelInfo {
     return this.ctx.getModelInfo()
   }

@@ -158,8 +162,8 @@ class LlamaContextWrapper {
     params?: {
       jinja?: boolean
       response_format?: CompletionResponseFormat
-      tools?:
-      parallel_tool_calls?:
+      tools?: Tool[]
+      parallel_tool_calls?: boolean
       tool_choice?: string,
       enable_thinking?: boolean,
     },

@@ -175,9 +179,9 @@ class LlamaContextWrapper {
     if (template) tmpl = template // Force replace if provided
     const jsonSchema = getJsonSchema(params?.response_format)

-    const result = this.ctx.getFormattedChat(chat
+    const result = this.ctx.getFormattedChat(chat!, tmpl, {
       jinja: useJinja,
-
+      response_format: params?.response_format,
       tools: params?.tools,
       parallel_tool_calls: params?.parallel_tool_calls,
       tool_choice: params?.tool_choice,

@@ -192,11 +196,13 @@ class LlamaContextWrapper {
         media_paths,
       }
     }
-    const jinjaResult = result
-
-
-
-
+    const jinjaResult = result as JinjaFormattedChatResult
+    return {
+      type: 'jinja',
+      has_media,
+      media_paths,
+      ...jinjaResult,
+    }
   }

   completion(

@@ -256,8 +262,8 @@ class LlamaContextWrapper {
     return this.ctx.applyLoraAdapters(adapters)
   }

-  removeLoraAdapters(
-
+  removeLoraAdapters(): void {
+    this.ctx.removeLoraAdapters()
   }

   getLoadedLoraAdapters(): { path: string; scaled: number }[] {

@@ -267,38 +273,41 @@ class LlamaContextWrapper {
   initMultimodal(options: {
     path: string
     use_gpu?: boolean
-  }):
+  }): boolean {
     return this.ctx.initMultimodal(options)
   }

-  isMultimodalEnabled():
+  isMultimodalEnabled(): boolean {
     return this.ctx.isMultimodalEnabled()
   }

-  releaseMultimodal():
-
+  releaseMultimodal(): void {
+    this.ctx.releaseMultimodal()
   }

-  getMultimodalSupport():
+  getMultimodalSupport(): {
     vision: boolean
     audio: boolean
-  }
+  } {
     return this.ctx.getMultimodalSupport()
   }

-  initVocoder(options: { path: string, n_batch?: number }):
+  initVocoder(options: { path: string, n_batch?: number }): boolean {
     return this.ctx.initVocoder(options)
   }

-  releaseVocoder():
-
+  releaseVocoder(): void {
+    this.ctx.releaseVocoder()
   }

   isVocoderEnabled(): boolean {
     return this.ctx.isVocoderEnabled()
   }

-  getFormattedAudioCompletion(speaker: string|null, text: string):
+  getFormattedAudioCompletion(speaker: string|null, text: string): {
+    prompt: string
+    grammar?: string
+  } {
     return this.ctx.getFormattedAudioCompletion(speaker, text)
   }

@@ -332,7 +341,7 @@ const modelInfoSkip = [
   'tokenizer.ggml.scores',
 ]

-export const loadLlamaModelInfo = async (path: string): Promise<
+export const loadLlamaModelInfo = async (path: string): Promise<GGUFModelInfo> => {
   const variant = 'default'
   mods[variant] ??= await loadModule(variant)
   refreshNativeLogSetup()
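loadLlamaModelInfo now advertises a concrete Promise<GGUFModelInfo> return type. A small usage sketch; the file path and the general.architecture key are illustrative, and arbitrary keys are typed as string | number | undefined:

import { loadLlamaModelInfo } from '@fugood/llama.node'
import type { GGUFModelInfo } from '@fugood/llama.node'

async function inspect(path: string): Promise<void> {
  const info: GGUFModelInfo = await loadLlamaModelInfo(path)
  // version, alignment, and data_offset are the typed GGUF header fields;
  // everything else is a metadata entry keyed by its GGUF name.
  console.log(info.version, info.alignment, info.data_offset)
  console.log(info['general.architecture'])
}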
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.
+  "version": "1.1.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {

@@ -9,6 +9,7 @@
     "postinstall": "node scripts/check.js",
     "pretest": "node scripts/download-test-models.js",
     "test": "jest",
+    "typecheck": "tsc --noEmit",
     "build": "npx cmake-js build",
     "build-js": "tsc",
     "prepack": "npm run build-js",

@@ -70,19 +71,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.
-    "@fugood/node-llama-linux-x64-vulkan": "1.
-    "@fugood/node-llama-linux-x64-cuda": "1.
-    "@fugood/node-llama-linux-arm64": "1.
-    "@fugood/node-llama-linux-arm64-vulkan": "1.
-    "@fugood/node-llama-linux-arm64-cuda": "1.
-    "@fugood/node-llama-win32-x64": "1.
-    "@fugood/node-llama-win32-x64-vulkan": "1.
-    "@fugood/node-llama-win32-x64-cuda": "1.
-    "@fugood/node-llama-win32-arm64": "1.
-    "@fugood/node-llama-win32-arm64-vulkan": "1.
-    "@fugood/node-llama-darwin-x64": "1.
-    "@fugood/node-llama-darwin-arm64": "1.
+    "@fugood/node-llama-linux-x64": "1.1.1",
+    "@fugood/node-llama-linux-x64-vulkan": "1.1.1",
+    "@fugood/node-llama-linux-x64-cuda": "1.1.1",
+    "@fugood/node-llama-linux-arm64": "1.1.1",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.1.1",
+    "@fugood/node-llama-linux-arm64-cuda": "1.1.1",
+    "@fugood/node-llama-win32-x64": "1.1.1",
+    "@fugood/node-llama-win32-x64-vulkan": "1.1.1",
+    "@fugood/node-llama-win32-x64-cuda": "1.1.1",
+    "@fugood/node-llama-win32-arm64": "1.1.1",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.1.1",
+    "@fugood/node-llama-darwin-x64": "1.1.1",
+    "@fugood/node-llama-darwin-arm64": "1.1.1"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",

@@ -91,10 +92,12 @@
     "@commitlint/config-conventional": "^19.2.2",
     "@types/jest": "^29.5.12",
     "@types/node": "^22.0.0",
+    "@types/node-wav": "^0.0.4",
     "cmake-js": "^7.3.0",
     "husky": "^9.0.11",
     "jest": "^29.7.0",
     "node-addon-api": "^8.0.0",
+    "node-wav": "^0.0.2",
     "release-it": "^17.7.0",
     "rimraf": "^6.0.1",
     "typescript": "^5.4.5",
package/src/LlamaCompletionWorker.cpp
CHANGED

@@ -110,7 +110,7 @@ void LlamaCompletionWorker::Execute() {
   } else {
     // Text-only path
     std::vector<llama_token> prompt_tokens =
-        ::common_tokenize(ctx, _params.prompt, add_bos);
+        ::common_tokenize(ctx, _params.prompt, add_bos, true);
     n_input = prompt_tokens.size();

     if (_sess->tokens_ptr()->size() > 0) {

@@ -157,10 +157,26 @@ void LlamaCompletionWorker::Execute() {
   // For multimodal input, n_past might already be set
   // Only decode text tokens if we have any input left
   if (n_input > 0) {
-
-
-
-
+    // Decode tokens in batches using n_batch as chunk size
+    int n_past_batch = n_cur;
+    int n_remaining = n_input;
+
+    while (n_remaining > 0) {
+      int n_eval = n_remaining;
+      if (n_eval > _params.n_batch) {
+        n_eval = _params.n_batch;
+      }
+
+      int ret = llama_decode(ctx, llama_batch_get_one(embd->data() + n_past_batch, n_eval));
+      if (ret < 0) {
+        SetError("Failed to decode token batch, code: " + std::to_string(ret) +
+                 ", n_eval: " + std::to_string(n_eval) +
+                 ", n_past_batch: " + std::to_string(n_past_batch));
+        break;
+      }
+
+      n_past_batch += n_eval;
+      n_remaining -= n_eval;
     }
   }

@@ -177,7 +193,7 @@ void LlamaCompletionWorker::Execute() {

   // Collect audio tokens for TTS if vocoder is enabled
   if (_has_vocoder) {
-    if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+    if ((_tts_type == OUTETTS_V0_1 || _tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
         (new_token_id >= 151672 && new_token_id <= 155772)) {
       _result.audio_tokens.push_back(new_token_id);
     }

@@ -255,6 +271,8 @@ void LlamaCompletionWorker::OnOK() {
   try {
     common_chat_syntax chat_syntax;
     chat_syntax.format = static_cast<common_chat_format>(_chat_format);
+    result.Set("chat_format", Napi::Number::New(env, _chat_format));
+
    chat_syntax.thinking_forced_open = _thinking_forced_open;

    if (_reasoning_format == "deepseek") {
package/src/LlamaContext.cpp
CHANGED
@@ -247,6 +247,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.cache_type_v = kv_cache_type_from_str(
       get_option<std::string>(options, "cache_type_v", "f16").c_str());
   params.ctx_shift = get_option<bool>(options, "ctx_shift", true);
+  params.kv_unified = get_option<bool>(options, "kv_unified", false);

   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);

@@ -904,9 +905,27 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   // guide_tokens
   std::vector<llama_token> guide_tokens;
   if (options.Has("guide_tokens")) {
-    auto
-
-
+    auto guide_tokens_value = options.Get("guide_tokens");
+    if (guide_tokens_value.IsArray()) {
+      auto guide_tokens_array = guide_tokens_value.As<Napi::Array>();
+      for (size_t i = 0; i < guide_tokens_array.Length(); i++) {
+        guide_tokens.push_back(guide_tokens_array.Get(i).ToNumber().Int32Value());
+      }
+    } else if (guide_tokens_value.IsTypedArray()) {
+      auto guide_tokens_typed_array = guide_tokens_value.As<Napi::TypedArray>();
+      if (guide_tokens_typed_array.TypedArrayType() == napi_int32_array) {
+        auto guide_tokens_int32_array = guide_tokens_value.As<Napi::Int32Array>();
+        size_t length = guide_tokens_int32_array.ElementLength();
+        const int32_t* data = guide_tokens_int32_array.Data();
+        guide_tokens.resize(length);
+        memcpy(guide_tokens.data(), data, length * sizeof(int32_t));
+      } else {
+        Napi::TypeError::New(env, "guide_tokens must be Array<number> or Int32Array").ThrowAsJavaScriptException();
+        return env.Undefined();
+      }
+    } else {
+      Napi::TypeError::New(env, "guide_tokens must be Array<number> or Int32Array").ThrowAsJavaScriptException();
+      return env.Undefined();
     }
   }

@@ -1345,7 +1364,7 @@ Napi::Value LlamaContext::IsVocoderEnabled(const Napi::CallbackInfo &info) {
   return Napi::Boolean::New(env, _has_vocoder);
 }

-// getFormattedAudioCompletion(speaker: string|null, text: string):
+// getFormattedAudioCompletion(speaker: string|null, text: string): object
 Napi::Value
 LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();

@@ -1372,9 +1391,16 @@ LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
     audio_text = audio_text_from_speaker(speaker, type);
     audio_data = audio_data_from_speaker(speaker, type);
   }
-
-
-
+  std::string prompt = "<|im_start|>\n" + audio_text +
+                       process_text(text, type) +
+                       "<|text_end|>\n" + audio_data + "\n";
+  Napi::Object result = Napi::Object::New(env);
+  result.Set("prompt", prompt);
+  const char *grammar = get_tts_grammar(type);
+  if (grammar != nullptr) {
+    result.Set("grammar", grammar);
+  }
+  return result;
 }

 // getAudioCompletionGuideTokens(text: string): Int32Array

@@ -1415,6 +1441,10 @@ LlamaContext::GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info) {
   if (tmp.size() > 0) {
     result.push_back(tmp[0]);
   }
+
+  // Add Audio End, forcing stop generation
+  result.push_back(common_tokenize(vocab, "<|audio_end|>", false, true)[0]);
+
   auto tokens = Napi::Int32Array::New(env, result.size());
   memcpy(tokens.Data(), result.data(), result.size() * sizeof(int32_t));
   return tokens;

@@ -1449,7 +1479,7 @@ Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-  if (type ==
+  if (type == OUTETTS_V0_1 || type == OUTETTS_V0_2 || type == OUTETTS_V0_3) {
     tokens.erase(
         std::remove_if(tokens.begin(), tokens.end(),
                        [](llama_token t) { return t < 151672 || t > 155772; }),
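Taken together, the TTS changes mean getFormattedAudioCompletion now returns a grammar alongside the prompt, guide tokens end with <|audio_end|> to force generation to stop, and plain number[] is accepted wherever Int32Array was required. A rough end-to-end sketch, assuming a context whose vocoder is already initialized; the prompt option and the audio_tokens result field are assumptions inferred from the native code, not confirmed typings:

import type { LlamaContext } from '@fugood/llama.node'

async function synthesize(ctx: LlamaContext, text: string): Promise<Float32Array> {
  // New in this release: the result may carry a grammar for the TTS type.
  const { prompt, grammar } = ctx.getFormattedAudioCompletion(null, text)

  // Guide tokens now end with <|audio_end|>, forcing generation to stop.
  const guide = ctx.getAudioCompletionGuideTokens(text)

  const result: any = await ctx.completion({
    prompt,              // assumed option name for a raw prompt
    grammar,
    guide_tokens: guide, // Int32Array; a plain number[] is now accepted too
  })

  // decodeAudioTokens accepts number[] | Int32Array and resolves to PCM samples.
  return ctx.decodeAudioTokens(result.audio_tokens)
}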