@fugood/llama.node 1.0.6 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +3 -3
- package/lib/binding.js +7 -17
- package/lib/binding.ts +116 -32
- package/lib/index.js +7 -9
- package/lib/index.ts +34 -25
- package/package.json +17 -14
- package/src/LlamaCompletionWorker.cpp +2 -2
- package/src/LlamaContext.cpp +38 -8
- package/src/llama.cpp/common/arg.cpp +8 -1
- package/src/llama.cpp/common/common.h +4 -3
- package/src/llama.cpp/ggml/CMakeLists.txt +3 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +5 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +1 -1
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +109 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +3 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +88 -10
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +0 -1
- package/src/llama.cpp/include/llama.h +2 -0
- package/src/llama.cpp/src/llama-arch.cpp +6 -6
- package/src/llama.cpp/src/llama-chat.cpp +3 -4
- package/src/llama.cpp/src/llama-context.cpp +49 -14
- package/src/llama.cpp/src/llama-context.h +13 -0
- package/src/llama.cpp/src/llama-memory-recurrent.cpp +15 -0
- package/src/llama.cpp/src/llama-model.cpp +19 -2
- package/src/tts_utils.cpp +12 -0
- package/src/tts_utils.h +40 -1
package/CMakeLists.txt
CHANGED
@@ -73,9 +73,9 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O1 /Ob1 /bigobj")
   endif()
 else()
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -funroll-loops -flto")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -funroll-loops -flto")
-  set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -flto")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -funroll-loops -flto=auto")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -funroll-loops -flto=auto")
+  set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} -flto=auto")
 endif()
 endif()
 
package/lib/binding.js
CHANGED
@@ -15,23 +15,13 @@ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (
 }) : function(o, v) {
     o["default"] = v;
 });
-var __importStar = (this && this.__importStar) ||
-
-
-
-
-
-
-        return ownKeys(o);
-    };
-    return function (mod) {
-        if (mod && mod.__esModule) return mod;
-        var result = {};
-        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
-        __setModuleDefault(result, mod);
-        return result;
-    };
-})();
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
 var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
     function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
     return new (P || (P = Promise))(function (resolve, reject) {
package/lib/binding.ts
CHANGED
@@ -1,6 +1,3 @@
-import * as path from 'path'
-
-
 export type MessagePart = {
   type: string,
   text?: string,
@@ -53,6 +50,11 @@ export type LlamaModelOptions = {
    * Enable context shifting to handle prompts larger than context size
    */
   ctx_shift?: boolean
+  /**
+   * Use a unified buffer across the input sequences when computing the attention.
+   * Try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix.
+   */
+  kv_unified?: boolean
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
@@ -65,9 +67,9 @@ export type CompletionResponseFormat = {
   type: 'text' | 'json_object' | 'json_schema'
   json_schema?: {
     strict?: boolean
-    schema:
+    schema: Record<string, any>
   }
-  schema?:
+  schema?: Record<string, any> // for json_object type
 }
 
 export type LlamaCompletionOptions = {
@@ -76,7 +78,7 @@ export type LlamaCompletionOptions = {
   reasoning_format?: string
   chat_template?: string
   response_format?: CompletionResponseFormat
-  tools?:
+  tools?: Tool[]
   parallel_tool_calls?: boolean
   tool_choice?: string
   enable_thinking?: boolean
@@ -107,7 +109,7 @@ export type LlamaCompletionOptions = {
   stop?: string[]
   grammar?: string
   grammar_lazy?: boolean
-  grammar_triggers?: { type: number;
+  grammar_triggers?: { type: number; value: string; token?: number }[]
   preserved_tokens?: string[]
   /**
    * Path(s) to media file(s) to process before generating text.
@@ -120,7 +122,7 @@ export type LlamaCompletionOptions = {
    * Guide tokens to use for audio completion.
    * Help prevent hallucinations by forcing the TTS to use the correct words.
    */
-  guide_tokens?: Int32Array
+  guide_tokens?: number[] | Int32Array
 }
 
 export type LlamaCompletionResult = {
@@ -169,21 +171,101 @@ export type RerankResult = {
   index: number
 }
 
+export type ModelInfo = {
+  desc: string
+  nEmbd: number
+  nParams: number
+  size: number
+  chatTemplates: {
+    llamaChat: boolean
+    minja: {
+      default: boolean
+      defaultCaps: {
+        tools: boolean
+        toolCalls: boolean
+        toolResponses: boolean
+        systemRole: boolean
+        parallelToolCalls: boolean
+        toolCallId: boolean
+      }
+      toolUse: boolean
+      toolUseCaps?: {
+        tools: boolean
+        toolCalls: boolean
+        toolResponses: boolean
+        systemRole: boolean
+        parallelToolCalls: boolean
+        toolCallId: boolean
+      }
+    }
+  }
+  metadata: Record<string, string>
+  isChatTemplateSupported: boolean
+}
+
+export type GGUFModelInfo = {
+  version?: number
+  alignment?: number
+  data_offset?: number
+  [key: string]: string | number | undefined
+}
+
+export type FormattedChatResult = {
+  type: 'jinja' | 'llama-chat'
+  prompt: string
+  has_media: boolean
+  media_paths?: Array<string>
+}
+
+export type JinjaFormattedChatResult = {
+  prompt: string
+  chat_format: number
+  grammar: string
+  grammea_lazy: boolean
+  grammar_triggers: Array<{
+    type: number
+    value: string
+    token: number
+  }>
+  thinking_forced_open: boolean
+  preserved_tokens: string[]
+  additional_stops: string[]
+}
+
+export type Tool = {
+  type: 'function'
+  function: {
+    name: string
+    description: string
+    parameters: Record<string, any>
+  }
+}
+
+export type ToolCall = {
+  type: 'function'
+  function: {
+    name: string
+    arguments: string
+  }
+  id?: string
+}
+
 export interface LlamaContext {
   new (options: LlamaModelOptions): LlamaContext
   getSystemInfo(): string
-  getModelInfo():
+  getModelInfo(): ModelInfo
   getFormattedChat(
     messages: ChatMessage[],
     chat_template?: string,
     params?: {
       jinja?: boolean
       response_format?: CompletionResponseFormat
-      tools?:
-      parallel_tool_calls?:
+      tools?: Tool[]
+      parallel_tool_calls?: boolean
       tool_choice?: string
+      enable_thinking?: boolean
     },
-  ):
+  ): JinjaFormattedChatResult | string
   completion(
     options: LlamaCompletionOptions,
     callback?: (token: LlamaCompletionToken) => void,
@@ -197,51 +279,50 @@ export interface LlamaContext {
   loadSession(path: string): Promise<void>
   release(): Promise<void>
   applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
-  removeLoraAdapters(
+  removeLoraAdapters(): void
   getLoadedLoraAdapters(): { path: string; scaled: number }[]
   /**
    * Initialize multimodal support with a mmproj file
-   * @param
-   * @returns
+   * @param options Object containing path and optional use_gpu flag
+   * @returns boolean indicating if initialization was successful
    */
-  initMultimodal(options: { path: string; use_gpu?: boolean }):
+  initMultimodal(options: { path: string; use_gpu?: boolean }): boolean
 
   /**
    * Check if multimodal support is enabled
-   * @returns
+   * @returns boolean indicating if multimodal is enabled
    */
-  isMultimodalEnabled():
+  isMultimodalEnabled(): boolean
 
   /**
    * Get multimodal support capabilities
-   * @returns
+   * @returns Object with vision and audio support
    */
-  getMultimodalSupport():
+  getMultimodalSupport(): {
     vision: boolean
     audio: boolean
-  }
+  }
 
   /**
    * Release multimodal support
    */
-  releaseMultimodal():
+  releaseMultimodal(): void
 
   /**
    * Load a vocoder model
-   * @param
-   * @returns
+   * @param options Object containing path and optional n_batch
+   * @returns boolean indicating if loading was successful
    */
-  initVocoder(options: { path: string, n_batch?: number }):
+  initVocoder(options: { path: string, n_batch?: number }): boolean
 
   /**
    * Unload the vocoder model
-   * @returns Promise resolving to true if unloading was successful
    */
-  releaseVocoder():
+  releaseVocoder(): void
 
   /**
    * Check if the vocoder model is enabled
-   * @returns
+   * @returns boolean indicating if the vocoder model is enabled
    */
   isVocoderEnabled(): boolean
 
@@ -251,7 +332,10 @@ export interface LlamaContext {
    * @param text Text to complete
    * @returns Formatted audio completion
    */
-  getFormattedAudioCompletion(speaker: string|null, text: string):
+  getFormattedAudioCompletion(speaker: string|null, text: string): {
+    prompt: string
+    grammar?: string
+  }
 
   /**
    * Get guide tokens for audio completion
@@ -263,12 +347,12 @@ export interface LlamaContext {
   /**
    * Decode audio tokens to audio data
    * @param tokens Tokens to decode
-   * @returns
+   * @returns Promise resolving to decoded audio tokens
    */
-  decodeAudioTokens(tokens: Int32Array): Promise<Float32Array>
+  decodeAudioTokens(tokens: number[]|Int32Array): Promise<Float32Array>
 
   // static
-  loadModelInfo(path: string, skip: string[]): Promise<
+  loadModelInfo(path: string, skip: string[]): Promise<GGUFModelInfo>
   toggleNativeLog(
     enable: boolean,
     callback: (level: string, text: string) => void,
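
A minimal sketch of how the new binding.ts declarations above could be used from TypeScript. Only fields that appear in this diff are exercised; the tool definition, JSON schema, and token ids are illustrative placeholders rather than values from the package:

import type {
  LlamaModelOptions,
  LlamaCompletionOptions,
  CompletionResponseFormat,
  Tool,
} from '@fugood/llama.node'

// kv_unified is new in 1.1.0; the native layer defaults it to true.
const modelOverrides: Partial<LlamaModelOptions> = {
  ctx_shift: true,
  kv_unified: true,
}

// tools is now typed as Tool[] instead of being left open.
const tools: Tool[] = [{
  type: 'function',
  function: {
    name: 'get_weather', // placeholder tool
    description: 'Look up the weather for a city',
    parameters: { type: 'object', properties: { city: { type: 'string' } } },
  },
}]

// json_schema response formats carry a Record<string, any> schema.
const responseFormat: CompletionResponseFormat = {
  type: 'json_schema',
  json_schema: {
    strict: true,
    schema: { type: 'object', properties: { answer: { type: 'string' } } },
  },
}

// guide_tokens now accepts a plain number[] as well as an Int32Array.
const completionOverrides: Partial<LlamaCompletionOptions> = {
  tools,
  tool_choice: 'auto',
  response_format: responseFormat,
  guide_tokens: [151672, 151673], // placeholder token ids
}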
package/lib/index.js
CHANGED
@@ -140,7 +140,7 @@ class LlamaContextWrapper {
         const jsonSchema = getJsonSchema(params === null || params === void 0 ? void 0 : params.response_format);
         const result = this.ctx.getFormattedChat(chat, tmpl, {
             jinja: useJinja,
-
+            response_format: params === null || params === void 0 ? void 0 : params.response_format,
             tools: params === null || params === void 0 ? void 0 : params.tools,
             parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
             tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
@@ -155,10 +155,8 @@ class LlamaContextWrapper {
            };
        }
        const jinjaResult = result;
-
-
-        jinjaResult.media_paths = media_paths;
-        return jinjaResult;
+        return Object.assign({ type: 'jinja', has_media,
+            media_paths }, jinjaResult);
    }
    completion(options, callback) {
        const { messages, media_paths = options.media_paths } = this._formatMediaChat(options.messages);
@@ -196,8 +194,8 @@ class LlamaContextWrapper {
    applyLoraAdapters(adapters) {
        return this.ctx.applyLoraAdapters(adapters);
    }
-    removeLoraAdapters(
-
+    removeLoraAdapters() {
+        this.ctx.removeLoraAdapters();
    }
    getLoadedLoraAdapters() {
        return this.ctx.getLoadedLoraAdapters();
@@ -209,7 +207,7 @@ class LlamaContextWrapper {
        return this.ctx.isMultimodalEnabled();
    }
    releaseMultimodal() {
-
+        this.ctx.releaseMultimodal();
    }
    getMultimodalSupport() {
        return this.ctx.getMultimodalSupport();
@@ -218,7 +216,7 @@ class LlamaContextWrapper {
        return this.ctx.initVocoder(options);
    }
    releaseVocoder() {
-
+        this.ctx.releaseVocoder();
    }
    isVocoderEnabled() {
        return this.ctx.isVocoderEnabled();
package/lib/index.ts
CHANGED
@@ -12,6 +12,10 @@ import type {
   RerankParams,
   RerankResult,
   CompletionResponseFormat,
+  ModelInfo,
+  JinjaFormattedChatResult,
+  Tool,
+  GGUFModelInfo,
 } from './binding'
 
 export * from './binding'
@@ -72,9 +76,9 @@ export type FormattedChatResult = {
 }
 
 class LlamaContextWrapper {
-  ctx:
+  ctx: LlamaContext
 
-  constructor(nativeCtx:
+  constructor(nativeCtx: LlamaContext) {
     this.ctx = nativeCtx
   }
 
@@ -82,7 +86,7 @@ class LlamaContextWrapper {
     return this.ctx.getSystemInfo()
   }
 
-  getModelInfo():
+  getModelInfo(): ModelInfo {
     return this.ctx.getModelInfo()
   }
 
@@ -158,8 +162,8 @@ class LlamaContextWrapper {
     params?: {
       jinja?: boolean
       response_format?: CompletionResponseFormat
-      tools?:
-      parallel_tool_calls?:
+      tools?: Tool[]
+      parallel_tool_calls?: boolean
       tool_choice?: string,
       enable_thinking?: boolean,
     },
@@ -175,9 +179,9 @@ class LlamaContextWrapper {
     if (template) tmpl = template // Force replace if provided
     const jsonSchema = getJsonSchema(params?.response_format)
 
-    const result = this.ctx.getFormattedChat(chat
+    const result = this.ctx.getFormattedChat(chat!, tmpl, {
       jinja: useJinja,
-
+      response_format: params?.response_format,
       tools: params?.tools,
       parallel_tool_calls: params?.parallel_tool_calls,
       tool_choice: params?.tool_choice,
@@ -192,11 +196,13 @@ class LlamaContextWrapper {
         media_paths,
       }
     }
-    const jinjaResult = result
-
-
-
-
+    const jinjaResult = result as JinjaFormattedChatResult
+    return {
+      type: 'jinja',
+      has_media,
+      media_paths,
+      ...jinjaResult,
+    }
   }
 
   completion(
@@ -256,8 +262,8 @@ class LlamaContextWrapper {
     return this.ctx.applyLoraAdapters(adapters)
   }
 
-  removeLoraAdapters(
-
+  removeLoraAdapters(): void {
+    this.ctx.removeLoraAdapters()
   }
 
   getLoadedLoraAdapters(): { path: string; scaled: number }[] {
@@ -267,38 +273,41 @@ class LlamaContextWrapper {
   initMultimodal(options: {
     path: string
     use_gpu?: boolean
-  }):
+  }): boolean {
     return this.ctx.initMultimodal(options)
   }
 
-  isMultimodalEnabled():
+  isMultimodalEnabled(): boolean {
     return this.ctx.isMultimodalEnabled()
   }
 
-  releaseMultimodal():
-
+  releaseMultimodal(): void {
+    this.ctx.releaseMultimodal()
   }
 
-  getMultimodalSupport():
+  getMultimodalSupport(): {
     vision: boolean
     audio: boolean
-  }
+  } {
     return this.ctx.getMultimodalSupport()
   }
 
-  initVocoder(options: { path: string, n_batch?: number }):
+  initVocoder(options: { path: string, n_batch?: number }): boolean {
     return this.ctx.initVocoder(options)
   }
 
-  releaseVocoder():
-
+  releaseVocoder(): void {
+    this.ctx.releaseVocoder()
   }
 
   isVocoderEnabled(): boolean {
     return this.ctx.isVocoderEnabled()
   }
 
-  getFormattedAudioCompletion(speaker: string|null, text: string):
+  getFormattedAudioCompletion(speaker: string|null, text: string): {
+    prompt: string
+    grammar?: string
+  } {
     return this.ctx.getFormattedAudioCompletion(speaker, text)
   }
 
@@ -332,7 +341,7 @@ const modelInfoSkip = [
   'tokenizer.ggml.scores',
 ]
 
-export const loadLlamaModelInfo = async (path: string): Promise<
+export const loadLlamaModelInfo = async (path: string): Promise<GGUFModelInfo> => {
   const variant = 'default'
   mods[variant] ??= await loadModule(variant)
   refreshNativeLogSetup()
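
loadLlamaModelInfo now resolves to the typed GGUFModelInfo instead of an untyped object. A short usage sketch; the model path and metadata key are illustrative:

import { loadLlamaModelInfo } from '@fugood/llama.node'
import type { GGUFModelInfo } from '@fugood/llama.node'

async function inspectModel(modelPath: string): Promise<void> {
  // Well-known header fields are typed; remaining GGUF metadata stays string-indexed.
  const info: GGUFModelInfo = await loadLlamaModelInfo(modelPath)
  console.log(info.version, info.alignment, info['general.architecture'])
}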
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "1.0
+  "version": "1.1.0",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
@@ -9,6 +9,7 @@
     "postinstall": "node scripts/check.js",
     "pretest": "node scripts/download-test-models.js",
     "test": "jest",
+    "typecheck": "tsc --noEmit",
     "build": "npx cmake-js build",
     "build-js": "tsc",
     "prepack": "npm run build-js",
@@ -70,19 +71,19 @@
     "CMakeLists.txt"
   ],
   "optionalDependencies": {
-    "@fugood/node-llama-linux-x64": "1.0
-    "@fugood/node-llama-linux-x64-vulkan": "1.0
-    "@fugood/node-llama-linux-x64-cuda": "1.0
-    "@fugood/node-llama-linux-arm64": "1.0
-    "@fugood/node-llama-linux-arm64-vulkan": "1.0
-    "@fugood/node-llama-linux-arm64-cuda": "1.0
-    "@fugood/node-llama-win32-x64": "1.0
-    "@fugood/node-llama-win32-x64-vulkan": "1.0
-    "@fugood/node-llama-win32-x64-cuda": "1.0
-    "@fugood/node-llama-win32-arm64": "1.0
-    "@fugood/node-llama-win32-arm64-vulkan": "1.0
-    "@fugood/node-llama-darwin-x64": "1.0
-    "@fugood/node-llama-darwin-arm64": "1.0
+    "@fugood/node-llama-linux-x64": "1.1.0",
+    "@fugood/node-llama-linux-x64-vulkan": "1.1.0",
+    "@fugood/node-llama-linux-x64-cuda": "1.1.0",
+    "@fugood/node-llama-linux-arm64": "1.1.0",
+    "@fugood/node-llama-linux-arm64-vulkan": "1.1.0",
+    "@fugood/node-llama-linux-arm64-cuda": "1.1.0",
+    "@fugood/node-llama-win32-x64": "1.1.0",
+    "@fugood/node-llama-win32-x64-vulkan": "1.1.0",
+    "@fugood/node-llama-win32-x64-cuda": "1.1.0",
+    "@fugood/node-llama-win32-arm64": "1.1.0",
+    "@fugood/node-llama-win32-arm64-vulkan": "1.1.0",
+    "@fugood/node-llama-darwin-x64": "1.1.0",
+    "@fugood/node-llama-darwin-arm64": "1.1.0"
   },
   "devDependencies": {
     "@babel/preset-env": "^7.24.4",
@@ -91,10 +92,12 @@
     "@commitlint/config-conventional": "^19.2.2",
     "@types/jest": "^29.5.12",
     "@types/node": "^22.0.0",
+    "@types/node-wav": "^0.0.4",
     "cmake-js": "^7.3.0",
     "husky": "^9.0.11",
     "jest": "^29.7.0",
     "node-addon-api": "^8.0.0",
+    "node-wav": "^0.0.2",
     "release-it": "^17.7.0",
     "rimraf": "^6.0.1",
     "typescript": "^5.4.5",
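
node-wav and @types/node-wav are added as dev dependencies, presumably for the TTS tests. A hedged sketch of writing the Float32Array returned by decodeAudioTokens to a WAV file with node-wav; the 24 kHz sample rate and the structurally-typed ctx parameter are assumptions, not values from this diff:

import { writeFileSync } from 'fs'
import * as wav from 'node-wav'

async function saveDecodedAudio(
  // Structural type taken from binding.ts; the real object is a context wrapper.
  ctx: { decodeAudioTokens(tokens: number[] | Int32Array): Promise<Float32Array> },
  audioTokens: number[],
  outPath: string,
): Promise<void> {
  const samples = await ctx.decodeAudioTokens(audioTokens)
  // node-wav encodes an array of per-channel Float32Arrays into a WAV buffer.
  const buffer = wav.encode([samples], { sampleRate: 24000, float: true, bitDepth: 32 })
  writeFileSync(outPath, buffer)
}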
package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -110,7 +110,7 @@ void LlamaCompletionWorker::Execute() {
   } else {
     // Text-only path
     std::vector<llama_token> prompt_tokens =
-        ::common_tokenize(ctx, _params.prompt, add_bos);
+        ::common_tokenize(ctx, _params.prompt, add_bos, true);
     n_input = prompt_tokens.size();
 
     if (_sess->tokens_ptr()->size() > 0) {
@@ -177,7 +177,7 @@ void LlamaCompletionWorker::Execute() {
 
     // Collect audio tokens for TTS if vocoder is enabled
     if (_has_vocoder) {
-      if ((_tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
+      if ((_tts_type == OUTETTS_V0_1 || _tts_type == OUTETTS_V0_2 || _tts_type == OUTETTS_V0_3) &&
           (new_token_id >= 151672 && new_token_id <= 155772)) {
         _result.audio_tokens.push_back(new_token_id);
       }
package/src/LlamaContext.cpp
CHANGED
@@ -247,6 +247,7 @@ LlamaContext::LlamaContext(const Napi::CallbackInfo &info)
   params.cache_type_v = kv_cache_type_from_str(
       get_option<std::string>(options, "cache_type_v", "f16").c_str());
   params.ctx_shift = get_option<bool>(options, "ctx_shift", true);
+  params.kv_unified = get_option<bool>(options, "kv_unified", true);
 
   params.use_mlock = get_option<bool>(options, "use_mlock", false);
   params.use_mmap = get_option<bool>(options, "use_mmap", true);
@@ -904,9 +905,27 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   // guide_tokens
   std::vector<llama_token> guide_tokens;
   if (options.Has("guide_tokens")) {
-    auto
-
-
+    auto guide_tokens_value = options.Get("guide_tokens");
+    if (guide_tokens_value.IsArray()) {
+      auto guide_tokens_array = guide_tokens_value.As<Napi::Array>();
+      for (size_t i = 0; i < guide_tokens_array.Length(); i++) {
+        guide_tokens.push_back(guide_tokens_array.Get(i).ToNumber().Int32Value());
+      }
+    } else if (guide_tokens_value.IsTypedArray()) {
+      auto guide_tokens_typed_array = guide_tokens_value.As<Napi::TypedArray>();
+      if (guide_tokens_typed_array.TypedArrayType() == napi_int32_array) {
+        auto guide_tokens_int32_array = guide_tokens_value.As<Napi::Int32Array>();
+        size_t length = guide_tokens_int32_array.ElementLength();
+        const int32_t* data = guide_tokens_int32_array.Data();
+        guide_tokens.resize(length);
+        memcpy(guide_tokens.data(), data, length * sizeof(int32_t));
+      } else {
+        Napi::TypeError::New(env, "guide_tokens must be Array<number> or Int32Array").ThrowAsJavaScriptException();
+        return env.Undefined();
+      }
+    } else {
+      Napi::TypeError::New(env, "guide_tokens must be Array<number> or Int32Array").ThrowAsJavaScriptException();
+      return env.Undefined();
     }
   }
 
@@ -1345,7 +1364,7 @@ Napi::Value LlamaContext::IsVocoderEnabled(const Napi::CallbackInfo &info) {
   return Napi::Boolean::New(env, _has_vocoder);
 }
 
-// getFormattedAudioCompletion(speaker: string|null, text: string):
+// getFormattedAudioCompletion(speaker: string|null, text: string): object
 Napi::Value
 LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
   Napi::Env env = info.Env();
@@ -1372,9 +1391,16 @@ LlamaContext::GetFormattedAudioCompletion(const Napi::CallbackInfo &info) {
     audio_text = audio_text_from_speaker(speaker, type);
     audio_data = audio_data_from_speaker(speaker, type);
   }
-
-
-
+  std::string prompt = "<|im_start|>\n" + audio_text +
+                       process_text(text, type) +
+                       "<|text_end|>\n" + audio_data + "\n";
+  Napi::Object result = Napi::Object::New(env);
+  result.Set("prompt", prompt);
+  const char *grammar = get_tts_grammar(type);
+  if (grammar != nullptr) {
+    result.Set("grammar", grammar);
+  }
+  return result;
 }
 
 // getAudioCompletionGuideTokens(text: string): Int32Array
@@ -1415,6 +1441,10 @@ LlamaContext::GetAudioCompletionGuideTokens(const Napi::CallbackInfo &info) {
   if (tmp.size() > 0) {
     result.push_back(tmp[0]);
   }
+
+  // Add Audio End, forcing stop generation
+  result.push_back(common_tokenize(vocab, "<|audio_end|>", false, true)[0]);
+
   auto tokens = Napi::Int32Array::New(env, result.size());
   memcpy(tokens.Data(), result.data(), result.size() * sizeof(int32_t));
   return tokens;
@@ -1449,7 +1479,7 @@ Napi::Value LlamaContext::DecodeAudioTokens(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
     return env.Undefined();
   }
-  if (type ==
+  if (type == OUTETTS_V0_1 || type == OUTETTS_V0_2 || type == OUTETTS_V0_3) {
     tokens.erase(
         std::remove_if(tokens.begin(), tokens.end(),
                        [](llama_token t) { return t < 151672 || t > 155772; }),