@fugood/llama.node 0.3.17 → 0.4.1
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/CMakeLists.txt +3 -1
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +39 -2
- package/lib/index.js +132 -1
- package/lib/index.ts +203 -3
- package/package.json +2 -1
- package/src/EmbeddingWorker.cpp +1 -1
- package/src/LlamaCompletionWorker.cpp +366 -19
- package/src/LlamaCompletionWorker.h +30 -10
- package/src/LlamaContext.cpp +213 -5
- package/src/LlamaContext.h +12 -0
- package/src/common.hpp +15 -0
- package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
- package/src/llama.cpp/.github/workflows/build.yml +41 -762
- package/src/llama.cpp/.github/workflows/docker.yml +5 -2
- package/src/llama.cpp/.github/workflows/release.yml +716 -0
- package/src/llama.cpp/.github/workflows/server.yml +12 -12
- package/src/llama.cpp/CMakeLists.txt +5 -17
- package/src/llama.cpp/cmake/build-info.cmake +8 -2
- package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
- package/src/llama.cpp/common/CMakeLists.txt +31 -3
- package/src/llama.cpp/common/arg.cpp +48 -29
- package/src/llama.cpp/common/chat.cpp +128 -106
- package/src/llama.cpp/common/chat.h +2 -0
- package/src/llama.cpp/common/common.cpp +37 -1
- package/src/llama.cpp/common/common.h +18 -9
- package/src/llama.cpp/common/llguidance.cpp +1 -0
- package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
- package/src/llama.cpp/common/minja/minja.hpp +69 -36
- package/src/llama.cpp/common/regex-partial.cpp +204 -0
- package/src/llama.cpp/common/regex-partial.h +56 -0
- package/src/llama.cpp/common/sampling.cpp +57 -50
- package/src/llama.cpp/examples/CMakeLists.txt +2 -23
- package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
- package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
- package/src/llama.cpp/examples/training/finetune.cpp +96 -0
- package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
- package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
- package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
- package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
- package/src/llama.cpp/ggml/include/ggml.h +10 -7
- package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
- package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
- package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
- package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
- package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
- package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
- package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
- package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
- package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
- package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
- package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
- package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
- package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
- package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
- package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
- package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
- package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
- package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
- package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
- package/src/llama.cpp/ggml/src/ggml.c +29 -20
- package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
- package/src/llama.cpp/include/llama.h +52 -11
- package/src/llama.cpp/requirements/requirements-all.txt +3 -3
- package/src/llama.cpp/scripts/xxd.cmake +1 -1
- package/src/llama.cpp/src/CMakeLists.txt +1 -0
- package/src/llama.cpp/src/llama-adapter.cpp +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +3 -0
- package/src/llama.cpp/src/llama-batch.cpp +5 -1
- package/src/llama.cpp/src/llama-batch.h +2 -1
- package/src/llama.cpp/src/llama-chat.cpp +17 -7
- package/src/llama.cpp/src/llama-chat.h +1 -0
- package/src/llama.cpp/src/llama-context.cpp +389 -501
- package/src/llama.cpp/src/llama-context.h +44 -32
- package/src/llama.cpp/src/llama-cparams.h +1 -0
- package/src/llama.cpp/src/llama-graph.cpp +20 -38
- package/src/llama.cpp/src/llama-graph.h +12 -8
- package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
- package/src/llama.cpp/src/llama-kv-cache.h +271 -85
- package/src/llama.cpp/src/llama-memory.h +11 -1
- package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
- package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
- package/src/llama.cpp/src/llama-model-saver.h +37 -0
- package/src/llama.cpp/src/llama-model.cpp +316 -69
- package/src/llama.cpp/src/llama-model.h +8 -1
- package/src/llama.cpp/src/llama-quant.cpp +15 -13
- package/src/llama.cpp/src/llama-sampling.cpp +18 -6
- package/src/llama.cpp/src/llama-vocab.cpp +42 -4
- package/src/llama.cpp/src/llama-vocab.h +6 -0
- package/src/llama.cpp/src/llama.cpp +14 -0
- package/src/llama.cpp/tests/CMakeLists.txt +10 -2
- package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
- package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
- package/src/llama.cpp/tests/test-chat.cpp +3 -1
- package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
- package/src/llama.cpp/tests/test-opt.cpp +33 -21
- package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
- package/src/llama.cpp/tests/test-sampling.cpp +1 -1
- package/src/llama.cpp/tools/CMakeLists.txt +39 -0
- package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
- package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
- package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
- package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
- package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
- package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
- package/src/llama.cpp/tools/mtmd/clip.h +99 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
- package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
- package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
- package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
- package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
- package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
- package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
- package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
- package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
- package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
- package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
- package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
- package/src/llama.cpp/examples/infill/infill.cpp +0 -590
- package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
- package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
- package/src/llama.cpp/examples/llava/clip.h +0 -135
- package/src/llama.cpp/examples/llava/llava.cpp +0 -586
- package/src/llama.cpp/examples/llava/llava.h +0 -49
- package/src/llama.cpp/examples/llava/mtmd.h +0 -168
- package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
- /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
- /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
- /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
- /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
- /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
package/CMakeLists.txt
CHANGED
@@ -73,11 +73,13 @@ set(LLAMA_CURL OFF CACHE BOOL "Build curl")
 
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries")
 add_subdirectory("src/llama.cpp")
+add_subdirectory("src/llama.cpp/tools/mtmd")
 
 include_directories(
   ${CMAKE_JS_INC}
   "src/llama.cpp"
   "src/llama.cpp/src"
+  "src/tools/mtmd"
 )
 
 file(
@@ -104,7 +106,7 @@ file(
 
 add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
-target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common)
+target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB} llama ggml common mtmd ${CMAKE_THREAD_LIBS_INIT})
 
 add_custom_target(copy_assets ALL DEPENDS ${PROJECT_NAME})
 
package/bin/** (darwin, linux, linux-cuda, linux-vulkan, win32, win32-vulkan)
CHANGED
Binary files not shown. The updated prebuilt binaries (llama-node.node for each platform/variant, plus the win32 arm64/x64 node.lib stubs) are enumerated in the file summary above.
package/lib/binding.ts
CHANGED
@@ -1,8 +1,17 @@
 import * as path from 'path'
 
+
+export type MessagePart = {
+  type: string,
+  text?: string,
+  image_url?: {
+    url?: string
+  }
+}
+
 export type ChatMessage = {
   role: string
-  content
+  content?: string | MessagePart[]
 }
 
 export type LlamaModelOptions = {
@@ -93,6 +102,13 @@ export type LlamaCompletionOptions = {
   grammar_lazy?: boolean
   grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
+  /**
+   * Path(s) to image file(s) to process before generating text.
+   * When provided, the image(s) will be processed and added to the context.
+   * Requires multimodal support to be enabled via initMultimodal.
+   * Supports both file paths and base64 data URLs.
+   */
+  image_paths?: string | string[]
 }
 
 export type LlamaCompletionResult = {
@@ -154,9 +170,30 @@ export interface LlamaContext {
   applyLoraAdapters(adapters: { path: string; scaled: number }[]): void
   removeLoraAdapters(adapters: { path: string }[]): void
   getLoadedLoraAdapters(): { path: string; scaled: number }[]
+  /**
+   * Initialize multimodal support with a mmproj file
+   * @param mmproj_path Path to the multimodal projector file
+   * @returns Promise resolving to true if initialization was successful
+   */
+  initMultimodal(options: { path: string; use_gpu?: boolean }): Promise<boolean>
+
+  /**
+   * Check if multimodal support is enabled
+   * @returns Promise resolving to true if multimodal is enabled
+   */
+  isMultimodalEnabled(): Promise<boolean>
+
+  /**
+   * Release multimodal support
+   */
+  releaseMultimodal(): Promise<void>
+
   // static
   loadModelInfo(path: string, skip: string[]): Promise<Object>
-  toggleNativeLog(
+  toggleNativeLog(
+    enable: boolean,
+    callback: (level: string, text: string) => void,
+  ): void
 }
 
 export interface Module {
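For orientation, a minimal usage sketch of the multimodal surface added above (initMultimodal, MessagePart with image_url, and the image_paths completion option). It assumes the existing LlamaModelOptions accepts the GGUF path as `model` (that type is not shown in this diff); the model, mmproj, and image paths are placeholders, not files shipped with this package.

import { loadModel } from '@fugood/llama.node'

async function describeImage() {
  // Assumption: LlamaModelOptions takes the GGUF path as `model`.
  const ctx = await loadModel({ model: './model.gguf' })

  // Enable multimodal support with a projector file before sending images.
  const ok = await ctx.initMultimodal({ path: './mmproj.gguf', use_gpu: true })
  if (!ok) throw new Error('mmproj could not be loaded')

  // image_url parts are rewritten to <__image__> markers and collected into
  // image_paths by the LlamaContextWrapper added in lib/index below.
  const result = await ctx.completion({
    messages: [
      {
        role: 'user',
        content: [
          { type: 'text', text: 'Describe this image.' },
          { type: 'image_url', image_url: { url: './photo.jpg' } },
        ],
      },
    ],
  })
  console.log(result)

  await ctx.releaseMultimodal()
  await ctx.release()
}

describeImage()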
package/lib/index.js
CHANGED
@@ -51,12 +51,143 @@ function addNativeLogListener(listener) {
         },
     };
 }
+const getJsonSchema = (responseFormat) => {
+    var _a;
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_schema') {
+        return (_a = responseFormat.json_schema) === null || _a === void 0 ? void 0 : _a.schema;
+    }
+    if ((responseFormat === null || responseFormat === void 0 ? void 0 : responseFormat.type) === 'json_object') {
+        return responseFormat.schema || {};
+    }
+    return null;
+};
+class LlamaContextWrapper {
+    constructor(nativeCtx) {
+        this.ctx = nativeCtx;
+    }
+    getSystemInfo() {
+        return this.ctx.getSystemInfo();
+    }
+    getModelInfo() {
+        return this.ctx.getModelInfo();
+    }
+    isJinjaSupported() {
+        const { minja } = this.ctx.getModelInfo().chatTemplates;
+        return !!(minja === null || minja === void 0 ? void 0 : minja.toolUse) || !!(minja === null || minja === void 0 ? void 0 : minja.default);
+    }
+    isLlamaChatSupported() {
+        return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
+    }
+    _formatImageChat(messages) {
+        if (!messages)
+            return {
+                messages,
+                has_image: false,
+            };
+        const imagePaths = [];
+        return {
+            messages: messages.map((msg) => {
+                if (Array.isArray(msg.content)) {
+                    const content = msg.content.map((part) => {
+                        var _a;
+                        // Handle multimodal content
+                        if (part.type === 'image_url') {
+                            let path = ((_a = part.image_url) === null || _a === void 0 ? void 0 : _a.url) || '';
+                            imagePaths.push(path);
+                            return {
+                                type: 'text',
+                                text: '<__image__>',
+                            };
+                        }
+                        return part;
+                    });
+                    return Object.assign(Object.assign({}, msg), { content });
+                }
+                return msg;
+            }),
+            has_image: imagePaths.length > 0,
+            image_paths: imagePaths,
+        };
+    }
+    getFormattedChat(messages, template, params) {
+        const { messages: chat, has_image, image_paths, } = this._formatImageChat(messages);
+        const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
+        let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml';
+        if (template)
+            tmpl = template; // Force replace if provided
+        const jsonSchema = getJsonSchema(params === null || params === void 0 ? void 0 : params.response_format);
+        const result = this.ctx.getFormattedChat(chat, tmpl, {
+            jinja: useJinja,
+            json_schema: jsonSchema,
+            tools: params === null || params === void 0 ? void 0 : params.tools,
+            parallel_tool_calls: params === null || params === void 0 ? void 0 : params.parallel_tool_calls,
+            tool_choice: params === null || params === void 0 ? void 0 : params.tool_choice,
+        });
+        if (!useJinja) {
+            return {
+                type: 'llama-chat',
+                prompt: result,
+                has_image,
+                image_paths,
+            };
+        }
+        const jinjaResult = result;
+        jinjaResult.type = 'jinja';
+        jinjaResult.has_image = has_image;
+        jinjaResult.image_paths = image_paths;
+        return jinjaResult;
+    }
+    completion(options, callback) {
+        const { messages, image_paths = options.image_paths } = this._formatImageChat(options.messages);
+        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages, image_paths: options.image_paths || image_paths }), callback || (() => { }));
+    }
+    stopCompletion() {
+        return this.ctx.stopCompletion();
+    }
+    tokenize(text) {
+        return this.ctx.tokenize(text);
+    }
+    detokenize(tokens) {
+        return this.ctx.detokenize(tokens);
+    }
+    embedding(text) {
+        return this.ctx.embedding(text);
+    }
+    saveSession(path) {
+        return this.ctx.saveSession(path);
+    }
+    loadSession(path) {
+        return this.ctx.loadSession(path);
+    }
+    release() {
+        return this.ctx.release();
+    }
+    applyLoraAdapters(adapters) {
+        return this.ctx.applyLoraAdapters(adapters);
+    }
+    removeLoraAdapters(adapters) {
+        return this.ctx.removeLoraAdapters(adapters);
+    }
+    getLoadedLoraAdapters() {
+        return this.ctx.getLoadedLoraAdapters();
+    }
+    initMultimodal(options) {
+        return this.ctx.initMultimodal(options);
+    }
+    isMultimodalEnabled() {
+        return this.ctx.isMultimodalEnabled();
+    }
+    releaseMultimodal() {
+        return this.ctx.releaseMultimodal();
+    }
+}
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
     const variant = (_a = options.lib_variant) !== null && _a !== void 0 ? _a : 'default';
     (_b = mods[variant]) !== null && _b !== void 0 ? _b : (mods[variant] = yield (0, binding_1.loadModule)(options.lib_variant));
     refreshNativeLogSetup();
-
+    const nativeCtx = new mods[variant].LlamaContext(options);
+    return new LlamaContextWrapper(nativeCtx);
 });
 exports.loadModel = loadModel;
 exports.initLlama = binding_1.loadModule;
package/lib/index.ts
CHANGED
@@ -1,5 +1,16 @@
 import { loadModule, LlamaModelOptions } from './binding'
-import type {
+import type {
+  Module,
+  LlamaContext,
+  LibVariant,
+  ChatMessage,
+  LlamaCompletionOptions,
+  LlamaCompletionToken,
+  LlamaCompletionResult,
+  TokenizeResult,
+  EmbeddingResult,
+  CompletionResponseFormat,
+} from './binding'
 
 export * from './binding'
 
@@ -39,13 +50,202 @@ export function addNativeLogListener(
   }
 }
 
+const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
+  if (responseFormat?.type === 'json_schema') {
+    return responseFormat.json_schema?.schema
+  }
+  if (responseFormat?.type === 'json_object') {
+    return responseFormat.schema || {}
+  }
+  return null
+}
+
+class LlamaContextWrapper {
+  ctx: any
+
+  constructor(nativeCtx: any) {
+    this.ctx = nativeCtx
+  }
+
+  getSystemInfo(): string {
+    return this.ctx.getSystemInfo()
+  }
+
+  getModelInfo(): object {
+    return this.ctx.getModelInfo()
+  }
+
+  isJinjaSupported(): boolean {
+    const { minja } = this.ctx.getModelInfo().chatTemplates
+    return !!minja?.toolUse || !!minja?.default
+  }
+
+  isLlamaChatSupported(): boolean {
+    return !!this.ctx.getModelInfo().chatTemplates.llamaChat
+  }
+
+  _formatImageChat(messages: ChatMessage[] | undefined): {
+    messages: ChatMessage[] | undefined
+    has_image: boolean
+    image_paths?: string[]
+  } {
+    if (!messages)
+      return {
+        messages,
+        has_image: false,
+      }
+    const imagePaths: string[] = []
+    return {
+      messages: messages.map((msg) => {
+        if (Array.isArray(msg.content)) {
+          const content = msg.content.map((part) => {
+            // Handle multimodal content
+            if (part.type === 'image_url') {
+              let path = part.image_url?.url || ''
+              imagePaths.push(path)
+              return {
+                type: 'text',
+                text: '<__image__>',
+              }
+            }
+            return part
+          })
+
+          return {
+            ...msg,
+            content,
+          }
+        }
+        return msg
+      }),
+      has_image: imagePaths.length > 0,
+      image_paths: imagePaths,
+    }
+  }
+
+  getFormattedChat(
+    messages: ChatMessage[],
+    template?: string,
+    params?: {
+      jinja?: boolean
+      response_format?: CompletionResponseFormat
+      tools?: object
+      parallel_tool_calls?: object
+      tool_choice?: string
+    },
+  ): object {
+    const {
+      messages: chat,
+      has_image,
+      image_paths,
+    } = this._formatImageChat(messages)
+
+    const useJinja = this.isJinjaSupported() && params?.jinja
+    let tmpl = this.isLlamaChatSupported() || useJinja ? undefined : 'chatml'
+    if (template) tmpl = template // Force replace if provided
+    const jsonSchema = getJsonSchema(params?.response_format)
+
+    const result = this.ctx.getFormattedChat(chat, tmpl, {
+      jinja: useJinja,
+      json_schema: jsonSchema,
+      tools: params?.tools,
+      parallel_tool_calls: params?.parallel_tool_calls,
+      tool_choice: params?.tool_choice,
+    })
+
+    if (!useJinja) {
+      return {
+        type: 'llama-chat',
+        prompt: result as string,
+        has_image,
+        image_paths,
+      }
+    }
+    const jinjaResult = result
+    jinjaResult.type = 'jinja'
+    jinjaResult.has_image = has_image
+    jinjaResult.image_paths = image_paths
+    return jinjaResult
+  }
+
+  completion(
+    options: LlamaCompletionOptions,
+    callback?: (token: LlamaCompletionToken) => void,
+  ): Promise<LlamaCompletionResult> {
+    const { messages, image_paths = options.image_paths } =
+      this._formatImageChat(options.messages)
+    return this.ctx.completion({
+      ...options,
+      messages,
+      image_paths: options.image_paths || image_paths,
+    }, callback || (() => {}))
+  }
+
+  stopCompletion(): void {
+    return this.ctx.stopCompletion()
+  }
+
+  tokenize(text: string): Promise<TokenizeResult> {
+    return this.ctx.tokenize(text)
+  }
+
+  detokenize(tokens: number[]): Promise<string> {
+    return this.ctx.detokenize(tokens)
+  }
+
+  embedding(text: string): Promise<EmbeddingResult> {
+    return this.ctx.embedding(text)
+  }
+
+  saveSession(path: string): Promise<void> {
+    return this.ctx.saveSession(path)
+  }
+
+  loadSession(path: string): Promise<void> {
+    return this.ctx.loadSession(path)
+  }
+
+  release(): Promise<void> {
+    return this.ctx.release()
+  }
+
+  applyLoraAdapters(adapters: { path: string; scaled: number }[]): void {
+    return this.ctx.applyLoraAdapters(adapters)
+  }
+
+  removeLoraAdapters(adapters: { path: string }[]): void {
+    return this.ctx.removeLoraAdapters(adapters)
+  }
+
+  getLoadedLoraAdapters(): { path: string; scaled: number }[] {
+    return this.ctx.getLoadedLoraAdapters()
+  }
+
+  initMultimodal(options: {
+    path: string
+    use_gpu?: boolean
+  }): Promise<boolean> {
+    return this.ctx.initMultimodal(options)
+  }
+
+  isMultimodalEnabled(): Promise<boolean> {
+    return this.ctx.isMultimodalEnabled()
+  }
+
+  releaseMultimodal(): Promise<void> {
+    return this.ctx.releaseMultimodal()
+  }
+}
+
 export const loadModel = async (
   options: LlamaModelOptionsExtended,
-): Promise<
+): Promise<LlamaContextWrapper> => {
   const variant = options.lib_variant ?? 'default'
   mods[variant] ??= await loadModule(options.lib_variant)
   refreshNativeLogSetup()
-
+
+  const nativeCtx = new mods[variant].LlamaContext(options)
+  return new LlamaContextWrapper(nativeCtx)
 }
 
 export const initLlama = loadModule
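A small sketch of how the wrapper returned by the new loadModel might be driven, based on the getFormattedChat signature and getJsonSchema helper above. The schema and file path are illustrative, and the `model` option key is assumed from the package's existing LlamaModelOptions (not shown in this diff).

import { loadModel } from '@fugood/llama.node'

async function main() {
  // loadModel now resolves to a LlamaContextWrapper around the native context.
  const ctx = await loadModel({ model: './model.gguf' })

  // With jinja enabled (and a model that ships a Jinja chat template), the result is the
  // 'jinja' variant; otherwise the wrapper falls back to llama-chat / chatml formatting.
  const formatted = ctx.getFormattedChat(
    [{ role: 'user', content: 'List three fruits as a JSON array.' }],
    undefined,
    {
      jinja: true,
      response_format: {
        type: 'json_schema',
        json_schema: { schema: { type: 'array', items: { type: 'string' } } },
      },
    },
  )
  console.log(formatted)

  await ctx.release()
}

main()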
package/package.json
CHANGED
@@ -1,10 +1,11 @@
 {
   "name": "@fugood/llama.node",
   "access": "public",
-  "version": "0.
+  "version": "0.4.1",
   "description": "An another Node binding of llama.cpp",
   "main": "lib/index.js",
   "scripts": {
+    "pretest": "node scripts/download-test-models.js",
     "test": "jest",
     "build": "tsc",
     "prepack": "yarn build",
package/src/EmbeddingWorker.cpp
CHANGED
@@ -6,7 +6,7 @@ EmbeddingWorker::EmbeddingWorker(const Napi::CallbackInfo &info,
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _params(params) {}
 
 void EmbeddingWorker::Execute() {
-
+  llama_kv_self_clear(_sess->context());
   auto tokens = ::common_tokenize(_sess->context(), _text, true);
   // add SEP if not present
   auto vocab = llama_model_get_vocab(_sess->model());