@fugood/llama.node 0.4.7 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +20 -6
- package/lib/index.js +41 -17
- package/lib/index.ts +50 -23
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +9 -9
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +37 -18
- package/src/LlamaContext.h +1 -0
- package/src/TokenizeWorker.cpp +16 -12
- package/src/TokenizeWorker.h +2 -2
- package/src/common.hpp +54 -50
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
Binary files changed (no textual diff): the prebuilt llama-node.node binaries for each platform under package/bin, plus the Windows node.lib import libraries (package/bin/win32/arm64/node.lib, package/bin/win32/x64/node.lib, and the win32-vulkan variants).

package/lib/binding.ts
CHANGED
@@ -6,6 +6,11 @@ export type MessagePart = {
   text?: string,
   image_url?: {
     url?: string
+  },
+  input_audio?: {
+    format: string
+    data?: string
+    url?: string
   }
 }
 
@@ -103,12 +108,12 @@ export type LlamaCompletionOptions = {
   grammar_triggers?: { type: number; word: string; at_start: boolean }[]
   preserved_tokens?: string[]
   /**
-   * Path(s) to
-   * When provided, the
+   * Path(s) to media file(s) to process before generating text.
+   * When provided, the media will be processed and added to the context.
    * Requires multimodal support to be enabled via initMultimodal.
    * Supports both file paths and base64 data URLs.
    */
-
+  media_paths?: string | string[]
 }
 
 export type LlamaCompletionResult = {
@@ -137,10 +142,10 @@ export type LlamaCompletionToken = {
 
 export type TokenizeResult = {
   tokens: Int32Array
-
+  has_media: boolean
   bitmap_hashes: string[]
   chunk_pos: number[]
-
+  chunk_pos_media: number[]
 }
 
 export type EmbeddingResult = {
@@ -167,7 +172,7 @@ export interface LlamaContext {
     callback?: (token: LlamaCompletionToken) => void,
   ): Promise<LlamaCompletionResult>
   stopCompletion(): void
-  tokenize(text: string,
+  tokenize(text: string, media_paths?: string[]): Promise<TokenizeResult>
   detokenize(tokens: number[]): Promise<string>
   embedding(text: string): Promise<EmbeddingResult>
   saveSession(path: string): Promise<void>
@@ -189,6 +194,15 @@ export interface LlamaContext {
    */
   isMultimodalEnabled(): Promise<boolean>
 
+  /**
+   * Get multimodal support capabilities
+   * @returns Promise resolving to an object with vision and audio support
+   */
+  getMultimodalSupport(): Promise<{
+    vision: boolean
+    audio: boolean
+  }>
+
   /**
    * Release multimodal support
    */
package/lib/index.js
CHANGED
@@ -23,10 +23,11 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
 });
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = exports.toggleNativeLog = void 0;
+exports.loadLlamaModelInfo = exports.initLlama = exports.loadModel = exports.toggleNativeLog = exports.MTMD_DEFAULT_MEDIA_MARKER = void 0;
 exports.addNativeLogListener = addNativeLogListener;
 const binding_1 = require("./binding");
 __exportStar(require("./binding"), exports);
+exports.MTMD_DEFAULT_MEDIA_MARKER = '<__media__>';
 const mods = {};
 const logListeners = [];
 const logCallback = (level, text) => {
@@ -78,13 +79,13 @@ class LlamaContextWrapper {
     isLlamaChatSupported() {
         return !!this.ctx.getModelInfo().chatTemplates.llamaChat;
     }
-
+    _formatMediaChat(messages) {
         if (!messages)
             return {
                 messages,
-
+                has_media: false,
             };
-        const
+        const mediaPaths = [];
         return {
             messages: messages.map((msg) => {
                 if (Array.isArray(msg.content)) {
@@ -93,10 +94,30 @@
                     // Handle multimodal content
                     if (part.type === 'image_url') {
                         let path = ((_a = part.image_url) === null || _a === void 0 ? void 0 : _a.url) || '';
-
+                        mediaPaths.push(path);
                         return {
                             type: 'text',
-                            text:
+                            text: exports.MTMD_DEFAULT_MEDIA_MARKER,
+                        };
+                    }
+                    else if (part.type === 'input_audio') {
+                        const { input_audio: audio } = part;
+                        if (!audio)
+                            throw new Error('input_audio is required');
+                        const { format } = audio;
+                        if (format != 'wav' && format != 'mp3') {
+                            throw new Error(`Unsupported audio format: ${format}`);
+                        }
+                        if (audio.url) {
+                            const path = audio.url.replace(/file:\/\//, '');
+                            mediaPaths.push(path);
+                        }
+                        else if (audio.data) {
+                            mediaPaths.push(audio.data);
+                        }
+                        return {
+                            type: 'text',
+                            text: exports.MTMD_DEFAULT_MEDIA_MARKER,
                         };
                     }
                     return part;
@@ -105,12 +126,12 @@
                 }
                 return msg;
             }),
-
-
+            has_media: mediaPaths.length > 0,
+            media_paths: mediaPaths,
         };
     }
     getFormattedChat(messages, template, params) {
-        const { messages: chat,
+        const { messages: chat, has_media, media_paths, } = this._formatMediaChat(messages);
         const useJinja = this.isJinjaSupported() && (params === null || params === void 0 ? void 0 : params.jinja);
         let tmpl;
         if (template)
@@ -127,25 +148,25 @@
             return {
                 type: 'llama-chat',
                 prompt: result,
-
-
+                has_media,
+                media_paths,
             };
         }
         const jinjaResult = result;
         jinjaResult.type = 'jinja';
-        jinjaResult.
-        jinjaResult.
+        jinjaResult.has_media = has_media;
+        jinjaResult.media_paths = media_paths;
         return jinjaResult;
     }
     completion(options, callback) {
-        const { messages,
-        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages,
+        const { messages, media_paths = options.media_paths } = this._formatMediaChat(options.messages);
+        return this.ctx.completion(Object.assign(Object.assign({}, options), { messages, media_paths: options.media_paths || media_paths }), callback || (() => { }));
    }
    stopCompletion() {
        return this.ctx.stopCompletion();
    }
-    tokenize(text, {
-    return this.ctx.tokenize(text,
+    tokenize(text, { media_paths } = {}) {
+        return this.ctx.tokenize(text, media_paths);
    }
    detokenize(tokens) {
        return this.ctx.detokenize(tokens);
@@ -180,6 +201,9 @@
     releaseMultimodal() {
         return this.ctx.releaseMultimodal();
     }
+    getMultimodalSupport() {
+        return this.ctx.getMultimodalSupport();
+    }
 }
 const loadModel = (options) => __awaiter(void 0, void 0, void 0, function* () {
     var _a, _b;
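The wrapper's new _formatMediaChat pass rewrites image_url and input_audio parts into plain text parts carrying the media marker, while collecting the underlying file paths (or base64 data) into media_paths. A sketch of that transformation on an assumed input message (shapes are illustrative, not captured output):

// Input message handed to completion():
const input = [
  {
    role: 'user',
    content: [
      { type: 'image_url', image_url: { url: '/tmp/cat.png' } },
      { type: 'input_audio', input_audio: { format: 'wav', url: 'file:///tmp/meow.wav' } },
      { type: 'text', text: 'What animal is this?' },
    ],
  },
]

// What _formatMediaChat produces before calling into the native context:
const formatted = {
  messages: [
    {
      role: 'user',
      content: [
        { type: 'text', text: '<__media__>' }, // MTMD_DEFAULT_MEDIA_MARKER replaces the image part
        { type: 'text', text: '<__media__>' }, // and the audio part (file:// prefix stripped from its url)
        { type: 'text', text: 'What animal is this?' },
      ],
    },
  ],
  has_media: true,
  media_paths: ['/tmp/cat.png', '/tmp/meow.wav'],
}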
package/lib/index.ts
CHANGED
@@ -14,6 +14,8 @@ import type {
 
 export * from './binding'
 
+export const MTMD_DEFAULT_MEDIA_MARKER = '<__media__>'
+
 export interface LlamaModelOptionsExtended extends LlamaModelOptions {
   lib_variant?: LibVariant
 }
@@ -63,8 +65,8 @@ const getJsonSchema = (responseFormat?: CompletionResponseFormat) => {
 export type FormattedChatResult = {
   type: 'jinja' | 'llama-chat'
   prompt: string
-
-
+  has_media: boolean
+  media_paths?: Array<string>
 }
 
 class LlamaContextWrapper {
@@ -91,17 +93,17 @@ class LlamaContextWrapper {
     return !!this.ctx.getModelInfo().chatTemplates.llamaChat
   }
 
-
+  _formatMediaChat(messages: ChatMessage[] | undefined): {
     messages: ChatMessage[] | undefined
-
-
+    has_media: boolean
+    media_paths?: string[]
   } {
     if (!messages)
       return {
         messages,
-
+        has_media: false,
       }
-    const
+    const mediaPaths: string[] = []
     return {
       messages: messages.map((msg) => {
         if (Array.isArray(msg.content)) {
@@ -109,10 +111,28 @@
           // Handle multimodal content
           if (part.type === 'image_url') {
             let path = part.image_url?.url || ''
-
+            mediaPaths.push(path)
             return {
               type: 'text',
-              text:
+              text: MTMD_DEFAULT_MEDIA_MARKER,
+            }
+          } else if (part.type === 'input_audio') {
+            const { input_audio: audio } = part
+            if (!audio) throw new Error('input_audio is required')
+
+            const { format } = audio
+            if (format != 'wav' && format != 'mp3') {
+              throw new Error(`Unsupported audio format: ${format}`)
+            }
+            if (audio.url) {
+              const path = audio.url.replace(/file:\/\//, '')
+              mediaPaths.push(path)
+            } else if (audio.data) {
+              mediaPaths.push(audio.data)
+            }
+            return {
+              type: 'text',
+              text: MTMD_DEFAULT_MEDIA_MARKER,
             }
           }
           return part
@@ -125,8 +145,8 @@
         }
         return msg
       }),
-
-
+      has_media: mediaPaths.length > 0,
+      media_paths: mediaPaths,
     }
   }
 
@@ -143,9 +163,9 @@
   ): FormattedChatResult {
     const {
       messages: chat,
-
-
-    } = this.
+      has_media,
+      media_paths,
+    } = this._formatMediaChat(messages)
 
     const useJinja = this.isJinjaSupported() && params?.jinja
     let tmpl
@@ -164,14 +184,14 @@
       return {
         type: 'llama-chat',
         prompt: result as string,
-
-
+        has_media,
+        media_paths,
       }
     }
     const jinjaResult = result
     jinjaResult.type = 'jinja'
-    jinjaResult.
-    jinjaResult.
+    jinjaResult.has_media = has_media
+    jinjaResult.media_paths = media_paths
     return jinjaResult
   }
 
@@ -179,12 +199,12 @@
     options: LlamaCompletionOptions,
     callback?: (token: LlamaCompletionToken) => void,
   ): Promise<LlamaCompletionResult> {
-    const { messages,
-      this.
+    const { messages, media_paths = options.media_paths } =
+      this._formatMediaChat(options.messages)
     return this.ctx.completion({
       ...options,
       messages,
-
+      media_paths: options.media_paths || media_paths,
     }, callback || (() => {}))
   }
 
@@ -192,8 +212,8 @@
     return this.ctx.stopCompletion()
   }
 
-  tokenize(text: string, {
-    return this.ctx.tokenize(text,
+  tokenize(text: string, { media_paths }: { media_paths?: string[] } = {}): Promise<TokenizeResult> {
+    return this.ctx.tokenize(text, media_paths)
  }
 
  detokenize(tokens: number[]): Promise<string> {
@@ -242,6 +262,13 @@
   releaseMultimodal(): Promise<void> {
     return this.ctx.releaseMultimodal()
   }
+
+  getMultimodalSupport(): Promise<{
+    vision: boolean
+    audio: boolean
+  }> {
+    return this.ctx.getMultimodalSupport()
+  }
 }
 
 export const loadModel = async (
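In the TypeScript source, completion() now merges media from two places: paths extracted from message parts by _formatMediaChat and an explicit options.media_paths, with the explicit option taking precedence. A usage sketch against the public API; the loadModel option name and the prompt field are assumptions, not taken from this diff:

import { loadModel, MTMD_DEFAULT_MEDIA_MARKER } from '@fugood/llama.node'

async function main() {
  // 'model' as the option name is assumed here; multimodal support must
  // additionally be enabled via initMultimodal before passing media.
  const ctx = await loadModel({ model: '/models/model.gguf' })

  // Explicit media_paths wins over paths extracted from message parts
  // (media_paths: options.media_paths || media_paths in completion()).
  const result = await ctx.completion({
    prompt: `Describe this image: ${MTMD_DEFAULT_MEDIA_MARKER}`,
    media_paths: ['/tmp/cat.png'],
  })
  console.log(result)
}

main()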
package/package.json
CHANGED
(version bump: 0.4.7 → 0.5.0)

package/src/LlamaCompletionWorker.cpp
CHANGED
@@ -29,10 +29,10 @@ LlamaCompletionWorker::LlamaCompletionWorker(
     Napi::Function callback, common_params params,
     std::vector<std::string> stop_words,
     int32_t chat_format,
-    std::vector<std::string>
+    std::vector<std::string> media_paths)
     : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess),
       _params(params), _stop_words(stop_words), _chat_format(chat_format),
-
+      _media_paths(media_paths) {
   if (!callback.IsEmpty()) {
     _tsfn = Napi::ThreadSafeFunction::New(info.Env(), callback,
                                           "LlamaCompletionCallback", 0, 1);
@@ -64,19 +64,19 @@ void LlamaCompletionWorker::Execute() {
   LlamaCppSampling sampling{common_sampler_init(model, _params.sampling),
                             common_sampler_free};
 
-  // Process
-  if (!
+  // Process media if any are provided
+  if (!_media_paths.empty()) {
     const auto* mtmd_ctx = _sess->get_mtmd_ctx();
 
     if (mtmd_ctx != nullptr) {
-      // Process the
+      // Process the media and get the tokens
       try {
-        n_cur =
+        n_cur = processMediaPrompt(
           ctx,
           mtmd_ctx,
           _sess,
           _params,
-
+          _media_paths
         );
       } catch (const std::exception& e) {
         SetError(e.what());
@@ -85,12 +85,12 @@
       }
 
       if (n_cur <= 0) {
-        SetError("Failed to process
+        SetError("Failed to process media");
         _sess->get_mutex().unlock();
         return;
       }
 
-      fprintf(stdout, "[DEBUG]
+      fprintf(stdout, "[DEBUG] Media processing successful, n_cur=%zu, tokens=%zu\n",
               n_cur, _sess->tokens_ptr()->size());
 
       n_input = _sess->tokens_ptr()->size();

package/src/LlamaCompletionWorker.h
CHANGED

@@ -20,7 +20,7 @@ public:
                         Napi::Function callback, common_params params,
                         std::vector<std::string> stop_words,
                         int32_t chat_format,
-                        std::vector<std::string>
+                        std::vector<std::string> media_paths = {});
 
   ~LlamaCompletionWorker();
 
@@ -44,7 +44,7 @@ private:
   common_params _params;
   std::vector<std::string> _stop_words;
   int32_t _chat_format;
-  std::vector<std::string>
+  std::vector<std::string> _media_paths;
   std::function<void()> _onComplete;
   bool _has_callback = false;
   bool _stop = false;
package/src/LlamaContext.cpp
CHANGED
@@ -135,6 +135,9 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
            static_cast<napi_property_attributes>(napi_enumerable)),
        StaticMethod<&LlamaContext::ToggleNativeLog>(
            "toggleNativeLog",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::GetMultimodalSupport>(
+           "getMultimodalSupport",
            static_cast<napi_property_attributes>(napi_enumerable))});
   Napi::FunctionReference *constructor = new Napi::FunctionReference();
   *constructor = Napi::Persistent(func);
@@ -607,22 +610,22 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }
 
-  // Process
-  std::vector<std::string>
-  if (options.Has("
-    if (options.Get("
-      auto
-      for (size_t i = 0; i <
-
+  // Process media_paths parameter
+  std::vector<std::string> media_paths;
+  if (options.Has("media_paths")) {
+    if (options.Get("media_paths").IsArray()) {
+      auto media_paths_array = options.Get("media_paths").As<Napi::Array>();
+      for (size_t i = 0; i < media_paths_array.Length(); i++) {
+        media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
       }
-    } else if (options.Get("
-
+    } else if (options.Get("media_paths").IsString()) {
+      media_paths.push_back(options.Get("media_paths").ToString().Utf8Value());
     }
   }
 
-  // Check if multimodal is enabled when
-  if (!
-  Napi::Error::New(env, "Multimodal support must be enabled via initMultimodal to use
+  // Check if multimodal is enabled when media_paths are provided
+  if (!media_paths.empty() && !(_has_multimodal && _mtmd_ctx != nullptr)) {
+    Napi::Error::New(env, "Multimodal support must be enabled via initMultimodal to use media_paths").ThrowAsJavaScriptException();
     return env.Undefined();
   }
 
@@ -808,7 +811,7 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format,
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format, media_paths);
   worker->Queue();
   _wip = worker;
   worker->OnComplete([this]() { _wip = nullptr; });
@@ -833,14 +836,14 @@ Napi::Value LlamaContext::Tokenize(const Napi::CallbackInfo &info) {
         .ThrowAsJavaScriptException();
   }
   auto text = info[0].ToString().Utf8Value();
-  std::vector<std::string>
+  std::vector<std::string> media_paths;
   if (info.Length() >= 2 && info[1].IsArray()) {
-    auto
-    for (size_t i = 0; i <
-
+    auto media_paths_array = info[1].As<Napi::Array>();
+    for (size_t i = 0; i < media_paths_array.Length(); i++) {
+      media_paths.push_back(media_paths_array.Get(i).ToString().Utf8Value());
     }
   }
-  auto *worker = new TokenizeWorker(info, _sess, text,
+  auto *worker = new TokenizeWorker(info, _sess, text, media_paths);
   worker->Queue();
   return worker->Promise();
 }
@@ -1081,6 +1084,22 @@ Napi::Value LlamaContext::IsMultimodalEnabled(const Napi::CallbackInfo &info) {
   return Napi::Boolean::New(info.Env(), _has_multimodal && _mtmd_ctx != nullptr);
 }
 
+// getMultimodalSupport(): Promise<{ vision: boolean, audio: boolean }>
+Napi::Value LlamaContext::GetMultimodalSupport(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  auto result = Napi::Object::New(env);
+
+  if (_has_multimodal && _mtmd_ctx != nullptr) {
+    result.Set("vision", Napi::Boolean::New(env, mtmd_support_vision(_mtmd_ctx)));
+    result.Set("audio", Napi::Boolean::New(env, mtmd_support_audio(_mtmd_ctx)));
+  } else {
+    result.Set("vision", Napi::Boolean::New(env, false));
+    result.Set("audio", Napi::Boolean::New(env, false));
+  }
+
+  return result;
+}
+
 // releaseMultimodal(): void
 void LlamaContext::ReleaseMultimodal(const Napi::CallbackInfo &info) {
   if (_mtmd_ctx != nullptr) {
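On the native side, getMultimodalSupport() reports the mtmd context's vision and audio capabilities (both false when multimodal is not initialized), and Completion rejects media_paths unless initMultimodal has been called. A guard pattern a caller might use; the context is assumed to be the wrapper returned by loadModel with multimodal already enabled:

import { loadModel } from '@fugood/llama.node'

type Ctx = Awaited<ReturnType<typeof loadModel>>

async function describeIfSupported(ctx: Ctx, imagePath: string) {
  // If initMultimodal was never called, the native Completion call rejects
  // media outright ("Multimodal support must be enabled via initMultimodal
  // to use media_paths"); getMultimodalSupport() additionally tells you
  // whether the loaded projector handles images and/or audio.
  const { vision } = await ctx.getMultimodalSupport()
  if (!vision) {
    throw new Error('Loaded multimodal projector does not support images')
  }
  return ctx.completion({
    messages: [
      { role: 'user', content: [{ type: 'image_url', image_url: { url: imagePath } }] },
    ],
  })
}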
package/src/LlamaContext.h
CHANGED
@@ -31,6 +31,7 @@ private:
   // Multimodal methods
   Napi::Value InitMultimodal(const Napi::CallbackInfo &info);
   Napi::Value IsMultimodalEnabled(const Napi::CallbackInfo &info);
+  Napi::Value GetMultimodalSupport(const Napi::CallbackInfo &info);
   void ReleaseMultimodal(const Napi::CallbackInfo &info);
 
   std::string _info;
package/src/TokenizeWorker.cpp
CHANGED
@@ -2,17 +2,22 @@
 #include "LlamaContext.h"
 
 TokenizeWorker::TokenizeWorker(const Napi::CallbackInfo &info,
-                               LlamaSessionPtr &sess, std::string text, std::vector<std::string>
-    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text),
+                               LlamaSessionPtr &sess, std::string text, std::vector<std::string> media_paths)
+    : AsyncWorker(info.Env()), Deferred(info.Env()), _sess(sess), _text(text), _media_paths(media_paths) {}
 
 void TokenizeWorker::Execute() {
   auto mtmd_ctx = _sess->get_mtmd_ctx();
-  if (!
-
+  if (!_media_paths.empty()) {
+    try {
+      _result = tokenizeWithMedia(mtmd_ctx, _text, _media_paths);
+      mtmd_input_chunks_free(_result.chunks);
+    } catch (const std::exception &e) {
+      SetError(e.what());
+    }
   } else {
     const auto tokens = common_tokenize(_sess->context(), _text, false);
     _result.tokens = tokens;
-    _result.
+    _result.has_media = false;
   }
 }
 
@@ -24,9 +29,8 @@ void TokenizeWorker::OnOK() {
   memcpy(tokens.Data(), _result.tokens.data(),
          _result.tokens.size() * sizeof(llama_token));
   result.Set("tokens", tokens);
-
-
-
+  result.Set("has_media", _result.has_media);
+  if (_result.has_media) {
     auto bitmap_hashes = Napi::Array::New(Napi::AsyncWorker::Env(), _result.bitmap_hashes.size());
     for (size_t i = 0; i < _result.bitmap_hashes.size(); i++) {
       bitmap_hashes.Set(i, _result.bitmap_hashes[i]);
@@ -37,11 +41,11 @@
       chunk_pos.Set(i, _result.chunk_pos[i]);
     }
     result.Set("chunk_pos", chunk_pos);
-    auto
-    for (size_t i = 0; i < _result.
-
+    auto chunk_pos_media = Napi::Array::New(Napi::AsyncWorker::Env(), _result.chunk_pos_media.size());
+    for (size_t i = 0; i < _result.chunk_pos_media.size(); i++) {
+      chunk_pos_media.Set(i, _result.chunk_pos_media[i]);
     }
-    result.Set("
+    result.Set("chunk_pos_media", chunk_pos_media);
   }
   Napi::Promise::Deferred::Resolve(result);
 }
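TokenizeWorker now routes through tokenizeWithMedia when media paths are supplied and surfaces the extra fields on the result (has_media, bitmap_hashes, chunk_pos, chunk_pos_media). A sketch of reading those fields through the wrapper's tokenize(); the field meanings in the comments are inferred from their names, and the context is assumed to have multimodal enabled:

import { loadModel } from '@fugood/llama.node'

async function inspectMediaTokens(ctx: Awaited<ReturnType<typeof loadModel>>) {
  const res = await ctx.tokenize('Describe: <__media__>', {
    media_paths: ['/tmp/cat.png'],
  })
  console.log(res.tokens.length)    // Int32Array of token ids
  console.log(res.has_media)        // true when media chunks were produced
  console.log(res.bitmap_hashes)    // one hash per decoded bitmap
  console.log(res.chunk_pos)        // start position of each chunk
  console.log(res.chunk_pos_media)  // start positions of the media chunks
}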
package/src/TokenizeWorker.h
CHANGED
@@ -5,7 +5,7 @@ class TokenizeWorker : public Napi::AsyncWorker,
                        public Napi::Promise::Deferred {
 public:
   TokenizeWorker(const Napi::CallbackInfo &info, LlamaSessionPtr &sess,
-                 std::string text, std::vector<std::string>
+                 std::string text, std::vector<std::string> media_paths);
 
 protected:
   void Execute();
@@ -15,6 +15,6 @@ protected:
 private:
   LlamaSessionPtr _sess;
   std::string _text;
-  std::vector<std::string>
+  std::vector<std::string> _media_paths;
   TokenizeResult _result;
 };