@fugood/llama.node 0.4.7 → 0.5.0
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- package/bin/darwin/arm64/llama-node.node +0 -0
- package/bin/darwin/x64/llama-node.node +0 -0
- package/bin/linux/arm64/llama-node.node +0 -0
- package/bin/linux/x64/llama-node.node +0 -0
- package/bin/linux-cuda/arm64/llama-node.node +0 -0
- package/bin/linux-cuda/x64/llama-node.node +0 -0
- package/bin/linux-vulkan/arm64/llama-node.node +0 -0
- package/bin/linux-vulkan/x64/llama-node.node +0 -0
- package/bin/win32/arm64/llama-node.node +0 -0
- package/bin/win32/arm64/node.lib +0 -0
- package/bin/win32/x64/llama-node.node +0 -0
- package/bin/win32/x64/node.lib +0 -0
- package/bin/win32-vulkan/arm64/llama-node.node +0 -0
- package/bin/win32-vulkan/arm64/node.lib +0 -0
- package/bin/win32-vulkan/x64/llama-node.node +0 -0
- package/bin/win32-vulkan/x64/node.lib +0 -0
- package/lib/binding.ts +20 -6
- package/lib/index.js +41 -17
- package/lib/index.ts +50 -23
- package/package.json +1 -1
- package/src/LlamaCompletionWorker.cpp +9 -9
- package/src/LlamaCompletionWorker.h +2 -2
- package/src/LlamaContext.cpp +37 -18
- package/src/LlamaContext.h +1 -0
- package/src/TokenizeWorker.cpp +16 -12
- package/src/TokenizeWorker.h +2 -2
- package/src/common.hpp +54 -50
- package/src/llama.cpp/.github/workflows/build.yml +2 -2
- package/src/llama.cpp/.github/workflows/release.yml +152 -129
- package/src/llama.cpp/.github/workflows/winget.yml +42 -0
- package/src/llama.cpp/common/arg.cpp +14 -13
- package/src/llama.cpp/common/common.cpp +4 -75
- package/src/llama.cpp/common/common.h +7 -12
- package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
- package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
- package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
- package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
- package/src/llama.cpp/examples/simple/simple.cpp +1 -1
- package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
- package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
- package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
- package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
- package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
- package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +11 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
- package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
- package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
- package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
- package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
- package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
- package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
- package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
- package/src/llama.cpp/ggml/src/ggml.c +64 -18
- package/src/llama.cpp/include/llama.h +24 -124
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
- package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
- package/src/llama.cpp/src/llama-batch.cpp +3 -1
- package/src/llama.cpp/src/llama-context.cpp +60 -110
- package/src/llama.cpp/src/llama-graph.cpp +137 -233
- package/src/llama.cpp/src/llama-graph.h +49 -7
- package/src/llama.cpp/src/llama-hparams.cpp +17 -1
- package/src/llama.cpp/src/llama-hparams.h +34 -5
- package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
- package/src/llama.cpp/src/llama-kv-cache.h +201 -85
- package/src/llama.cpp/src/llama-memory.h +3 -2
- package/src/llama.cpp/src/llama-model.cpp +273 -94
- package/src/llama.cpp/src/llama-model.h +4 -1
- package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
- package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
- package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
- package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
- package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
- package/src/llama.cpp/tools/mtmd/clip.h +6 -4
- package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
- package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
- package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
- package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
- package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
- package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
- package/src/llama.cpp/tools/run/run.cpp +2 -2
- package/src/llama.cpp/tools/server/server.cpp +158 -47
- package/src/llama.cpp/tools/server/utils.hpp +71 -43
- package/src/llama.cpp/tools/tts/tts.cpp +4 -2
--- /dev/null
+++ b/package/src/llama.cpp/tools/mtmd/mtmd-audio.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "ggml.h"
+
+#include <cstdint>
+#include <vector>
+#include <string>
+
+#define WHISPER_ASSERT GGML_ASSERT
+
+#define WHISPER_SAMPLE_RATE 16000
+#define WHISPER_N_FFT       400
+#define WHISPER_HOP_LENGTH  160
+#define WHISPER_CHUNK_SIZE  30
+
+#define COMMON_SAMPLE_RATE 16000
+
+namespace whisper_preprocessor {
+
+struct whisper_mel {
+    int n_len;
+    int n_len_org;
+    int n_mel;
+
+    std::vector<float> data;
+};
+
+struct whisper_filters {
+    int32_t n_mel;
+    int32_t n_fft;
+
+    std::vector<float> data;
+};
+
+extern bool preprocess_audio(
+        const float * samples,
+        size_t n_samples,
+        const whisper_filters & filters,
+        std::vector<whisper_mel> & output);
+
+} // namespace whisper_preprocessor
+
+
+// TODO @ngxson : move this helper to mtmd-helpers.cpp
+namespace audio_helpers {
+
+extern bool is_audio_file(const char * buf, size_t len);
+
+extern bool decode_audio_from_buf(
+        const unsigned char * buf_in,
+        size_t len,
+        int target_sampler_rate,
+        std::vector<float> & pcmf32_mono);
+
+} // namespace audio_helpers
+
+
+namespace whisper_precalc_filters {
+
+extern whisper_preprocessor::whisper_filters get_128_bins();
+
+} // namespace whisper_precalc_filters
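Note: the new header above only declares the audio front end. A minimal sketch of how the declarations compose, going from encoded file bytes (decoded by the bundled miniaudio.h) to mel-spectrogram chunks; the wrapper function itself is illustrative, not part of the package:

#include "mtmd-audio.h"

#include <vector>

// Hypothetical caller of the API declared in mtmd-audio.h above.
static bool mel_from_bytes(const unsigned char * buf, size_t len,
                           std::vector<whisper_preprocessor::whisper_mel> & mels) {
    if (!audio_helpers::is_audio_file((const char *) buf, len)) {
        return false; // not a recognized audio container
    }
    // Decode and resample to mono PCM at the rate the mel filters expect.
    std::vector<float> pcmf32_mono;
    if (!audio_helpers::decode_audio_from_buf(buf, len, COMMON_SAMPLE_RATE, pcmf32_mono)) {
        return false;
    }
    // Precomputed 128-bin mel filter bank (see whisper_precalc_filters).
    const whisper_preprocessor::whisper_filters filters = whisper_precalc_filters::get_128_bins();
    // 400-sample FFT window, 160-sample hop: one mel frame per 10 ms at 16 kHz.
    return whisper_preprocessor::preprocess_audio(pcmf32_mono.data(), pcmf32_mono.size(),
                                                  filters, mels);
}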
--- a/package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp
+++ b/package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp
@@ -37,10 +37,10 @@ static volatile bool g_is_interrupted = false;
 static void show_additional_info(int /*argc*/, char ** argv) {
     LOG(
         "Experimental CLI for multimodal\n\n"
-        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
+        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> --audio <audio> -p <prompt>\n\n"
         "  -m and --mmproj are required\n"
         "  -hf user/repo can replace both -m and --mmproj in most cases\n"
-        "  --image and -p are optional, if NOT provided, the CLI will run in chat mode\n"
+        "  --image, --audio and -p are optional, if NOT provided, the CLI will run in chat mode\n"
         "  to disable using GPU for mmproj model, add --no-mmproj-offload\n",
         argv[0]
     );
@@ -142,7 +142,7 @@ struct mtmd_cli_context {
         );
     }
 
-    bool load_image(const std::string & fname) {
+    bool load_media(const std::string & fname) {
         mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(fname.c_str()));
         if (!bmp.ptr) {
             return false;
@@ -243,7 +243,7 @@ int main(int argc, char ** argv) {
     common_params params;
     params.sampling.temp = 0.2; // lower temp by default for better quality
 
-    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) {
         return 1;
     }
 
@@ -283,14 +283,14 @@
 
     if (is_single_turn) {
         g_is_generating = true;
-        if (params.prompt.find("<__image__>") == std::string::npos) {
-            params.prompt += " <__image__>";
+        if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
+            params.prompt += mtmd_default_marker();
         }
         common_chat_msg msg;
         msg.role = "user";
         msg.content = params.prompt;
         for (const auto & image : params.image) {
-            if (!ctx.load_image(image)) {
+            if (!ctx.load_media(image)) {
                 return 1; // error is already printed by libmtmd
             }
         }
@@ -303,7 +303,12 @@
 
     } else {
         LOG("\n Running in chat mode, available commands:");
-        LOG("\n   /image <path>    load an image");
+        if (mtmd_support_vision(ctx.ctx_vision.get())) {
+            LOG("\n   /image <path>    load an image");
+        }
+        if (mtmd_support_audio(ctx.ctx_vision.get())) {
+            LOG("\n   /audio <path>    load an audio");
+        }
         LOG("\n   /clear           clear the chat history");
         LOG("\n   /quit or /exit   exit the program");
         LOG("\n");
@@ -333,15 +338,17 @@
             continue;
         }
         g_is_generating = true;
-        if (line.find("/image") == 0) {
+        bool is_image = line == "/image" || line.find("/image ") == 0;
+        bool is_audio = line == "/audio" || line.find("/audio ") == 0;
+        if (is_image || is_audio) {
            if (line.size() < 8) {
-                LOG_ERR("ERR: Missing image filename\n");
+                LOG_ERR("ERR: Missing media filename\n");
                 continue;
             }
-            std::string image = line.substr(7);
-            if (ctx.load_image(image)) {
-                LOG("Image %s loaded\n", image.c_str());
-                content += "<__image__>";
+            std::string media_path = line.substr(7);
+            if (ctx.load_media(media_path)) {
+                LOG("%s %s loaded\n", media_path.c_str(), is_image ? "image" : "audio");
+                content += mtmd_default_marker();
             }
             // else, error is already printed by libmtmd
             continue;
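Note on the CLI hunks above: the hardcoded image-marker literal is replaced by mtmd_default_marker(), a single placeholder that now stands in for any media type. A reduced sketch of the single-turn check; only mtmd_default_marker() is real API here, the helper is illustrative:

#include <string>

extern "C" const char * mtmd_default_marker(void); // real declaration lives in mtmd.h

// Ensure the prompt contains at least one media placeholder, so the loaded
// image/audio embeddings have a splice point when the prompt is tokenized.
static void ensure_media_marker(std::string & prompt) {
    if (prompt.find(mtmd_default_marker()) == std::string::npos) {
        prompt += mtmd_default_marker();
    }
}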
--- a/package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp
+++ b/package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp
@@ -12,17 +12,7 @@ size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
     size_t n_tokens = 0;
     for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
         auto chunk = mtmd_input_chunks_get(chunks, i);
-        auto chunk_type = mtmd_input_chunk_get_type(chunk);
-        if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
-            size_t n_tokens_text;
-            mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
-            n_tokens += n_tokens_text;
-        } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
-            n_tokens += mtmd_image_tokens_get_n_tokens(tokens_image);
-        } else {
-            GGML_ASSERT(false && "chunk type not supported");
-        }
+        n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
     }
     return n_tokens;
 }
@@ -31,17 +21,7 @@ llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
     llama_pos n_pos = 0;
     for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
         auto chunk = mtmd_input_chunks_get(chunks, i);
-        auto chunk_type = mtmd_input_chunk_get_type(chunk);
-        if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
-            size_t n_tokens_text;
-            mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
-            n_pos += n_tokens_text;
-        } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
-            n_pos += mtmd_image_tokens_get_n_pos(tokens_image);
-        } else {
-            GGML_ASSERT(false && "chunk type not supported");
-        }
+        n_pos += mtmd_input_chunk_get_n_pos(chunk);
    }
     return n_pos;
 }
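Note: both helpers above collapse their per-type branching into the unified chunk accessors. Token count and position count stay separate helpers because they can differ for non-text chunks (with M-RoPE, an image's position span is not simply its token count). A sketch of caller-side usage, assuming only the accessors visible in this diff:

#include <cstdio>

#include "mtmd.h"

// Walk a tokenized multimodal prompt; the same two calls work for text,
// image and audio chunks alike.
static void report_chunks(const mtmd_input_chunks * chunks) {
    size_t    n_tokens = 0; // embeddings that must be decoded
    llama_pos n_pos    = 0; // positions consumed in the KV cache
    for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
        const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
        n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
        n_pos    += mtmd_input_chunk_get_n_pos(chunk);
    }
    printf("%zu tokens over %d positions\n", n_tokens, (int) n_pos);
}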
@@ -149,13 +129,10 @@ int32_t mtmd_helper_decode_image_chunk(
         llama_seq_id seq_id,
         int32_t n_batch,
         llama_pos * new_n_past) {
-    if (mtmd_input_chunk_get_type(chunk) != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-        LOG_ERR("failed to decode image chunk: input chunk not of image type\n");
-        return -1;
-    }
-    const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
-    if (!image_tokens) {
-        LOG_ERR("failed to decode image chunk: image tokens are null\n");
+    auto chunk_type = mtmd_input_chunk_get_type(chunk);
+    const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
+    if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+        LOG_ERR("failed to decode chunk: input chunk not of image/audio type\n");
         return -1;
     }
 
@@ -163,15 +140,23 @@ int32_t mtmd_helper_decode_image_chunk(
     int n_mmproj_embd = llama_model_n_embd(model);
     int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 4 : 1;
 
-    int32_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+    int32_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
     int32_t i_batch = 0;
     int32_t n_img_batches = GGML_PAD(n_tokens, n_batch) / n_batch;
     decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
 
-    const int nx = mtmd_image_tokens_get_nx(image_tokens);
-    const int ny = mtmd_image_tokens_get_ny(image_tokens);
-
     if (mtmd_decode_use_mrope(ctx)) {
+        const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+        if (chunk_type != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            LOG_ERR("failed to decode chunk: M-RoPE only accepts image chunk\n");
+            return -1;
+        }
+        if (!image_tokens) {
+            LOG_ERR("failed to decode chunk: image tokens are null\n");
+            return -1;
+        }
+        const int nx = mtmd_image_tokens_get_nx(image_tokens);
+        const int ny = mtmd_image_tokens_get_ny(image_tokens);
         batch_embd.set_position_mrope(n_past, nx, ny, seq_id);
     } else {
         batch_embd.set_position_normal(n_past, seq_id);
@@ -187,22 +172,22 @@ int32_t mtmd_helper_decode_image_chunk(
         int n_tokens_batch = std::min(n_batch, n_tokens - pos_offset);
         llama_batch batch_embd_view = batch_embd.get_view(pos_offset, n_tokens_batch);
 
-        LOG_INF("decoding image batch %d/%d, n_tokens_batch = %d\n", i_batch+1, n_img_batches, n_tokens_batch);
+        LOG_INF("decoding %s batch %d/%d, n_tokens_batch = %d\n", name, i_batch+1, n_img_batches, n_tokens_batch);
 
         int64_t t1 = ggml_time_ms();
         int32_t ret = llama_decode(lctx, batch_embd_view);
         if (ret != 0) {
-            LOG_ERR("failed to decode image\n");
+            LOG_ERR("failed to decode %s\n", name);
             llama_set_causal_attn(lctx, true); // restore causal attn
             return ret;
         }
 
-        LOG_INF("image decoded (batch %d/%d) in %" PRId64 " ms\n", i_batch+1, n_img_batches, ggml_time_ms() - t1);
+        LOG_INF("%s decoded (batch %d/%d) in %" PRId64 " ms\n", name, i_batch+1, n_img_batches, ggml_time_ms() - t1);
 
         i_batch++;
     }
 
-    n_past += mtmd_image_tokens_get_n_pos(image_tokens);
+    n_past += mtmd_input_chunk_get_n_pos(chunk);
     *new_n_past = n_past;
 
     if (mtmd_decode_use_non_causal(ctx)) {
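Note on n_img_batches = GGML_PAD(n_tokens, n_batch) / n_batch in the loop above: rounding up to a multiple and dividing back is ceiling division, i.e. the number of llama_decode calls one image/audio chunk needs. A self-contained check, using a local stand-in for ggml.h's GGML_PAD:

#include <cstdint>

// Stand-in mirroring GGML_PAD's round-up-to-a-multiple behavior.
constexpr int32_t pad_to_multiple(int32_t x, int32_t n) { return (x + n - 1) / n * n; }

static_assert(pad_to_multiple(577, 512) / 512 == 2, "577 embeddings -> 2 batches");
static_assert(pad_to_multiple(512, 512) / 512 == 1, "exact multiple -> 1 batch");
static_assert(pad_to_multiple(  1, 512) / 512 == 1, "a single token still costs a batch");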
@@ -231,12 +216,14 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
         while (i < n_tokens) { // split into batches
             text_batch.n_tokens = 0; // clear the batch
             for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
+                int32_t j = text_batch.n_tokens;
+                text_batch.token   [j]    = tokens[i];
+                text_batch.pos     [j]    = n_past++;
+                text_batch.n_seq_id[j]    = 1;
+                text_batch.seq_id  [j][0] = seq_id;
+                text_batch.logits  [j]    = false;
+
                 text_batch.n_tokens++;
-                text_batch.token   [i]    = tokens[i];
-                text_batch.pos     [i]    = n_past++;
-                text_batch.n_seq_id[i]    = 1;
-                text_batch.seq_id  [i][0] = seq_id;
-                text_batch.logits  [i]    = false;
             }
             bool is_last_token = (i == n_tokens);
             if (logits_last && is_last_token) {
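Note: the hunk above is a bug fix, not just a cleanup. The removed lines indexed the batch arrays with i, the position in the whole token stream, so as soon as a text chunk spilled into a second batch (i >= n_batch) the writes landed past arrays sized for n_batch entries. The new batch-local index j always stays below n_batch. A reduced sketch of the corrected pattern (llama_batch fields as in llama.h; the decode call is elided):

#include "llama.h"

// Fill a long token stream into a llama_batch in slices of at most n_batch.
static void fill_in_slices(llama_batch & text_batch, const llama_token * tokens,
                           int32_t n_tokens, int32_t n_batch,
                           llama_pos & n_past, llama_seq_id seq_id) {
    int32_t i = 0; // global: index into the token stream
    while (i < n_tokens) {
        text_batch.n_tokens = 0;
        for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
            const int32_t j = text_batch.n_tokens; // local: slot in this batch, always < n_batch
            text_batch.token   [j]    = tokens[i];
            text_batch.pos     [j]    = n_past++;
            text_batch.n_seq_id[j]    = 1;
            text_batch.seq_id  [j][0] = seq_id;
            text_batch.logits  [j]    = false;
            text_batch.n_tokens++;
        }
        // llama_decode(lctx, text_batch) runs here for each slice.
    }
}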
@@ -251,25 +238,25 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
             *new_n_past += text_batch.n_tokens;
         }
 
-    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-        const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
+        const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
         int64_t t0 = ggml_time_ms();
 
-        LOG_INF("encoding image or slice...\n");
+        LOG_INF("encoding %s slice...\n", name);
 
-        ret = mtmd_encode(ctx, image_tokens);
+        ret = mtmd_encode_chunk(ctx, chunk);
         if (ret != 0) {
-            LOG_ERR("failed to encode image\n");
+            LOG_ERR("failed to encode %s slice\n", name);
             llama_batch_free(text_batch);
             return ret;
         }
 
-        LOG_INF("image/slice encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
+        LOG_INF("%s slice encoded in %" PRId64 " ms\n", name, ggml_time_ms() - t0);
 
         float * embd = mtmd_get_output_embd(ctx);
         ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
         if (ret != 0) {
-            LOG_ERR("failed to decode image\n");
+            LOG_ERR("failed to decode %s\n", name);
             llama_batch_free(text_batch);
             return ret;
         }