@fugood/llama.node 0.4.7 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/CMakeLists.txt +4 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/lib/binding.ts +66 -6
  11. package/lib/index.js +59 -17
  12. package/lib/index.ts +74 -23
  13. package/package.json +1 -1
  14. package/src/DecodeAudioTokenWorker.cpp +40 -0
  15. package/src/DecodeAudioTokenWorker.h +22 -0
  16. package/src/EmbeddingWorker.cpp +7 -5
  17. package/src/LlamaCompletionWorker.cpp +68 -54
  18. package/src/LlamaCompletionWorker.h +7 -8
  19. package/src/LlamaContext.cpp +551 -235
  20. package/src/LlamaContext.h +26 -4
  21. package/src/LoadSessionWorker.cpp +4 -2
  22. package/src/SaveSessionWorker.cpp +10 -6
  23. package/src/TokenizeWorker.cpp +23 -14
  24. package/src/TokenizeWorker.h +2 -2
  25. package/src/addons.cc +8 -11
  26. package/src/common.hpp +129 -126
  27. package/src/llama.cpp/.github/workflows/build.yml +2 -2
  28. package/src/llama.cpp/.github/workflows/release.yml +152 -129
  29. package/src/llama.cpp/.github/workflows/winget.yml +42 -0
  30. package/src/llama.cpp/common/arg.cpp +14 -13
  31. package/src/llama.cpp/common/common.cpp +4 -75
  32. package/src/llama.cpp/common/common.h +7 -12
  33. package/src/llama.cpp/examples/lookahead/lookahead.cpp +0 -13
  34. package/src/llama.cpp/examples/lookup/lookup.cpp +0 -11
  35. package/src/llama.cpp/examples/parallel/parallel.cpp +0 -9
  36. package/src/llama.cpp/examples/retrieval/retrieval.cpp +6 -6
  37. package/src/llama.cpp/examples/simple/simple.cpp +1 -1
  38. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +2 -2
  39. package/src/llama.cpp/examples/sycl/run-llama2.sh +4 -4
  40. package/src/llama.cpp/examples/sycl/run-llama3.sh +28 -0
  41. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  42. package/src/llama.cpp/examples/sycl/win-run-llama3.bat +9 -0
  43. package/src/llama.cpp/ggml/include/ggml-opt.h +2 -0
  44. package/src/llama.cpp/ggml/include/ggml.h +11 -0
  45. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +274 -0
  46. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +27 -0
  47. package/src/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +18 -2
  48. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +1 -0
  49. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +107 -0
  50. package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +16 -0
  51. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +8 -2
  52. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +315 -155
  53. package/src/llama.cpp/ggml/src/ggml-opt.cpp +5 -0
  54. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +43 -12
  55. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +171 -112
  56. package/src/llama.cpp/ggml/src/ggml.c +64 -18
  57. package/src/llama.cpp/include/llama.h +24 -124
  58. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +5 -1
  59. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +5 -1
  60. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  61. package/src/llama.cpp/src/llama-batch.cpp +3 -1
  62. package/src/llama.cpp/src/llama-context.cpp +60 -110
  63. package/src/llama.cpp/src/llama-graph.cpp +137 -233
  64. package/src/llama.cpp/src/llama-graph.h +49 -7
  65. package/src/llama.cpp/src/llama-hparams.cpp +17 -1
  66. package/src/llama.cpp/src/llama-hparams.h +34 -5
  67. package/src/llama.cpp/src/llama-kv-cache.cpp +654 -321
  68. package/src/llama.cpp/src/llama-kv-cache.h +201 -85
  69. package/src/llama.cpp/src/llama-memory.h +3 -2
  70. package/src/llama.cpp/src/llama-model.cpp +273 -94
  71. package/src/llama.cpp/src/llama-model.h +4 -1
  72. package/src/llama.cpp/tests/test-arg-parser.cpp +1 -1
  73. package/src/llama.cpp/tools/llama-bench/llama-bench.cpp +1 -0
  74. package/src/llama.cpp/tools/mtmd/CMakeLists.txt +13 -2
  75. package/src/llama.cpp/tools/mtmd/clip-impl.h +108 -11
  76. package/src/llama.cpp/tools/mtmd/clip.cpp +466 -88
  77. package/src/llama.cpp/tools/mtmd/clip.h +6 -4
  78. package/src/llama.cpp/tools/mtmd/miniaudio.h +93468 -0
  79. package/src/llama.cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  80. package/src/llama.cpp/tools/mtmd/mtmd-audio.h +62 -0
  81. package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp +21 -14
  82. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +36 -49
  83. package/src/llama.cpp/tools/mtmd/mtmd.cpp +362 -98
  84. package/src/llama.cpp/tools/mtmd/mtmd.h +52 -21
  85. package/src/llama.cpp/tools/run/run.cpp +2 -2
  86. package/src/llama.cpp/tools/server/server.cpp +158 -47
  87. package/src/llama.cpp/tools/server/utils.hpp +71 -43
  88. package/src/llama.cpp/tools/tts/tts.cpp +4 -2
  89. package/src/tts_utils.cpp +342 -0
  90. package/src/tts_utils.h +62 -0
  91. package/bin/win32/arm64/llama-node.node +0 -0
  92. package/bin/win32/arm64/node.lib +0 -0
  93. package/bin/win32/x64/llama-node.node +0 -0
  94. package/bin/win32/x64/node.lib +0 -0
  95. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  96. package/bin/win32-vulkan/arm64/node.lib +0 -0
  97. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  98. package/bin/win32-vulkan/x64/node.lib +0 -0
package/src/llama.cpp/tools/mtmd/mtmd-audio.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include "ggml.h"
+
+#include <cstdint>
+#include <vector>
+#include <string>
+
+#define WHISPER_ASSERT GGML_ASSERT
+
+#define WHISPER_SAMPLE_RATE 16000
+#define WHISPER_N_FFT       400
+#define WHISPER_HOP_LENGTH  160
+#define WHISPER_CHUNK_SIZE  30
+
+#define COMMON_SAMPLE_RATE 16000
+
+namespace whisper_preprocessor {
+
+struct whisper_mel {
+    int n_len;
+    int n_len_org;
+    int n_mel;
+
+    std::vector<float> data;
+};
+
+struct whisper_filters {
+    int32_t n_mel;
+    int32_t n_fft;
+
+    std::vector<float> data;
+};
+
+extern bool preprocess_audio(
+    const float * samples,
+    size_t n_samples,
+    const whisper_filters & filters,
+    std::vector<whisper_mel> & output);
+
+} // namespace whisper_preprocessor
+
+
+// TODO @ngxson : move this helper to mtmd-helpers.cpp
+namespace audio_helpers {
+
+extern bool is_audio_file(const char * buf, size_t len);
+
+extern bool decode_audio_from_buf(
+    const unsigned char * buf_in,
+    size_t len,
+    int target_sampler_rate,
+    std::vector<float> & pcmf32_mono);
+
+} // namespace audio_helpers
+
+
+namespace whisper_precalc_filters {
+
+extern whisper_preprocessor::whisper_filters get_128_bins();
+
+} // namespace whisper_precalc_filters
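The header above declares the audio front end added in this release: raw file bytes are detected and decoded to 16 kHz mono PCM via audio_helpers, then converted to log-mel chunks by whisper_preprocessor::preprocess_audio using the precalculated 128-bin filter bank. A minimal sketch of how a caller might wire these together; the wrapper function name and the buffer-loading glue are illustrative, not part of the package:

// Sketch only: assumes `file_buf` holds the raw bytes of an audio file.
// All calls are the declarations from mtmd-audio.h above.
#include "mtmd-audio.h"

bool mel_from_file_bytes(const unsigned char * file_buf, size_t len,
                         std::vector<whisper_preprocessor::whisper_mel> & mels) {
    if (!audio_helpers::is_audio_file((const char *) file_buf, len)) {
        return false; // not a recognized audio format
    }
    // decode + resample to 16 kHz mono PCM float
    std::vector<float> pcmf32;
    if (!audio_helpers::decode_audio_from_buf(file_buf, len, COMMON_SAMPLE_RATE, pcmf32)) {
        return false;
    }
    // convert PCM to log-mel chunks using the precalculated 128-bin filters
    const auto filters = whisper_precalc_filters::get_128_bins();
    return whisper_preprocessor::preprocess_audio(pcmf32.data(), pcmf32.size(), filters, mels);
}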
package/src/llama.cpp/tools/mtmd/mtmd-cli.cpp
@@ -37,10 +37,10 @@ static volatile bool g_is_interrupted = false;
 static void show_additional_info(int /*argc*/, char ** argv) {
     LOG(
         "Experimental CLI for multimodal\n\n"
-        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> -p <prompt>\n\n"
+        "Usage: %s [options] -m <model> --mmproj <mmproj> --image <image> --audio <audio> -p <prompt>\n\n"
         "  -m and --mmproj are required\n"
         "  -hf user/repo can replace both -m and --mmproj in most cases\n"
-        "  --image and -p are optional, if NOT provided, the CLI will run in chat mode\n"
+        "  --image, --audio and -p are optional, if NOT provided, the CLI will run in chat mode\n"
         "  to disable using GPU for mmproj model, add --no-mmproj-offload\n",
         argv[0]
     );
@@ -142,7 +142,7 @@ struct mtmd_cli_context {
         );
     }
 
-    bool load_image(const std::string & fname) {
+    bool load_media(const std::string & fname) {
         mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(fname.c_str()));
         if (!bmp.ptr) {
             return false;
@@ -243,7 +243,7 @@ int main(int argc, char ** argv) {
     common_params params;
     params.sampling.temp = 0.2; // lower temp by default for better quality
 
-    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_LLAVA, show_additional_info)) {
+    if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MTMD, show_additional_info)) {
         return 1;
     }
 
@@ -283,14 +283,14 @@ int main(int argc, char ** argv) {
 
     if (is_single_turn) {
         g_is_generating = true;
-        if (params.prompt.find("<__image__>") == std::string::npos) {
-            params.prompt += " <__image__>";
+        if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
+            params.prompt += mtmd_default_marker();
         }
         common_chat_msg msg;
         msg.role    = "user";
         msg.content = params.prompt;
         for (const auto & image : params.image) {
-            if (!ctx.load_image(image)) {
+            if (!ctx.load_media(image)) {
                 return 1; // error is already printed by libmtmd
             }
         }
@@ -303,7 +303,12 @@ int main(int argc, char ** argv) {
 
     } else {
        LOG("\n Running in chat mode, available commands:");
-        LOG("\n   /image <path>    load an image");
+        if (mtmd_support_vision(ctx.ctx_vision.get())) {
+            LOG("\n   /image <path>    load an image");
+        }
+        if (mtmd_support_audio(ctx.ctx_vision.get())) {
+            LOG("\n   /audio <path>    load an audio");
+        }
         LOG("\n   /clear           clear the chat history");
         LOG("\n   /quit or /exit   exit the program");
         LOG("\n");
@@ -333,15 +338,17 @@ int main(int argc, char ** argv) {
             continue;
         }
         g_is_generating = true;
-        if (line == "/image" || line.find("/image ") == 0) {
+        bool is_image = line == "/image" || line.find("/image ") == 0;
+        bool is_audio = line == "/audio" || line.find("/audio ") == 0;
+        if (is_image || is_audio) {
             if (line.size() < 8) {
-                LOG_ERR("ERR: Missing image filename\n");
+                LOG_ERR("ERR: Missing media filename\n");
                 continue;
             }
-            std::string image = line.substr(7);
-            if (ctx.load_image(image)) {
-                LOG("Image %s loaded\n", image.c_str());
-                content += "<__image__>";
+            std::string media_path = line.substr(7);
+            if (ctx.load_media(media_path)) {
+                LOG("%s %s loaded\n", media_path.c_str(), is_image ? "image" : "audio");
+                content += mtmd_default_marker();
            }
            // else, error is already printed by libmtmd
            continue;
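Two API points carry these CLI hunks: mtmd_support_vision() / mtmd_support_audio() let the CLI advertise only the commands the loaded mmproj actually supports, and mtmd_default_marker() replaces the hard-coded <__image__> placeholder so one marker works for any media type. A hedged sketch of the pattern; the function name and prompt text are illustrative glue, only the two mtmd calls come from the diff:

// Sketch: gate media handling on model capability and use the library's
// marker rather than a hard-coded placeholder. `mctx` is an mtmd_context *.
static std::string build_audio_prompt(mtmd_context * mctx) {
    std::string content = "What is being said in this recording? ";
    if (mtmd_support_audio(mctx)) {
        // each marker is later paired, in order, with a bitmap passed to mtmd_tokenize()
        content += mtmd_default_marker();
    }
    return content;
}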
package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp
@@ -12,17 +12,7 @@ size_t mtmd_helper_get_n_tokens(const mtmd_input_chunks * chunks) {
     size_t n_tokens = 0;
     for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
         auto chunk = mtmd_input_chunks_get(chunks, i);
-        auto chunk_type = mtmd_input_chunk_get_type(chunk);
-        if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
-            size_t n_tokens_text;
-            mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
-            n_tokens += n_tokens_text;
-        } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
-            n_tokens += mtmd_image_tokens_get_n_tokens(tokens_image);
-        } else {
-            GGML_ASSERT(false && "chunk type not supported");
-        }
+        n_tokens += mtmd_input_chunk_get_n_tokens(chunk);
     }
     return n_tokens;
 }
@@ -31,17 +21,7 @@ llama_pos mtmd_helper_get_n_pos(const mtmd_input_chunks * chunks) {
     llama_pos n_pos = 0;
     for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
         auto chunk = mtmd_input_chunks_get(chunks, i);
-        auto chunk_type = mtmd_input_chunk_get_type(chunk);
-        if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
-            size_t n_tokens_text;
-            mtmd_input_chunk_get_tokens_text(chunk, &n_tokens_text);
-            n_pos += n_tokens_text;
-        } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            auto tokens_image = mtmd_input_chunk_get_tokens_image(chunk);
-            n_pos += mtmd_image_tokens_get_n_pos(tokens_image);
-        } else {
-            GGML_ASSERT(false && "chunk type not supported");
-        }
+        n_pos += mtmd_input_chunk_get_n_pos(chunk);
     }
     return n_pos;
 }
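These two loops collapse the per-type branching (text, image, assert on anything else) into the new type-agnostic accessors, which is what lets audio chunks flow through the helpers unchanged. The library keeps separate token and position counts because they can diverge (for example, under M-RoPE an image chunk's positions are not one-per-token, hence the distinct mtmd_input_chunk_get_n_pos). A caller-side sketch under those assumptions; the variable names and check are illustrative glue:

// Sketch: sanity-check a tokenized multimodal prompt against the context
// window before evaluating it. `chunks` comes from mtmd_tokenize(); lctx is
// a llama_context *. (Not part of the diff.)
size_t    n_tokens = mtmd_helper_get_n_tokens(chunks);
llama_pos n_pos    = mtmd_helper_get_n_pos(chunks); // may differ from n_tokens under M-RoPE
if (n_pos > (llama_pos) llama_n_ctx(lctx)) {
    LOG_ERR("prompt needs %d positions but context has %u\n", n_pos, llama_n_ctx(lctx));
}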
@@ -149,13 +129,10 @@ int32_t mtmd_helper_decode_image_chunk(
         llama_seq_id seq_id,
         int32_t n_batch,
         llama_pos * new_n_past) {
-    if (mtmd_input_chunk_get_type(chunk) != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-        LOG_ERR("failed to decode image chunk: input chunk not of image type\n");
-        return -1;
-    }
-    const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
-    if (!image_tokens) {
-        LOG_ERR("failed to decode image chunk: image tokens are null\n");
+    auto chunk_type = mtmd_input_chunk_get_type(chunk);
+    const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
+    if (chunk_type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
+        LOG_ERR("failed to decode chunk: input chunk not of image/audio type\n");
         return -1;
     }
 
@@ -163,15 +140,23 @@ int32_t mtmd_helper_decode_image_chunk(
     int n_mmproj_embd = llama_model_n_embd(model);
     int n_pos_per_embd = mtmd_decode_use_mrope(ctx) ? 4 : 1;
 
-    int32_t n_tokens = mtmd_image_tokens_get_n_tokens(image_tokens);
+    int32_t n_tokens = mtmd_input_chunk_get_n_tokens(chunk);
     int32_t i_batch = 0;
     int32_t n_img_batches = GGML_PAD(n_tokens, n_batch) / n_batch;
     decode_embd_batch batch_embd(encoded_embd, n_tokens, n_pos_per_embd, n_mmproj_embd);
 
-    const int nx = mtmd_image_tokens_get_nx(image_tokens);
-    const int ny = mtmd_image_tokens_get_ny(image_tokens);
-
     if (mtmd_decode_use_mrope(ctx)) {
+        const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+        if (chunk_type != MTMD_INPUT_CHUNK_TYPE_IMAGE) {
+            LOG_ERR("failed to decode chunk: M-RoPE only accepts image chunk\n");
+            return -1;
+        }
+        if (!image_tokens) {
+            LOG_ERR("failed to decode chunk: image tokens are null\n");
+            return -1;
+        }
+        const int nx = mtmd_image_tokens_get_nx(image_tokens);
+        const int ny = mtmd_image_tokens_get_ny(image_tokens);
         batch_embd.set_position_mrope(n_past, nx, ny, seq_id);
     } else {
         batch_embd.set_position_normal(n_past, seq_id);
@@ -187,22 +172,22 @@ int32_t mtmd_helper_decode_image_chunk(
         int n_tokens_batch = std::min(n_batch, n_tokens - pos_offset);
         llama_batch batch_embd_view = batch_embd.get_view(pos_offset, n_tokens_batch);
 
-        LOG_INF("decoding image batch %d/%d, n_tokens_batch = %d\n", i_batch+1, n_img_batches, n_tokens_batch);
+        LOG_INF("decoding %s batch %d/%d, n_tokens_batch = %d\n", name, i_batch+1, n_img_batches, n_tokens_batch);
 
         int64_t t1 = ggml_time_ms();
         int32_t ret = llama_decode(lctx, batch_embd_view);
         if (ret != 0) {
-            LOG_ERR("failed to decode image\n");
+            LOG_ERR("failed to decode %s\n", name);
             llama_set_causal_attn(lctx, true); // restore causal attn
             return ret;
         }
 
-        LOG_INF("image decoded (batch %d/%d) in %" PRId64 " ms\n", i_batch+1, n_img_batches, ggml_time_ms() - t1);
+        LOG_INF("%s decoded (batch %d/%d) in %" PRId64 " ms\n", name, i_batch+1, n_img_batches, ggml_time_ms() - t1);
 
         i_batch++;
     }
 
-    n_past += mtmd_image_tokens_get_n_pos(image_tokens);
+    n_past += mtmd_input_chunk_get_n_pos(chunk);
     *new_n_past = n_past;
 
     if (mtmd_decode_use_non_causal(ctx)) {
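One detail worth calling out in the decode loop above: the batch count comes from GGML_PAD, which rounds its first argument up to a multiple of the second, so the division is a ceiling division. A worked example with illustrative sizes:

// GGML_PAD(x, n) rounds x up to a multiple of n (macro from ggml.h), so
// GGML_PAD(n_tokens, n_batch) / n_batch computes ceil(n_tokens / n_batch).
int32_t n_tokens = 576;  // e.g. a ViT encoder emitting 576 patch embeddings
int32_t n_batch  = 256;  // logical batch size
int32_t n_img_batches = GGML_PAD(n_tokens, n_batch) / n_batch;
// GGML_PAD(576, 256) = 768, so n_img_batches = 3: views of 256, 256, and 64 tokens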
@@ -231,12 +216,14 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
         while (i < n_tokens) { // split into batches
             text_batch.n_tokens = 0; // clear the batch
             for (; i < n_tokens && text_batch.n_tokens < n_batch; i++) {
+                int32_t j = text_batch.n_tokens;
+                text_batch.token   [j] = tokens[i];
+                text_batch.pos     [j] = n_past++;
+                text_batch.n_seq_id[j] = 1;
+                text_batch.seq_id  [j][0] = seq_id;
+                text_batch.logits  [j] = false;
+
                 text_batch.n_tokens++;
-                text_batch.token   [i] = tokens[i];
-                text_batch.pos     [i] = n_past++;
-                text_batch.n_seq_id[i] = 1;
-                text_batch.seq_id  [i][0] = seq_id;
-                text_batch.logits  [i] = false;
             }
             bool is_last_token = (i == n_tokens);
             if (logits_last && is_last_token) {
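This hunk is a straight bug fix: the batch arrays were previously indexed with the global token index i, which is only valid while i < n_batch. On the second and later passes of the enclosing while loop, i resumes where the previous batch stopped, so the writes ran past the batch's allocation. Indexing with the within-batch offset j = text_batch.n_tokens keeps every write in [0, n_batch). A note on the capacity involved, assuming the batch is created the usual llama.cpp way:

// text_batch holds at most n_batch token slots, so valid indices are
// 0 .. n_batch-1. With n_tokens = 1000 and n_batch = 512, the old code's
// second pass resumed at i = 512 and wrote out of bounds; the new j runs 0..487.
llama_batch text_batch = llama_batch_init(n_batch, 0, 1);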
@@ -251,25 +238,25 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
             *new_n_past += text_batch.n_tokens;
         }
 
-    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-        const auto image_tokens = mtmd_input_chunk_get_tokens_image(chunk);
+    } else if (chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE || chunk_type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
+        const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
         int64_t t0 = ggml_time_ms();
 
-        LOG_INF("encoding image or slice...\n");
+        LOG_INF("encoding %s slice...\n", name);
 
-        ret = mtmd_encode(ctx, image_tokens);
+        ret = mtmd_encode_chunk(ctx, chunk);
         if (ret != 0) {
-            LOG_ERR("failed to encode image\n");
+            LOG_ERR("failed to encode %s slice\n", name);
             llama_batch_free(text_batch);
             return ret;
         }
 
-        LOG_INF("image/slice encoded in %" PRId64 " ms\n", ggml_time_ms() - t0);
+        LOG_INF("%s slice encoded in %" PRId64 " ms\n", name, ggml_time_ms() - t0);
 
         float * embd = mtmd_get_output_embd(ctx);
         ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
         if (ret != 0) {
-            LOG_ERR("failed to decode %s\n", name);
+            LOG_ERR("failed to decode %s\n", name);
             llama_batch_free(text_batch);
             return ret;
         }
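Taken together, the helper now routes any non-text chunk through mtmd_encode_chunk() (which dispatches on the chunk type internally) and feeds the resulting embeddings to the decoder; mtmd_helper_decode_image_chunk() keeps its historical name but accepts audio chunks as well. A sketch of the evaluation loop a caller would run, assuming the full parameter list of mtmd_helper_eval_chunk_single matches the parameters visible in the hunks above:

// Sketch: evaluate a tokenized multimodal prompt chunk by chunk.
// `chunks` comes from mtmd_tokenize(); ctx is an mtmd_context *, lctx a
// llama_context *; seq_id and n_batch as usual. Error handling shortened.
llama_pos n_past = 0;
for (size_t i = 0; i < mtmd_input_chunks_size(chunks); i++) {
    const mtmd_input_chunk * chunk = mtmd_input_chunks_get(chunks, i);
    bool is_last = i + 1 == mtmd_input_chunks_size(chunks);
    llama_pos new_n_past = n_past;
    if (mtmd_helper_eval_chunk_single(ctx, lctx, chunk, n_past, seq_id,
                                      n_batch, /*logits_last*/ is_last, &new_n_past) != 0) {
        break; // the helper already logged the failure
    }
    n_past = new_n_past; // text, image, and audio chunks all advance n_past
}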