@fugood/llama.node 0.3.17 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. package/CMakeLists.txt +3 -1
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-cuda/arm64/llama-node.node +0 -0
  7. package/bin/linux-cuda/x64/llama-node.node +0 -0
  8. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  9. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  10. package/bin/win32/arm64/llama-node.node +0 -0
  11. package/bin/win32/arm64/node.lib +0 -0
  12. package/bin/win32/x64/llama-node.node +0 -0
  13. package/bin/win32/x64/node.lib +0 -0
  14. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/arm64/node.lib +0 -0
  16. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  17. package/bin/win32-vulkan/x64/node.lib +0 -0
  18. package/lib/binding.ts +39 -2
  19. package/lib/index.js +132 -1
  20. package/lib/index.ts +203 -3
  21. package/package.json +2 -1
  22. package/src/EmbeddingWorker.cpp +1 -1
  23. package/src/LlamaCompletionWorker.cpp +366 -19
  24. package/src/LlamaCompletionWorker.h +30 -10
  25. package/src/LlamaContext.cpp +213 -5
  26. package/src/LlamaContext.h +12 -0
  27. package/src/common.hpp +15 -0
  28. package/src/llama.cpp/.github/workflows/build-linux-cross.yml +133 -24
  29. package/src/llama.cpp/.github/workflows/build.yml +41 -762
  30. package/src/llama.cpp/.github/workflows/docker.yml +5 -2
  31. package/src/llama.cpp/.github/workflows/release.yml +716 -0
  32. package/src/llama.cpp/.github/workflows/server.yml +12 -12
  33. package/src/llama.cpp/CMakeLists.txt +5 -17
  34. package/src/llama.cpp/cmake/build-info.cmake +8 -2
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +0 -6
  36. package/src/llama.cpp/common/CMakeLists.txt +31 -3
  37. package/src/llama.cpp/common/arg.cpp +48 -29
  38. package/src/llama.cpp/common/chat.cpp +128 -106
  39. package/src/llama.cpp/common/chat.h +2 -0
  40. package/src/llama.cpp/common/common.cpp +37 -1
  41. package/src/llama.cpp/common/common.h +18 -9
  42. package/src/llama.cpp/common/llguidance.cpp +1 -0
  43. package/src/llama.cpp/common/minja/chat-template.hpp +9 -5
  44. package/src/llama.cpp/common/minja/minja.hpp +69 -36
  45. package/src/llama.cpp/common/regex-partial.cpp +204 -0
  46. package/src/llama.cpp/common/regex-partial.h +56 -0
  47. package/src/llama.cpp/common/sampling.cpp +57 -50
  48. package/src/llama.cpp/examples/CMakeLists.txt +2 -23
  49. package/src/llama.cpp/examples/embedding/embedding.cpp +2 -11
  50. package/src/llama.cpp/examples/parallel/parallel.cpp +86 -14
  51. package/src/llama.cpp/examples/training/CMakeLists.txt +5 -0
  52. package/src/llama.cpp/examples/training/finetune.cpp +96 -0
  53. package/src/llama.cpp/ggml/CMakeLists.txt +27 -0
  54. package/src/llama.cpp/ggml/include/ggml-backend.h +4 -4
  55. package/src/llama.cpp/ggml/include/ggml-cpp.h +1 -1
  56. package/src/llama.cpp/ggml/include/ggml-opt.h +47 -28
  57. package/src/llama.cpp/ggml/include/ggml.h +10 -7
  58. package/src/llama.cpp/ggml/src/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/ggml/src/ggml-alloc.c +4 -1
  60. package/src/llama.cpp/ggml/src/ggml-backend.cpp +9 -5
  61. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +20 -13
  62. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  63. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +306 -6
  64. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +4 -13
  65. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +29 -16
  66. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +88 -5
  67. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +47 -12
  68. package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +264 -69
  69. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +501 -0
  70. package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +0 -13
  71. package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +0 -6
  72. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +23 -4
  73. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +36 -11
  74. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +0 -2
  75. package/src/llama.cpp/ggml/src/ggml-opt.cpp +368 -190
  76. package/src/llama.cpp/ggml/src/ggml-quants.c +0 -6
  77. package/src/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +41 -27
  78. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +29 -23
  79. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +9 -8
  80. package/src/llama.cpp/ggml/src/ggml-sycl/binbcast.cpp +121 -232
  81. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +7 -15
  82. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +72 -25
  83. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +14 -7
  84. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +59 -21
  85. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +7 -1
  86. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +0 -23
  87. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +37 -8
  88. package/src/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +338 -166
  89. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +185 -89
  90. package/src/llama.cpp/ggml/src/ggml-sycl/quants.hpp +83 -0
  91. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +128 -53
  92. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +81 -70
  93. package/src/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +657 -193
  94. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +20 -0
  95. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +123 -29
  96. package/src/llama.cpp/ggml/src/ggml.c +29 -20
  97. package/src/llama.cpp/ggml/src/gguf.cpp +33 -33
  98. package/src/llama.cpp/include/llama.h +52 -11
  99. package/src/llama.cpp/requirements/requirements-all.txt +3 -3
  100. package/src/llama.cpp/scripts/xxd.cmake +1 -1
  101. package/src/llama.cpp/src/CMakeLists.txt +1 -0
  102. package/src/llama.cpp/src/llama-adapter.cpp +6 -0
  103. package/src/llama.cpp/src/llama-arch.cpp +3 -0
  104. package/src/llama.cpp/src/llama-batch.cpp +5 -1
  105. package/src/llama.cpp/src/llama-batch.h +2 -1
  106. package/src/llama.cpp/src/llama-chat.cpp +17 -7
  107. package/src/llama.cpp/src/llama-chat.h +1 -0
  108. package/src/llama.cpp/src/llama-context.cpp +389 -501
  109. package/src/llama.cpp/src/llama-context.h +44 -32
  110. package/src/llama.cpp/src/llama-cparams.h +1 -0
  111. package/src/llama.cpp/src/llama-graph.cpp +20 -38
  112. package/src/llama.cpp/src/llama-graph.h +12 -8
  113. package/src/llama.cpp/src/llama-kv-cache.cpp +1503 -389
  114. package/src/llama.cpp/src/llama-kv-cache.h +271 -85
  115. package/src/llama.cpp/src/llama-memory.h +11 -1
  116. package/src/llama.cpp/src/llama-model-loader.cpp +24 -15
  117. package/src/llama.cpp/src/llama-model-saver.cpp +281 -0
  118. package/src/llama.cpp/src/llama-model-saver.h +37 -0
  119. package/src/llama.cpp/src/llama-model.cpp +316 -69
  120. package/src/llama.cpp/src/llama-model.h +8 -1
  121. package/src/llama.cpp/src/llama-quant.cpp +15 -13
  122. package/src/llama.cpp/src/llama-sampling.cpp +18 -6
  123. package/src/llama.cpp/src/llama-vocab.cpp +42 -4
  124. package/src/llama.cpp/src/llama-vocab.h +6 -0
  125. package/src/llama.cpp/src/llama.cpp +14 -0
  126. package/src/llama.cpp/tests/CMakeLists.txt +10 -2
  127. package/src/llama.cpp/tests/test-backend-ops.cpp +107 -47
  128. package/src/llama.cpp/tests/test-chat-template.cpp +10 -11
  129. package/src/llama.cpp/tests/test-chat.cpp +3 -1
  130. package/src/llama.cpp/tests/test-mtmd-c-api.c +63 -0
  131. package/src/llama.cpp/tests/test-opt.cpp +33 -21
  132. package/src/llama.cpp/tests/test-regex-partial.cpp +288 -0
  133. package/src/llama.cpp/tests/test-sampling.cpp +1 -1
  134. package/src/llama.cpp/tools/CMakeLists.txt +39 -0
  135. package/src/llama.cpp/{examples → tools}/batched-bench/batched-bench.cpp +2 -2
  136. package/src/llama.cpp/{examples → tools}/imatrix/imatrix.cpp +11 -9
  137. package/src/llama.cpp/{examples → tools}/llama-bench/llama-bench.cpp +495 -348
  138. package/src/llama.cpp/{examples → tools}/main/main.cpp +6 -9
  139. package/src/llama.cpp/{examples/llava → tools/mtmd}/CMakeLists.txt +1 -35
  140. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip-impl.h +25 -5
  141. package/src/llama.cpp/{examples/llava → tools/mtmd}/clip.cpp +1440 -1349
  142. package/src/llama.cpp/tools/mtmd/clip.h +99 -0
  143. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd-cli.cpp +70 -44
  144. package/src/llama.cpp/tools/mtmd/mtmd-helper.cpp +310 -0
  145. package/src/llama.cpp/{examples/llava → tools/mtmd}/mtmd.cpp +251 -281
  146. package/src/llama.cpp/tools/mtmd/mtmd.h +331 -0
  147. package/src/llama.cpp/{examples → tools}/perplexity/perplexity.cpp +4 -2
  148. package/src/llama.cpp/{examples → tools}/quantize/quantize.cpp +13 -76
  149. package/src/llama.cpp/{examples → tools}/rpc/rpc-server.cpp +70 -74
  150. package/src/llama.cpp/{examples → tools}/run/run.cpp +18 -4
  151. package/src/llama.cpp/{examples → tools}/server/CMakeLists.txt +2 -1
  152. package/src/llama.cpp/{examples → tools}/server/server.cpp +291 -76
  153. package/src/llama.cpp/{examples → tools}/server/utils.hpp +377 -5
  154. package/src/llama.cpp/cmake/arm64-windows-msvc.cmake +0 -6
  155. package/src/llama.cpp/examples/infill/CMakeLists.txt +0 -5
  156. package/src/llama.cpp/examples/infill/infill.cpp +0 -590
  157. package/src/llama.cpp/examples/llava/android/build_64.sh +0 -8
  158. package/src/llama.cpp/examples/llava/clip-quantize-cli.cpp +0 -59
  159. package/src/llama.cpp/examples/llava/clip.h +0 -135
  160. package/src/llama.cpp/examples/llava/llava.cpp +0 -586
  161. package/src/llama.cpp/examples/llava/llava.h +0 -49
  162. package/src/llama.cpp/examples/llava/mtmd.h +0 -168
  163. package/src/llama.cpp/examples/llava/qwen2vl-test.cpp +0 -636
  164. /package/src/llama.cpp/{examples → tools}/batched-bench/CMakeLists.txt +0 -0
  165. /package/src/llama.cpp/{examples → tools}/cvector-generator/CMakeLists.txt +0 -0
  166. /package/src/llama.cpp/{examples → tools}/cvector-generator/completions.txt +0 -0
  167. /package/src/llama.cpp/{examples → tools}/cvector-generator/cvector-generator.cpp +0 -0
  168. /package/src/llama.cpp/{examples → tools}/cvector-generator/mean.hpp +0 -0
  169. /package/src/llama.cpp/{examples → tools}/cvector-generator/negative.txt +0 -0
  170. /package/src/llama.cpp/{examples → tools}/cvector-generator/pca.hpp +0 -0
  171. /package/src/llama.cpp/{examples → tools}/cvector-generator/positive.txt +0 -0
  172. /package/src/llama.cpp/{examples → tools}/export-lora/CMakeLists.txt +0 -0
  173. /package/src/llama.cpp/{examples → tools}/export-lora/export-lora.cpp +0 -0
  174. /package/src/llama.cpp/{examples → tools}/gguf-split/CMakeLists.txt +0 -0
  175. /package/src/llama.cpp/{examples → tools}/gguf-split/gguf-split.cpp +0 -0
  176. /package/src/llama.cpp/{examples → tools}/imatrix/CMakeLists.txt +0 -0
  177. /package/src/llama.cpp/{examples → tools}/llama-bench/CMakeLists.txt +0 -0
  178. /package/src/llama.cpp/{examples → tools}/main/CMakeLists.txt +0 -0
  179. /package/src/llama.cpp/{examples/llava → tools/mtmd}/deprecation-warning.cpp +0 -0
  180. /package/src/llama.cpp/{examples/llava → tools/mtmd}/requirements.txt +0 -0
  181. /package/src/llama.cpp/{examples → tools}/perplexity/CMakeLists.txt +0 -0
  182. /package/src/llama.cpp/{examples → tools}/quantize/CMakeLists.txt +0 -0
  183. /package/src/llama.cpp/{examples → tools}/rpc/CMakeLists.txt +0 -0
  184. /package/src/llama.cpp/{examples → tools}/run/CMakeLists.txt +0 -0
  185. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.cpp +0 -0
  186. /package/src/llama.cpp/{examples → tools}/run/linenoise.cpp/linenoise.h +0 -0
  187. /package/src/llama.cpp/{examples → tools}/server/bench/requirements.txt +0 -0
  188. /package/src/llama.cpp/{examples → tools}/server/httplib.h +0 -0
  189. /package/src/llama.cpp/{examples → tools}/server/tests/requirements.txt +0 -0
  190. /package/src/llama.cpp/{examples → tools}/tokenize/CMakeLists.txt +0 -0
  191. /package/src/llama.cpp/{examples → tools}/tokenize/tokenize.cpp +0 -0
  192. /package/src/llama.cpp/{examples → tools}/tts/CMakeLists.txt +0 -0
  193. /package/src/llama.cpp/{examples → tools}/tts/tts.cpp +0 -0
package/src/LlamaContext.cpp CHANGED
@@ -12,6 +12,91 @@
 #include "SaveSessionWorker.h"
 #include "TokenizeWorker.h"
 
+// Helper function for formatted strings (for console logs)
+template<typename ... Args>
+static std::string format_string(const std::string& format, Args ... args) {
+  int size_s = std::snprintf(nullptr, 0, format.c_str(), args ...) + 1; // +1 for null terminator
+  if (size_s <= 0) { return "Error formatting string"; }
+  auto size = static_cast<size_t>(size_s);
+  std::unique_ptr<char[]> buf(new char[size]);
+  std::snprintf(buf.get(), size, format.c_str(), args ...);
+  return std::string(buf.get(), buf.get() + size - 1); // -1 to exclude null terminator
+}
+
+// Computes FNV-1a hash of the data
+static std::string fnv_hash(const uint8_t* data, size_t len) {
+  const uint64_t fnv_prime = 0x100000001b3ULL;
+  uint64_t hash = 0xcbf29ce484222325ULL;
+
+  for (size_t i = 0; i < len; ++i) {
+    hash ^= data[i];
+    hash *= fnv_prime;
+  }
+  return std::to_string(hash);
+}
+
+static const std::string base64_chars =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    "abcdefghijklmnopqrstuvwxyz"
+    "0123456789+/";
+
+// Base64 decoding function
+static std::vector<uint8_t> base64_decode(const std::string &encoded_string) {
+  std::vector<uint8_t> decoded;
+  int in_len = encoded_string.size();
+  int i = 0;
+  int j = 0;
+  int in_ = 0;
+  unsigned char char_array_4[4], char_array_3[3];
+
+  while (in_len-- && (encoded_string[in_] != '=')) {
+    if (isspace(encoded_string[in_])) {
+      in_++;
+      continue;
+    }
+
+    if (encoded_string[in_] == '=' || base64_chars.find(encoded_string[in_]) == std::string::npos) {
+      break;
+    }
+
+    char_array_4[i++] = encoded_string[in_]; in_++;
+    if (i == 4) {
+      for (i = 0; i < 4; i++) {
+        char_array_4[i] = base64_chars.find(char_array_4[i]);
+      }
+
+      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+      for (i = 0; i < 3; i++) {
+        decoded.push_back(char_array_3[i]);
+      }
+      i = 0;
+    }
+  }
+
+  if (i) {
+    for (j = i; j < 4; j++) {
+      char_array_4[j] = 0;
+    }
+
+    for (j = 0; j < 4; j++) {
+      char_array_4[j] = base64_chars.find(char_array_4[j]);
+    }
+
+    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
+    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
+    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
+
+    for (j = 0; j < i - 1; j++) {
+      decoded.push_back(char_array_3[j]);
+    }
+  }
+
+  return decoded;
+}
+
 using json = nlohmann::ordered_json;
 
 // loadModelInfo(path: string): object
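The `fnv_hash` helper above uses the standard 64-bit FNV-1a parameters (offset basis `0xcbf29ce484222325`, prime `0x100000001b3`) and returns the hash as a decimal string. For cross-checking from the JS side, a minimal TypeScript sketch of the same computation; the `fnv1a64` name is ours, not part of the package API:

```ts
const FNV_PRIME = 0x100000001b3n;
const FNV_OFFSET_BASIS = 0xcbf29ce484222325n;
const MASK_64 = (1n << 64n) - 1n; // emulate uint64_t wraparound

function fnv1a64(data: Uint8Array): string {
  let hash = FNV_OFFSET_BASIS;
  for (const byte of data) {
    hash ^= BigInt(byte);                // FNV-1a: xor the byte first...
    hash = (hash * FNV_PRIME) & MASK_64; // ...then multiply, truncated to 64 bits
  }
  return hash.toString(); // matches std::to_string(hash) in the C++ helper
}

// e.g. fnv1a64(new TextEncoder().encode('hello'))
```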
@@ -116,6 +201,15 @@ void LlamaContext::Init(Napi::Env env, Napi::Object &exports) {
        InstanceMethod<&LlamaContext::GetLoadedLoraAdapters>(
            "getLoadedLoraAdapters",
            static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::InitMultimodal>(
+           "initMultimodal",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::IsMultimodalEnabled>(
+           "isMultimodalEnabled",
+           static_cast<napi_property_attributes>(napi_enumerable)),
+       InstanceMethod<&LlamaContext::ReleaseMultimodal>(
+           "releaseMultimodal",
+           static_cast<napi_property_attributes>(napi_enumerable)),
        InstanceMethod<&LlamaContext::Release>(
            "release", static_cast<napi_property_attributes>(napi_enumerable)),
        StaticMethod<&LlamaContext::ModelInfo>(
@@ -448,7 +542,6 @@ Napi::Value LlamaContext::GetFormattedChat(const Napi::CallbackInfo &info) {
     Napi::TypeError::New(env, "Array expected").ThrowAsJavaScriptException();
   }
   auto messages = json_stringify(info[0].As<Napi::Array>());
-  printf("messages: %s\n", messages.c_str());
   auto chat_template = info[1].IsString() ? info[1].ToString().Utf8Value() : "";
 
   auto has_params = info.Length() >= 2;
@@ -545,6 +638,25 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
     }
   }
 
+  // Process image_paths parameter
+  std::vector<std::string> image_paths;
+  if (options.Has("image_paths")) {
+    if (options.Get("image_paths").IsArray()) {
+      auto image_paths_array = options.Get("image_paths").As<Napi::Array>();
+      for (size_t i = 0; i < image_paths_array.Length(); i++) {
+        image_paths.push_back(image_paths_array.Get(i).ToString().Utf8Value());
+      }
+    } else if (options.Get("image_paths").IsString()) {
+      image_paths.push_back(options.Get("image_paths").ToString().Utf8Value());
+    }
+  }
+
+  // Check if multimodal is enabled when image_paths are provided
+  if (!image_paths.empty() && !(_has_multimodal && _mtmd_ctx != nullptr)) {
+    Napi::Error::New(env, "Multimodal support must be enabled via initMultimodal to use image_paths").ThrowAsJavaScriptException();
+    return env.Undefined();
+  }
+
   int32_t chat_format = get_option<int32_t>(options, "chat_format", 0);
 
   common_params params = _sess->params();
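On the JS side, `image_paths` may therefore be a single string or an array of strings; both forms normalize to a `std::vector<std::string>` before the completion worker runs. A hedged sketch of the accepted shapes (the `CompletionImageOptions` type name is illustrative, not taken from `lib/binding.ts`):

```ts
// Only the image_paths field and its string | string[] shape are
// confirmed by the native parsing above; the rest is illustrative.
type CompletionImageOptions = {
  prompt: string;
  image_paths?: string | string[];
};

const one: CompletionImageOptions = {
  prompt: 'Describe this image.',
  image_paths: './photo.jpg',          // single path
};

const many: CompletionImageOptions = {
  prompt: 'Compare these images.',
  image_paths: ['./a.png', './b.png'], // list of paths
};
```

Either form is rejected with "Multimodal support must be enabled via initMultimodal" unless `initMultimodal` has succeeded first.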
@@ -727,17 +839,17 @@ Napi::Value LlamaContext::Completion(const Napi::CallbackInfo &info) {
   }
 
   auto *worker =
-      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format);
+      new LlamaCompletionWorker(info, _sess, callback, params, stop_words, chat_format, image_paths);
   worker->Queue();
   _wip = worker;
-  worker->onComplete([this]() { _wip = nullptr; });
+  worker->OnComplete([this]() { _wip = nullptr; });
   return worker->Promise();
 }
 
 // stopCompletion(): void
 void LlamaContext::StopCompletion(const Napi::CallbackInfo &info) {
   if (_wip != nullptr) {
-    _wip->Stop();
+    _wip->SetStop();
   }
 }
 
@@ -890,14 +1002,110 @@ Napi::Value LlamaContext::GetLoadedLoraAdapters(const Napi::CallbackInfo &info)
 Napi::Value LlamaContext::Release(const Napi::CallbackInfo &info) {
   auto env = info.Env();
   if (_wip != nullptr) {
-    _wip->Stop();
+    _wip->SetStop();
   }
   if (_sess == nullptr) {
     auto promise = Napi::Promise::Deferred(env);
     promise.Resolve(env.Undefined());
     return promise.Promise();
   }
+
+  // Clear the mtmd context reference in the session
+  if (_mtmd_ctx != nullptr) {
+    _sess->set_mtmd_ctx(nullptr);
+  }
+
   auto *worker = new DisposeWorker(info, std::move(_sess));
   worker->Queue();
   return worker->Promise();
 }
+
+LlamaContext::~LlamaContext() {
+  if (_mtmd_ctx != nullptr) {
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+}
+
+// initMultimodal(options: { path: string, use_gpu?: boolean }): boolean
+Napi::Value LlamaContext::InitMultimodal(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+
+  if (info.Length() < 1 || !info[0].IsObject()) {
+    Napi::TypeError::New(env, "Object expected for mmproj path").ThrowAsJavaScriptException();
+  }
+
+  auto options = info[0].As<Napi::Object>();
+  auto mmproj_path = options.Get("path").ToString().Utf8Value();
+  auto use_gpu = options.Get("use_gpu").ToBoolean().Value();
+
+  if (mmproj_path.empty()) {
+    Napi::TypeError::New(env, "mmproj path is required").ThrowAsJavaScriptException();
+  }
+
+  console_log(env, "Initializing multimodal with mmproj path: " + mmproj_path);
+
+  auto model = _sess->model();
+  auto ctx = _sess->context();
+  if (model == nullptr) {
+    Napi::Error::New(env, "Model not loaded").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  if (_mtmd_ctx != nullptr) {
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+
+  // Initialize mtmd context
+  mtmd_context_params mtmd_params = mtmd_context_params_default();
+  mtmd_params.use_gpu = use_gpu;
+  mtmd_params.print_timings = false;
+  mtmd_params.n_threads = _sess->params().cpuparams.n_threads;
+  mtmd_params.verbosity = (ggml_log_level)GGML_LOG_LEVEL_INFO;
+
+  console_log(env, format_string("Initializing mtmd context with threads=%d, use_gpu=%d",
+                                 mtmd_params.n_threads, mtmd_params.use_gpu ? 1 : 0));
+
+  _mtmd_ctx = mtmd_init_from_file(mmproj_path.c_str(), model, mtmd_params);
+  if (_mtmd_ctx == nullptr) {
+    Napi::Error::New(env, "Failed to initialize multimodal context").ThrowAsJavaScriptException();
+    return Napi::Boolean::New(env, false);
+  }
+
+  _has_multimodal = true;
+
+  // Share the mtmd context with the session
+  _sess->set_mtmd_ctx(_mtmd_ctx);
+
+  // Check if the model uses M-RoPE or non-causal attention
+  bool uses_mrope = mtmd_decode_use_mrope(_mtmd_ctx);
+  bool uses_non_causal = mtmd_decode_use_non_causal(_mtmd_ctx);
+  console_log(env, format_string("Model multimodal properties: uses_mrope=%d, uses_non_causal=%d",
                                 uses_mrope ? 1 : 0, uses_non_causal ? 1 : 0));
+
+  console_log(env, "Multimodal context initialized successfully with mmproj: " + mmproj_path);
+  return Napi::Boolean::New(env, true);
+}
+
+// isMultimodalEnabled(): boolean
+Napi::Value LlamaContext::IsMultimodalEnabled(const Napi::CallbackInfo &info) {
+  return Napi::Boolean::New(info.Env(), _has_multimodal && _mtmd_ctx != nullptr);
+}
+
+// releaseMultimodal(): void
+void LlamaContext::ReleaseMultimodal(const Napi::CallbackInfo &info) {
+  if (_mtmd_ctx != nullptr) {
+    // Clear the mtmd context reference in the session
+    if (_sess != nullptr) {
+      _sess->set_mtmd_ctx(nullptr);
+    }
+
+    // Free the mtmd context
+    mtmd_free(_mtmd_ctx);
+    _mtmd_ctx = nullptr;
+    _has_multimodal = false;
+  }
+}
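Taken together, the new bindings give JS a complete enable/query/release lifecycle for the mmproj projector. A hedged end-to-end sketch: `initMultimodal`, `isMultimodalEnabled`, `releaseMultimodal`, and `image_paths` come from this diff, while the `loadModel` entry point, option names, and result shape are assumptions about `lib/index.ts`:

```ts
import { loadModel } from '@fugood/llama.node'; // assumed export name

async function main() {
  const context = await loadModel({ model: './model.gguf' }); // illustrative options

  // Load the multimodal projector; the native method returns a boolean
  // and throws if the mmproj file cannot be initialized.
  const ok = context.initMultimodal({ path: './mmproj.gguf', use_gpu: true });

  if (ok && context.isMultimodalEnabled()) {
    const result = await context.completion({
      prompt: 'What is in this picture?',
      image_paths: ['./picture.jpg'], // rejected unless multimodal is enabled
    });
    console.log(result.text); // result shape assumed from the existing completion API
  }

  context.releaseMultimodal(); // frees the mtmd context, clears the session reference
  await context.release();     // ~LlamaContext also frees it as a safety net
}

main();
```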
package/src/LlamaContext.h CHANGED
@@ -1,10 +1,13 @@
 #include "common.hpp"
+#include "tools/mtmd/mtmd.h"
+#include "tools/mtmd/clip.h"
 
 class LlamaCompletionWorker;
 
 class LlamaContext : public Napi::ObjectWrap<LlamaContext> {
 public:
   LlamaContext(const Napi::CallbackInfo &info);
+  ~LlamaContext();
   static void ToggleNativeLog(const Napi::CallbackInfo &info);
   static Napi::Value ModelInfo(const Napi::CallbackInfo& info);
   static void Init(Napi::Env env, Napi::Object &exports);
@@ -24,6 +27,11 @@ private:
   void RemoveLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value GetLoadedLoraAdapters(const Napi::CallbackInfo &info);
   Napi::Value Release(const Napi::CallbackInfo &info);
+
+  // Multimodal methods
+  Napi::Value InitMultimodal(const Napi::CallbackInfo &info);
+  Napi::Value IsMultimodalEnabled(const Napi::CallbackInfo &info);
+  void ReleaseMultimodal(const Napi::CallbackInfo &info);
 
   std::string _info;
   Napi::Object _meta;
@@ -31,4 +39,8 @@ private:
   common_chat_templates_ptr _templates;
   std::vector<common_adapter_lora_info> _lora;
   LlamaCompletionWorker *_wip = nullptr;
+
+  // Multimodal support
+  mtmd_context *_mtmd_ctx = nullptr;
+  bool _has_multimodal = false;
 };
package/src/common.hpp CHANGED
@@ -4,6 +4,7 @@
 #include "common/sampling.h"
 #include "chat.h"
 #include "llama.h"
+#include "tools/mtmd/mtmd.h"
 #include <memory>
 #include <mutex>
 #include <napi.h>
@@ -82,10 +83,23 @@ public:
   inline const common_params &params() const { return params_; }
 
   inline std::mutex &get_mutex() { return mutex; }
+
+  // Getter for the multimodal context
+  inline const mtmd_context* get_mtmd_ctx() const {
+    return _mtmd_ctx;
+  }
+
+  // Setter for the multimodal context
+  inline void set_mtmd_ctx(mtmd_context* ctx) {
+    _mtmd_ctx = ctx;
+  }
 
   void dispose() {
     std::lock_guard<std::mutex> lock(mutex);
     tokens_.clear();
+
+    // mtmd_ctx is owned by LlamaContext, so we don't free it here
+    _mtmd_ctx = nullptr;
   }
 
 private:
@@ -93,6 +107,7 @@ private:
   const common_params params_;
   std::vector<llama_token> tokens_{};
   std::mutex mutex;
+  mtmd_context* _mtmd_ctx = nullptr;
 };
 
 typedef std::shared_ptr<LlamaSession> LlamaSessionPtr;
package/src/llama.cpp/.github/workflows/build-linux-cross.yml CHANGED
@@ -4,18 +4,25 @@ on:
   workflow_call:
 
 jobs:
-  ubuntu-latest-riscv64-cpu-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-cpu-cross:
+    runs-on: ubuntu-24.04
 
     steps:
      - uses: actions/checkout@v4
      - name: Setup Riscv
        run: |
          sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
          sudo apt-get install -y --no-install-recommends \
            build-essential \
            gcc-14-riscv64-linux-gnu \
@@ -27,6 +34,7 @@ jobs:
          cmake -B build -DCMAKE_BUILD_TYPE=Release \
            -DGGML_OPENMP=OFF \
            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
            -DLLAMA_BUILD_TESTS=OFF \
            -DCMAKE_SYSTEM_NAME=Linux \
            -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -40,21 +48,25 @@
 
          cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-riscv64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
      - name: Setup Riscv
        run: |
          sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
          sudo apt-get install -y --no-install-recommends \
            build-essential \
            glslc \
@@ -69,6 +81,7 @@ jobs:
            -DGGML_VULKAN=ON \
            -DGGML_OPENMP=OFF \
            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
            -DLLAMA_BUILD_TESTS=OFF \
            -DCMAKE_SYSTEM_NAME=Linux \
            -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -82,21 +95,25 @@
 
          cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-arm64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-arm64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
      - name: Setup Arm64
        run: |
          sudo dpkg --add-architecture arm64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
          sudo apt-get install -y --no-install-recommends \
            build-essential \
            glslc \
@@ -110,6 +127,7 @@ jobs:
            -DGGML_VULKAN=ON \
            -DGGML_OPENMP=OFF \
            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
            -DLLAMA_BUILD_TESTS=OFF \
            -DCMAKE_SYSTEM_NAME=Linux \
            -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
@@ -122,3 +140,94 @@ jobs:
            -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
 
          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-cpu-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+            build-essential \
+            gcc-14-powerpc64le-linux-gnu \
+            g++-14-powerpc64le-linux-gnu \
+            libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_OPENMP=OFF \
+            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DCMAKE_SYSTEM_NAME=Linux \
+            -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+            -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+            -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+            -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+            -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+            -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+            -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+            -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)
+
+  ubuntu-24-ppc64el-vulkan-cross:
+    runs-on: ubuntu-24.04
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup PowerPC64le
+        run: |
+          sudo dpkg --add-architecture ppc64el
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
+          sudo apt-get install -y --no-install-recommends \
+            build-essential \
+            glslc \
+            gcc-14-powerpc64le-linux-gnu \
+            g++-14-powerpc64le-linux-gnu \
+            libvulkan-dev:ppc64el \
+            libcurl4-openssl-dev:ppc64el
+
+      - name: Build
+        run: |
+          cmake -B build -DCMAKE_BUILD_TYPE=Release \
+            -DGGML_VULKAN=ON \
+            -DGGML_OPENMP=OFF \
+            -DLLAMA_BUILD_EXAMPLES=ON \
+            -DLLAMA_BUILD_TOOLS=ON \
+            -DLLAMA_BUILD_TESTS=OFF \
+            -DCMAKE_SYSTEM_NAME=Linux \
+            -DCMAKE_SYSTEM_PROCESSOR=ppc64 \
+            -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \
+            -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \
+            -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+            -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \
+            -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
+            -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
+            -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
+
+          cmake --build build --config Release -j $(nproc)