cui-llama.rn 1.6.1 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. package/android/src/main/CMakeLists.txt +6 -0
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +38 -5
  3. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  4. package/android/src/main/jni.cpp +153 -14
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  13. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  14. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  15. package/cpp/chat.cpp +128 -106
  16. package/cpp/chat.h +2 -0
  17. package/cpp/common.cpp +41 -76
  18. package/cpp/common.h +23 -19
  19. package/cpp/ggml-backend.cpp +9 -5
  20. package/cpp/ggml-backend.h +4 -4
  21. package/cpp/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  22. package/cpp/ggml-cpu/ggml-cpu-quants.c +306 -6
  23. package/cpp/ggml-cpu/ggml-cpu.c +5 -13
  24. package/cpp/ggml-cpu/ggml-cpu.cpp +29 -16
  25. package/cpp/ggml-cpu/ops.cpp +107 -13
  26. package/cpp/ggml-cpu/vec.cpp +0 -6
  27. package/cpp/ggml-cpu/vec.h +16 -0
  28. package/cpp/ggml-llama-sim.metallib +0 -0
  29. package/cpp/ggml-llama.metallib +0 -0
  30. package/cpp/ggml-metal-impl.h +36 -11
  31. package/cpp/ggml-metal.m +321 -132
  32. package/cpp/ggml-opt.cpp +373 -190
  33. package/cpp/ggml-opt.h +49 -28
  34. package/cpp/ggml-quants.c +0 -6
  35. package/cpp/ggml.c +93 -38
  36. package/cpp/ggml.h +21 -7
  37. package/cpp/gguf.cpp +33 -33
  38. package/cpp/llama-adapter.cpp +6 -0
  39. package/cpp/llama-arch.cpp +3 -0
  40. package/cpp/llama-batch.cpp +3 -1
  41. package/cpp/llama-chat.cpp +8 -6
  42. package/cpp/llama-chat.h +1 -0
  43. package/cpp/llama-context.cpp +349 -135
  44. package/cpp/llama-context.h +30 -3
  45. package/cpp/llama-cparams.h +1 -0
  46. package/cpp/llama-graph.cpp +150 -234
  47. package/cpp/llama-graph.h +52 -7
  48. package/cpp/llama-hparams.cpp +17 -1
  49. package/cpp/llama-hparams.h +34 -5
  50. package/cpp/llama-kv-cache.cpp +662 -321
  51. package/cpp/llama-kv-cache.h +203 -93
  52. package/cpp/llama-memory.h +3 -2
  53. package/cpp/llama-model-loader.cpp +24 -15
  54. package/cpp/llama-model-saver.cpp +281 -0
  55. package/cpp/llama-model-saver.h +37 -0
  56. package/cpp/llama-model.cpp +536 -132
  57. package/cpp/llama-model.h +7 -1
  58. package/cpp/llama-sampling.cpp +18 -6
  59. package/cpp/llama-vocab.cpp +46 -8
  60. package/cpp/llama-vocab.h +6 -0
  61. package/cpp/llama.cpp +14 -0
  62. package/cpp/llama.h +72 -131
  63. package/cpp/minja/chat-template.hpp +9 -5
  64. package/cpp/minja/minja.hpp +69 -36
  65. package/cpp/rn-llama.cpp +611 -47
  66. package/cpp/rn-llama.h +33 -3
  67. package/cpp/sampling.cpp +57 -50
  68. package/cpp/tools/mtmd/clip-impl.h +462 -0
  69. package/cpp/tools/mtmd/clip.cpp +4024 -0
  70. package/cpp/tools/mtmd/clip.h +101 -0
  71. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  72. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  73. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  74. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  75. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  76. package/cpp/tools/mtmd/mtmd.h +362 -0
  77. package/cpp/tools/mtmd/stb_image.h +7988 -0
  78. package/ios/CMakeLists.txt +7 -0
  79. package/ios/RNLlama.mm +77 -3
  80. package/ios/RNLlamaContext.h +5 -1
  81. package/ios/RNLlamaContext.mm +105 -10
  82. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  83. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +23 -19
  84. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +21 -7
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +30 -3
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +52 -7
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +34 -5
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +3 -2
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +7 -1
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +72 -131
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +33 -3
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  105. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  106. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +23 -19
  107. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  108. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  109. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  110. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +21 -7
  111. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  112. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +30 -3
  113. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  114. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +52 -7
  115. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +34 -5
  116. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  117. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +3 -2
  118. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  119. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +7 -1
  120. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  121. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +72 -131
  122. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  123. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  124. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +33 -3
  125. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  126. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  127. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  128. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  129. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  130. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +23 -19
  131. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  132. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  133. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  134. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +21 -7
  135. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  136. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +30 -3
  137. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  138. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +52 -7
  139. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +34 -5
  140. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  141. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +3 -2
  142. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  143. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +7 -1
  144. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  145. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +72 -131
  146. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  147. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  148. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +33 -3
  149. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  150. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  151. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  152. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  153. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +23 -19
  154. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  155. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  156. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  157. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +21 -7
  158. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  159. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +30 -3
  160. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  161. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +52 -7
  162. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +34 -5
  163. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  164. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +3 -2
  165. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  166. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +7 -1
  167. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  168. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +72 -131
  169. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  170. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  171. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +33 -3
  172. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  173. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  174. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  175. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  176. package/jest/mock.js +33 -7
  177. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  178. package/lib/commonjs/index.js +153 -21
  179. package/lib/commonjs/index.js.map +1 -1
  180. package/lib/module/NativeRNLlama.js.map +1 -1
  181. package/lib/module/index.js +152 -20
  182. package/lib/module/index.js.map +1 -1
  183. package/lib/typescript/NativeRNLlama.d.ts +50 -4
  184. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  185. package/lib/typescript/index.d.ts +72 -6
  186. package/lib/typescript/index.d.ts.map +1 -1
  187. package/package.json +1 -1
  188. package/src/NativeRNLlama.ts +67 -4
  189. package/src/index.ts +212 -38
  190. package/lib/commonjs/chat.js +0 -37
  191. package/lib/commonjs/chat.js.map +0 -1
  192. package/lib/module/chat.js +0 -33
  193. package/lib/module/chat.js.map +0 -1
  194. package/lib/typescript/chat.d.ts +0 -10
  195. package/lib/typescript/chat.d.ts.map +0 -1
  196. package/src/chat.ts +0 -44
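
The headline change in this release is multimodal (vision and audio) input support: llama.cpp's mtmd tooling is vendored under package/cpp/tools/mtmd (clip.cpp, mtmd.cpp, mtmd-audio.cpp, mtmd-helper.cpp), and the Android bridge gains initMultimodal, isMultimodalEnabled, getMultimodalSupport and releaseMultimodal methods plus media_paths arguments on tokenize and completion; the iOS bridge files (RNLlama.mm, RNLlamaContext.mm) change alongside them. The TypeScript wrappers (package/src/index.ts, package/src/NativeRNLlama.ts) are not reproduced in the hunks below, so the sketch that follows is inferred purely from the Java/JNI signatures shown in this diff; the interface and method names are assumptions for illustration, not the shipped JS API.

// Sketch only: inferred from the Java/JNI signatures in this diff, not from the
// shipped TypeScript wrappers in package/src/index.ts (names there may differ).
type MultimodalSupport = { vision: boolean; audio: boolean }

type TokenizeResult = {
  tokens: number[]
  has_media: boolean
  bitmap_hashes: string[]
  chunk_pos: number[]
  chunk_pos_media: number[]
}

// Native surface mirrored from RNLlamaModule.java / RNLlama.java; `contextId` is
// the numeric context handle returned by initContext.
interface MultimodalNativeMethods {
  initMultimodal(contextId: number, params: { path: string; use_gpu?: boolean }): Promise<boolean>
  isMultimodalEnabled(contextId: number): Promise<boolean>
  getMultimodalSupport(contextId: number): Promise<MultimodalSupport>
  releaseMultimodal(contextId: number): Promise<void>
  tokenizeAsync(contextId: number, text: string, mediaPaths: string[]): Promise<TokenizeResult>
}

// Example flow, assuming `native` implements the interface above:
async function tokenizeWithImage(native: MultimodalNativeMethods, contextId: number, imagePath: string) {
  const ok = await native.initMultimodal(contextId, { path: '/path/to/mmproj.gguf', use_gpu: true })
  if (!ok) throw new Error('failed to load mmproj')
  const support = await native.getMultimodalSupport(contextId)
  if (!support.vision) throw new Error('loaded mmproj has no vision encoder')
  const result = await native.tokenizeAsync(contextId, 'Describe this image.', [imagePath])
  console.log(result.has_media, result.chunk_pos_media)
  await native.releaseMultimodal(contextId)
}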

package/android/src/main/CMakeLists.txt

@@ -14,6 +14,7 @@ set(RNLLAMA_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)
 include_directories(
     ${RNLLAMA_LIB_DIR}
     ${RNLLAMA_LIB_DIR}/ggml-cpu
+    ${RNLLAMA_LIB_DIR}/tools/mtmd
 )

 set(
@@ -40,6 +41,11 @@ set(
     ${RNLLAMA_LIB_DIR}/gguf.cpp
     ${RNLLAMA_LIB_DIR}/log.cpp
     ${RNLLAMA_LIB_DIR}/llama-impl.cpp
+    # Multimodal support
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-audio.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/clip.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-helper.cpp
     ${RNLLAMA_LIB_DIR}/llama-grammar.cpp
     ${RNLLAMA_LIB_DIR}/llama-sampling.cpp
     ${RNLLAMA_LIB_DIR}/llama-vocab.cpp

package/android/src/main/java/com/rnllama/LlamaContext.java

@@ -369,6 +369,8 @@ public class LlamaContext {
       params.hasKey("top_n_sigma") ? (float) params.getDouble("top_n_sigma") : -1.0f,
       // String[] dry_sequence_breakers, when undef, we use the default definition from common.h
       params.hasKey("dry_sequence_breakers") ? params.getArray("dry_sequence_breakers").toArrayList().toArray(new String[0]) : new String[]{"\n", ":", "\"", "*"},
+      // String[] media_paths
+      params.hasKey("media_paths") ? params.getArray("media_paths").toArrayList().toArray(new String[0]) : new String[0],
       // PartialCompletionCallback partial_completion_callback
       new PartialCompletionCallback(
         this,
@@ -389,10 +391,8 @@ public class LlamaContext {
     return isPredicting(this.context);
   }

-  public WritableMap tokenize(String text) {
-    WritableMap result = Arguments.createMap();
-    result.putArray("tokens", tokenize(this.context, text));
-    return result;
+  public WritableMap tokenize(String text, ReadableArray media_paths) {
+    return tokenize(this.context, text, media_paths == null ? new String[0] : media_paths.toArrayList().toArray(new String[0]));
   }

   public String detokenize(ReadableArray tokens) {
@@ -439,6 +439,34 @@ public class LlamaContext {
     return getLoadedLoraAdapters(this.context);
   }

+  public boolean initMultimodal(ReadableMap params) {
+    String mmprojPath = params.getString("path");
+    boolean mmprojUseGpu = params.hasKey("use_gpu") ? params.getBoolean("use_gpu") : true;
+    if (mmprojPath == null || mmprojPath.isEmpty()) {
+      throw new IllegalArgumentException("mmproj_path is empty");
+    }
+    File file = new File(mmprojPath);
+    if (!file.exists()) {
+      throw new IllegalArgumentException("mmproj file does not exist: " + mmprojPath);
+    }
+    return initMultimodal(this.context, mmprojPath, mmprojUseGpu);
+  }
+
+  public boolean isMultimodalEnabled() {
+    return isMultimodalEnabled(this.context);
+  }
+
+  public WritableMap getMultimodalSupport() {
+    if (!isMultimodalEnabled()) {
+      throw new IllegalStateException("Multimodal is not enabled");
+    }
+    return getMultimodalSupport(this.context);
+  }
+
+  public void releaseMultimodal() {
+    releaseMultimodal(this.context);
+  }
+
   public void release() {
     freeContext(context);
   }
@@ -563,6 +591,9 @@ public class LlamaContext {
     boolean ctx_shift,
     LoadProgressCallback load_progress_callback
   );
+  protected static native boolean initMultimodal(long contextPtr, String mmproj_path, boolean MMPROJ_USE_GPU);
+  protected static native boolean isMultimodalEnabled(long contextPtr);
+  protected static native WritableMap getMultimodalSupport(long contextPtr);
   protected static native void interruptLoad(long contextPtr);
   protected static native WritableMap loadModelDetails(
     long contextPtr
@@ -626,11 +657,12 @@ public class LlamaContext {
     int dry_penalty_last_n,
     float top_n_sigma,
     String[] dry_sequence_breakers,
+    String[] media_paths,
     PartialCompletionCallback partial_completion_callback
   );
   protected static native void stopCompletion(long contextPtr);
   protected static native boolean isPredicting(long contextPtr);
-  protected static native WritableArray tokenize(long contextPtr, String text);
+  protected static native WritableMap tokenize(long contextPtr, String text, String[] media_paths);
   protected static native String detokenize(long contextPtr, int[] tokens);
   protected static native boolean isEmbeddingEnabled(long contextPtr);
   protected static native WritableMap embedding(
@@ -645,4 +677,5 @@ public class LlamaContext {
   protected static native void freeContext(long contextPtr);
   protected static native void setupLog(NativeLogCallback logCallback);
   protected static native void unsetLog();
+  protected static native void releaseMultimodal(long contextPtr);
 }

package/android/src/main/java/com/rnllama/RNLlama.java

@@ -322,7 +322,7 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "stopCompletion-" + contextId);
   }

-  public void tokenizeAsync(double id, final String text, final Promise promise) {
+  public void tokenizeAsync(double id, final String text, final ReadableArray media_paths, final Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
       private Exception exception;
@@ -334,7 +334,7 @@ public class RNLlama implements LifecycleEventListener {
           if (context == null) {
             throw new Exception("Context not found");
           }
-          return context.tokenize(text);
+          return context.tokenize(text, media_paths);
         } catch (Exception e) {
           exception = e;
         }
@@ -354,13 +354,13 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "tokenize-" + contextId);
   }

-  public WritableMap tokenizeSync(double id, final String text) {
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray image_paths) {
     int contextId = (int) id;
     LlamaContext context = contexts.get(contextId);
     if (context == null) {
       return Arguments.createMap();
     }
-    return context.tokenize(text);
+    return context.tokenize(text, image_paths);
   }

   public void getCpuFeatures(Promise promise) {
@@ -597,6 +597,141 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "getLoadedLoraAdapters-" + contextId);
   }

+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (context.isPredicting()) {
+            throw new Exception("Context is busy");
+          }
+          return context.initMultimodal(params);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "initMultimodal-" + contextId);
+  }
+
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          return context.isMultimodalEnabled();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "isMultimodalEnabled" + contextId);
+  }
+
+  public void getMultimodalSupport(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
+      private Exception exception;
+
+      @Override
+      protected WritableMap doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isMultimodalEnabled()) {
+            throw new Exception("Multimodal is not enabled");
+          }
+          return context.getMultimodalSupport();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(WritableMap result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "getMultimodalSupport-" + contextId);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Void>() {
+      private Exception exception;
+
+      @Override
+      protected Void doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          context.releaseMultimodal();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Void result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(null);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "releaseMultimodal" + id);
+  }
+
   public void releaseContext(double id, Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, Void>() {

package/android/src/main/jni.cpp

@@ -600,6 +600,12 @@ Java_com_rnllama_LlamaContext_loadSession(
     llama->embd.resize(n_token_count_out);
     env->ReleaseStringUTFChars(path, path_chars);

+    // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+    auto null_token_iter = std::find(llama->embd.begin(), llama->embd.end(), LLAMA_TOKEN_NULL);
+    if (null_token_iter != llama->embd.end()) {
+        llama->embd.resize(std::distance(llama->embd.begin(), null_token_iter));
+    }
+
     const std::string text = rnllama::tokens_to_str(llama->ctx, llama->embd.cbegin(), llama->embd.cend());
     putInt(env, result, "tokens_loaded", n_token_count_out);
     putString(env, result, "prompt", text.c_str());
@@ -620,6 +626,13 @@ Java_com_rnllama_LlamaContext_saveSession(
     const char *path_chars = env->GetStringUTFChars(path, nullptr);

     std::vector<llama_token> session_tokens = llama->embd;
+
+    // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+    auto null_token_iter = std::find(session_tokens.begin(), session_tokens.end(), LLAMA_TOKEN_NULL);
+    if (null_token_iter != session_tokens.end()) {
+        session_tokens.resize(std::distance(session_tokens.begin(), null_token_iter));
+    }
+
     int default_size = session_tokens.size();
     int save_size = size > 0 && size <= default_size ? size : default_size;
     if (!llama_state_save_file(llama->ctx, path_chars, session_tokens.data(), save_size)) {
@@ -694,6 +707,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
     jint dry_penalty_last_n,
     jfloat top_n_sigma,
     jobjectArray dry_sequence_breakers,
+    jobjectArray media_paths,
     jobject partial_completion_callback
 ) {
     UNUSED(thiz);
@@ -703,8 +717,32 @@ Java_com_rnllama_LlamaContext_doCompletion(

     //llama_reset_timings(llama->ctx);

-    auto prompt_chars = env->GetStringUTFChars(prompt, nullptr);
+    const char *prompt_chars = env->GetStringUTFChars(prompt, nullptr);
+
+    // Set the prompt parameter
     llama->params.prompt = prompt_chars;
+
+    // Process image paths if provided
+    std::vector<std::string> media_paths_vector;
+
+    jint media_paths_size = env->GetArrayLength(media_paths);
+    if (media_paths_size > 0) {
+        // Check if multimodal is enabled
+        if (!llama->isMultimodalEnabled()) {
+            auto result = createWriteableMap(env);
+            putString(env, result, "error", "Multimodal support not enabled. Call initMultimodal first.");
+            env->ReleaseStringUTFChars(prompt, prompt_chars);
+            return reinterpret_cast<jobject>(result);
+        }
+
+        for (jint i = 0; i < media_paths_size; i++) {
+            jstring image_path = (jstring) env->GetObjectArrayElement(media_paths, i);
+            const char *image_path_chars = env->GetStringUTFChars(image_path, nullptr);
+            media_paths_vector.push_back(image_path_chars);
+            env->ReleaseStringUTFChars(image_path, image_path_chars);
+        }
+    }
+
     llama->params.sampling.seed = (seed == -1) ? time(NULL) : seed;

     int max_threads = std::thread::hardware_concurrency();
@@ -861,10 +899,19 @@ Java_com_rnllama_LlamaContext_doCompletion(
         putString(env, result, "error", "Failed to initialize sampling");
         return reinterpret_cast<jobject>(result);
     }
+
     llama->beginCompletion();
-    llama->loadPrompt();
+    try {
+        llama->loadPrompt(media_paths_vector);
+    } catch (const std::exception &e) {
+        llama->endCompletion();
+        auto result = createWriteableMap(env);
+        putString(env, result, "error", e.what());
+        return reinterpret_cast<jobject>(result);
+    }

     if (llama->context_full) {
+        llama->endCompletion();
         auto result = createWriteableMap(env);
         putString(env, result, "error", "Context is full");
         return reinterpret_cast<jobject>(result);
@@ -931,9 +978,14 @@ Java_com_rnllama_LlamaContext_doCompletion(
     }

     env->ReleaseStringUTFChars(grammar, grammar_chars);
-    env->ReleaseStringUTFChars(prompt, prompt_chars);
+
+    // Release prompt_chars if it's still allocated
+    if (prompt_chars != nullptr) {
+        env->ReleaseStringUTFChars(prompt, prompt_chars);
+    }
+
     llama_perf_context_print(llama->ctx);
-    llama->is_predicting = false;
+    llama->endCompletion();

     auto toolCalls = createWritableArray(env);
     std::string reasoningContent = "";
@@ -1023,22 +1075,47 @@ Java_com_rnllama_LlamaContext_isPredicting(

 JNIEXPORT jobject JNICALL
 Java_com_rnllama_LlamaContext_tokenize(
-    JNIEnv *env, jobject thiz, jlong context_ptr, jstring text) {
+    JNIEnv *env, jobject thiz, jlong context_ptr, jstring text, jobjectArray media_paths) {
     UNUSED(thiz);
     auto llama = context_map[(long) context_ptr];

     const char *text_chars = env->GetStringUTFChars(text, nullptr);
+    std::vector<std::string> media_paths_vector;
+    for (int i = 0; i < env->GetArrayLength(media_paths); i++) {
+        jstring image_path = (jstring) env->GetObjectArrayElement(media_paths, i);
+        const char *image_path_chars = env->GetStringUTFChars(image_path, nullptr);
+        media_paths_vector.push_back(image_path_chars);
+        env->ReleaseStringUTFChars(image_path, image_path_chars);
+    }
+    auto tokenize_result = llama->tokenize(text_chars, media_paths_vector);
+
+    auto result = createWriteableMap(env);
+
+    auto tokens = createWritableArray(env);
+    for (const auto &tok : tokenize_result.tokens) {
+        pushInt(env, tokens, tok);
+    }
+    putArray(env, result, "tokens", tokens);
+
+    putBoolean(env, result, "has_media", tokenize_result.has_media);

-    const std::vector<llama_token> toks = common_tokenize(
-        llama->ctx,
-        text_chars,
-        false
-    );
+    auto bitmap_hashes = createWritableArray(env);
+    for (const auto &hash : tokenize_result.bitmap_hashes) {
+        pushString(env, bitmap_hashes, hash.c_str());
+    }
+    putArray(env, result, "bitmap_hashes", bitmap_hashes);
+
+    auto chunk_pos = createWritableArray(env);
+    for (const auto &pos : tokenize_result.chunk_pos) {
+        pushInt(env, chunk_pos, pos);
+    }
+    putArray(env, result, "chunk_pos", chunk_pos);

-    jobject result = createWritableArray(env);
-    for (const auto &tok : toks) {
-        pushInt(env, result, tok);
+    auto chunk_pos_media = createWritableArray(env);
+    for (const auto &pos : tokenize_result.chunk_pos_media) {
+        pushInt(env, chunk_pos_media, pos);
     }
+    putArray(env, result, "chunk_pos_media", chunk_pos_media);

     env->ReleaseStringUTFChars(text, text_chars);
     return result;
@@ -1107,7 +1184,12 @@ Java_com_rnllama_LlamaContext_embedding(
     }

     llama->beginCompletion();
-    llama->loadPrompt();
+    try {
+        llama->loadPrompt({});
+    } catch (const std::exception &e) {
+        putString(env, result, "error", e.what());
+        return reinterpret_cast<jobject>(result);
+    }
     llama->doCompletion();

     std::vector<float> embedding = llama->getEmbedding(embdParams);
@@ -1276,4 +1358,61 @@ Java_com_rnllama_LlamaContext_unsetLog(JNIEnv *env, jobject thiz) {
     llama_log_set(rnllama_log_callback_default, NULL);
 }

+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_initMultimodal(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring mmproj_path,
+    jboolean mmproj_use_gpu
+) {
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+
+    const char *mmproj_path_chars = env->GetStringUTFChars(mmproj_path, nullptr);
+    bool result = llama->initMultimodal(mmproj_path_chars, mmproj_use_gpu);
+    env->ReleaseStringUTFChars(mmproj_path, mmproj_path_chars);
+
+    return result;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_isMultimodalEnabled(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    return llama->isMultimodalEnabled();
+}
+
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_getMultimodalSupport(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    auto result = createWriteableMap(env);
+    putBoolean(env, result, "vision", llama->isMultimodalSupportVision());
+    putBoolean(env, result, "audio", llama->isMultimodalSupportAudio());
+    return result;
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnllama_LlamaContext_releaseMultimodal(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr
+) {
+    UNUSED(env);
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
+    llama->releaseMultimodal();
+}
+
  } // extern "C"
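
One behavioral detail worth noting from the JNI hunks above: when media_paths is non-empty but multimodal has not been initialized, or when loadPrompt throws, doCompletion does not reject; it returns a result map carrying an error string (for example "Multimodal support not enabled. Call initMultimodal first." or "Context is full"), so a JS caller has to check that field itself. The guard below is a hypothetical sketch; only the error field name and the quoted messages are taken from the code above, the rest of the result shape is an assumption.

// Hypothetical guard for a completion result; field names beyond `error` are assumed.
type NativeCompletionResult = { error?: string; [key: string]: unknown }

function assertCompletionOk(result: NativeCompletionResult): void {
  // e.g. "Multimodal support not enabled. Call initMultimodal first." or "Context is full"
  if (result.error) {
    throw new Error(result.error)
  }
}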

package/android/src/newarch/java/com/rnllama/RNLlamaModule.java

@@ -53,6 +53,26 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     rnllama.initContext(id, params, promise);
   }

+  @ReactMethod
+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    rnllama.initMultimodal(id, params, promise);
+  }
+
+  @ReactMethod
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    rnllama.isMultimodalEnabled(id, promise);
+  }
+
+  @ReactMethod
+  public void getMultimodalSupport(double id, final Promise promise) {
+    rnllama.getMultimodalSupport(id, promise);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    rnllama.releaseMultimodal(id, promise);
+  }
+
   @ReactMethod
   public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
     rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
@@ -79,13 +99,13 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
   }

   @ReactMethod
-  public void tokenizeAsync(double id, final String text, final Promise promise) {
-    rnllama.tokenizeAsync(id, text, promise);
+  public void tokenizeAsync(double id, final String text, final ReadableArray media_paths, final Promise promise) {
+    rnllama.tokenizeAsync(id, text, media_paths, promise);
   }

   @ReactMethod(isBlockingSynchronousMethod=true)
-  public WritableMap tokenizeSync(double id, final String text) {
-    return rnllama.tokenizeSync(id, text);
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray media_paths) {
+    return rnllama.tokenizeSync(id, text, media_paths);
   }

   @ReactMethod

package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java

@@ -54,6 +54,26 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
     rnllama.initContext(id, params, promise);
   }

+  @ReactMethod
+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    rnllama.initMultimodal(id, params, promise);
+  }
+
+  @ReactMethod
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    rnllama.isMultimodalEnabled(id, promise);
+  }
+
+  @ReactMethod
+  public void getMultimodalSupport(double id, final Promise promise) {
+    rnllama.getMultimodalSupport(id, promise);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    rnllama.releaseMultimodal(id, promise);
+  }
+
   @ReactMethod
   public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
     rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
@@ -80,12 +100,12 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
   }

   @ReactMethod
-  public void tokenizeAsync(double id, final String text, final Promise promise) {
+  public void tokenizeAsync(double id, final String text, final ReadableArray image_paths, final Promise promise) {
     rnllama.tokenizeAsync(id, text, promise);
   }

   @ReactMethod(isBlockingSynchronousMethod=true)
-  public WritableMap tokenizeSync(double id, final String text) {
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray image_paths) {
     return rnllama.tokenizeSync(id, text);
   }