cui-llama.rn 1.6.1 → 1.7.1

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (196)
  1. package/android/src/main/CMakeLists.txt +6 -0
  2. package/android/src/main/java/com/rnllama/LlamaContext.java +51 -14
  3. package/android/src/main/java/com/rnllama/RNLlama.java +158 -6
  4. package/android/src/main/jni.cpp +153 -14
  5. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  13. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  14. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  15. package/cpp/chat.cpp +128 -106
  16. package/cpp/chat.h +2 -0
  17. package/cpp/common.cpp +38 -76
  18. package/cpp/common.h +23 -19
  19. package/cpp/ggml-backend.cpp +9 -5
  20. package/cpp/ggml-backend.h +4 -4
  21. package/cpp/ggml-cpu/ggml-cpu-aarch64.cpp +0 -2
  22. package/cpp/ggml-cpu/ggml-cpu-quants.c +306 -6
  23. package/cpp/ggml-cpu/ggml-cpu.c +5 -13
  24. package/cpp/ggml-cpu/ggml-cpu.cpp +29 -16
  25. package/cpp/ggml-cpu/ops.cpp +107 -13
  26. package/cpp/ggml-cpu/vec.cpp +0 -6
  27. package/cpp/ggml-cpu/vec.h +16 -0
  28. package/cpp/ggml-llama-sim.metallib +0 -0
  29. package/cpp/ggml-llama.metallib +0 -0
  30. package/cpp/ggml-metal-impl.h +36 -11
  31. package/cpp/ggml-metal.m +321 -132
  32. package/cpp/ggml-opt.cpp +373 -190
  33. package/cpp/ggml-opt.h +49 -28
  34. package/cpp/ggml-quants.c +0 -6
  35. package/cpp/ggml.c +93 -38
  36. package/cpp/ggml.h +21 -7
  37. package/cpp/gguf.cpp +33 -33
  38. package/cpp/llama-adapter.cpp +6 -0
  39. package/cpp/llama-arch.cpp +3 -0
  40. package/cpp/llama-batch.cpp +3 -1
  41. package/cpp/llama-chat.cpp +8 -6
  42. package/cpp/llama-chat.h +1 -0
  43. package/cpp/llama-context.cpp +349 -135
  44. package/cpp/llama-context.h +30 -3
  45. package/cpp/llama-cparams.h +1 -0
  46. package/cpp/llama-graph.cpp +150 -234
  47. package/cpp/llama-graph.h +52 -7
  48. package/cpp/llama-hparams.cpp +17 -1
  49. package/cpp/llama-hparams.h +34 -5
  50. package/cpp/llama-kv-cache.cpp +662 -321
  51. package/cpp/llama-kv-cache.h +203 -93
  52. package/cpp/llama-memory.h +3 -2
  53. package/cpp/llama-model-loader.cpp +24 -15
  54. package/cpp/llama-model-saver.cpp +281 -0
  55. package/cpp/llama-model-saver.h +37 -0
  56. package/cpp/llama-model.cpp +536 -132
  57. package/cpp/llama-model.h +7 -1
  58. package/cpp/llama-sampling.cpp +18 -6
  59. package/cpp/llama-vocab.cpp +46 -8
  60. package/cpp/llama-vocab.h +6 -0
  61. package/cpp/llama.cpp +14 -0
  62. package/cpp/llama.h +72 -131
  63. package/cpp/minja/chat-template.hpp +9 -5
  64. package/cpp/minja/minja.hpp +69 -36
  65. package/cpp/rn-llama.cpp +611 -47
  66. package/cpp/rn-llama.h +33 -3
  67. package/cpp/sampling.cpp +57 -50
  68. package/cpp/tools/mtmd/clip-impl.h +462 -0
  69. package/cpp/tools/mtmd/clip.cpp +4024 -0
  70. package/cpp/tools/mtmd/clip.h +101 -0
  71. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  72. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  73. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  74. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  75. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  76. package/cpp/tools/mtmd/mtmd.h +362 -0
  77. package/cpp/tools/mtmd/stb_image.h +7988 -0
  78. package/ios/CMakeLists.txt +7 -0
  79. package/ios/RNLlama.mm +77 -3
  80. package/ios/RNLlamaContext.h +5 -1
  81. package/ios/RNLlamaContext.mm +105 -10
  82. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  83. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +23 -19
  84. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  85. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  86. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  87. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +21 -7
  88. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  89. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +30 -3
  90. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  91. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +52 -7
  92. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +34 -5
  93. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  94. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +3 -2
  95. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  96. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +7 -1
  97. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  98. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +72 -131
  99. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  100. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  101. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +33 -3
  102. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  105. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  106. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +23 -19
  107. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  108. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  109. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  110. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +21 -7
  111. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  112. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +30 -3
  113. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  114. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +52 -7
  115. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +34 -5
  116. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  117. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +3 -2
  118. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  119. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +7 -1
  120. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  121. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +72 -131
  122. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  123. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  124. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +33 -3
  125. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  126. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  127. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  128. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  129. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  130. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +23 -19
  131. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  132. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  133. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  134. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +21 -7
  135. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +1 -0
  136. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +30 -3
  137. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  138. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +52 -7
  139. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +34 -5
  140. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  141. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +3 -2
  142. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  143. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +7 -1
  144. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  145. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +72 -131
  146. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  147. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  148. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +33 -3
  149. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  150. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  151. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  152. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  153. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +23 -19
  154. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  155. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  156. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  157. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +21 -7
  158. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +1 -0
  159. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +30 -3
  160. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  161. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +52 -7
  162. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +34 -5
  163. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +203 -93
  164. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +3 -2
  165. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  166. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +7 -1
  167. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  168. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +72 -131
  169. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  170. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  171. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +33 -3
  172. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  173. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  174. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  175. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  176. package/jest/mock.js +33 -7
  177. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  178. package/lib/commonjs/index.js +153 -21
  179. package/lib/commonjs/index.js.map +1 -1
  180. package/lib/module/NativeRNLlama.js.map +1 -1
  181. package/lib/module/index.js +152 -20
  182. package/lib/module/index.js.map +1 -1
  183. package/lib/typescript/NativeRNLlama.d.ts +50 -4
  184. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  185. package/lib/typescript/index.d.ts +72 -6
  186. package/lib/typescript/index.d.ts.map +1 -1
  187. package/package.json +1 -1
  188. package/src/NativeRNLlama.ts +67 -4
  189. package/src/index.ts +212 -38
  190. package/lib/commonjs/chat.js +0 -37
  191. package/lib/commonjs/chat.js.map +0 -1
  192. package/lib/module/chat.js +0 -33
  193. package/lib/module/chat.js.map +0 -1
  194. package/lib/typescript/chat.d.ts +0 -10
  195. package/lib/typescript/chat.d.ts.map +0 -1
  196. package/src/chat.ts +0 -44
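
Most of the churn in this release is the new multimodal (mtmd) pipeline vendored under package/cpp/tools/mtmd plus the bridge code that exposes it. As a roadmap for the hunks below, the new Java-side methods they introduce are collected in the following sketch; the interface itself is illustrative only and does not exist in the package, but the signatures are copied from the LlamaContext and RNLlama diffs further down.

import com.facebook.react.bridge.ReadableArray;
import com.facebook.react.bridge.ReadableMap;
import com.facebook.react.bridge.WritableMap;

// Illustrative summary of the new bridge surface; signatures taken from the hunks below.
public interface MultimodalBridgeSketch {
  // Loads an mmproj projector file; the params map carries "path" and "use_gpu".
  boolean initMultimodal(ReadableMap params);
  boolean isMultimodalEnabled();
  // Returns a map with "vision" and "audio" booleans.
  WritableMap getMultimodalSupport();
  void releaseMultimodal();
  // tokenize() now also accepts media paths and returns a map rather than a bare token array.
  WritableMap tokenize(String text, ReadableArray media_paths);
}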

package/android/src/main/CMakeLists.txt

@@ -14,6 +14,7 @@ set(RNLLAMA_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../cpp)
 include_directories(
     ${RNLLAMA_LIB_DIR}
     ${RNLLAMA_LIB_DIR}/ggml-cpu
+    ${RNLLAMA_LIB_DIR}/tools/mtmd
 )
 
 set(
@@ -40,6 +41,11 @@ set(
     ${RNLLAMA_LIB_DIR}/gguf.cpp
     ${RNLLAMA_LIB_DIR}/log.cpp
     ${RNLLAMA_LIB_DIR}/llama-impl.cpp
+    # Multimodal support
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-audio.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/clip.cpp
+    ${RNLLAMA_LIB_DIR}/tools/mtmd/mtmd-helper.cpp
     ${RNLLAMA_LIB_DIR}/llama-grammar.cpp
     ${RNLLAMA_LIB_DIR}/llama-sampling.cpp
     ${RNLLAMA_LIB_DIR}/llama-vocab.cpp

package/android/src/main/java/com/rnllama/LlamaContext.java

@@ -112,15 +112,7 @@ public class LlamaContext {
       throw new IllegalArgumentException("File is not in GGUF format");
     }
 
-    if ( modelName.startsWith("content://")) {
-      Uri uri = Uri.parse(modelName);
-      try {
-        ParcelFileDescriptor pfd = reactContext.getApplicationContext().getContentResolver().openFileDescriptor(uri, "r");
-        modelName = "" + pfd.getFd();
-      } catch (Exception e) {
-        Log.e(NAME, "Failed to convert to FD!");
-      }
-    }
+    modelName = getContentFileDescriptor(modelName);
 
     // Check if file has GGUF magic numbers
     this.id = id;
@@ -369,6 +361,8 @@ public class LlamaContext {
       params.hasKey("top_n_sigma") ? (float) params.getDouble("top_n_sigma") : -1.0f,
       // String[] dry_sequence_breakers, when undef, we use the default definition from common.h
       params.hasKey("dry_sequence_breakers") ? params.getArray("dry_sequence_breakers").toArrayList().toArray(new String[0]) : new String[]{"\n", ":", "\"", "*"},
+      // String[] media_paths
+      params.hasKey("media_paths") ? params.getArray("media_paths").toArrayList().toArray(new String[0]) : new String[0],
       // PartialCompletionCallback partial_completion_callback
       new PartialCompletionCallback(
         this,
@@ -389,10 +383,8 @@ public class LlamaContext {
     return isPredicting(this.context);
   }
 
-  public WritableMap tokenize(String text) {
-    WritableMap result = Arguments.createMap();
-    result.putArray("tokens", tokenize(this.context, text));
-    return result;
+  public WritableMap tokenize(String text, ReadableArray media_paths) {
+    return tokenize(this.context, text, media_paths == null ? new String[0] : media_paths.toArrayList().toArray(new String[0]));
   }
 
   public String detokenize(ReadableArray tokens) {
@@ -439,10 +431,50 @@ public class LlamaContext {
     return getLoadedLoraAdapters(this.context);
   }
 
+  public boolean initMultimodal(ReadableMap params) {
+    String mmprojPath = params.getString("path");
+    boolean mmprojUseGpu = params.hasKey("use_gpu") ? params.getBoolean("use_gpu") : true;
+    if (mmprojPath == null || mmprojPath.isEmpty()) {
+      throw new IllegalArgumentException("mmproj_path is empty");
+    }
+    File file = new File(mmprojPath);
+    if (!file.exists()) {
+      throw new IllegalArgumentException("mmproj file does not exist: " + mmprojPath);
+    }
+    return initMultimodal(this.context, mmprojPath, mmprojUseGpu);
+  }
+
+  public boolean isMultimodalEnabled() {
+    return isMultimodalEnabled(this.context);
+  }
+
+  public WritableMap getMultimodalSupport() {
+    if (!isMultimodalEnabled()) {
+      throw new IllegalStateException("Multimodal is not enabled");
+    }
+    return getMultimodalSupport(this.context);
+  }
+
+  public void releaseMultimodal() {
+    releaseMultimodal(this.context);
+  }
+
   public void release() {
     freeContext(context);
   }
 
+  private String getContentFileDescriptor(String modelName) {
+    if (!modelName.startsWith("content://")) return modelName;
+    Uri uri = Uri.parse(modelName);
+    try {
+      ParcelFileDescriptor pfd = reactContext.getApplicationContext().getContentResolver().openFileDescriptor(uri, "r");
+      return "" + pfd.getFd();
+    } catch (Exception e) {
+      Log.e(NAME, "Failed to convert to FD!");
+    }
+    return modelName;
+  }
+
   static {
     Log.d(NAME, "Primary ABI: " + Build.SUPPORTED_ABIS[0]);
 
@@ -563,6 +595,9 @@ public class LlamaContext {
     boolean ctx_shift,
     LoadProgressCallback load_progress_callback
   );
+  protected static native boolean initMultimodal(long contextPtr, String mmproj_path, boolean MMPROJ_USE_GPU);
+  protected static native boolean isMultimodalEnabled(long contextPtr);
+  protected static native WritableMap getMultimodalSupport(long contextPtr);
   protected static native void interruptLoad(long contextPtr);
   protected static native WritableMap loadModelDetails(
     long contextPtr
@@ -626,11 +661,12 @@ public class LlamaContext {
     int dry_penalty_last_n,
     float top_n_sigma,
     String[] dry_sequence_breakers,
+    String[] media_paths,
     PartialCompletionCallback partial_completion_callback
   );
   protected static native void stopCompletion(long contextPtr);
   protected static native boolean isPredicting(long contextPtr);
-  protected static native WritableArray tokenize(long contextPtr, String text);
+  protected static native WritableMap tokenize(long contextPtr, String text, String[] media_paths);
   protected static native String detokenize(long contextPtr, int[] tokens);
   protected static native boolean isEmbeddingEnabled(long contextPtr);
   protected static native WritableMap embedding(
@@ -645,4 +681,5 @@ public class LlamaContext {
   protected static native void freeContext(long contextPtr);
   protected static native void setupLog(NativeLogCallback logCallback);
   protected static native void unsetLog();
+  protected static native void releaseMultimodal(long contextPtr);
 }
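
A minimal sketch of how the new LlamaContext methods fit together at runtime, assuming a context that has already been created elsewhere; the helper class and file paths below are hypothetical, while the method names and the "path"/"use_gpu" keys come from the hunks above.

import com.facebook.react.bridge.Arguments;
import com.facebook.react.bridge.ReadableArray;
import com.facebook.react.bridge.ReadableMap;
import com.facebook.react.bridge.WritableMap;
import com.rnllama.LlamaContext;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class MultimodalLifecycleSketch {
  static void run(LlamaContext ctx) {
    Map<String, Object> init = new HashMap<>();
    init.put("path", "/data/local/tmp/mmproj-model.gguf"); // hypothetical mmproj path
    init.put("use_gpu", true);                             // key names taken from the diff
    ReadableMap params = Arguments.makeNativeMap(init);

    if (!ctx.initMultimodal(params)) return;

    WritableMap support = ctx.getMultimodalSupport();      // { vision, audio }

    ReadableArray media = Arguments.fromList(
        Arrays.asList("/sdcard/DCIM/example.jpg"));        // hypothetical image path
    WritableMap tok = ctx.tokenize("Describe this image", media);

    // ...run a completion passing "media_paths" in the completion params...

    ctx.releaseMultimodal();                               // free projector resources when done
  }
}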

package/android/src/main/java/com/rnllama/RNLlama.java

@@ -5,6 +5,8 @@ import android.util.Log;
 import android.os.Build;
 import android.os.Handler;
 import android.os.AsyncTask;
+import android.os.ParcelFileDescriptor;
+import android.net.Uri;
 
 import com.facebook.react.bridge.Promise;
 import com.facebook.react.bridge.ReactApplicationContext;
@@ -15,6 +17,7 @@ import com.facebook.react.bridge.ReadableArray;
 import com.facebook.react.bridge.WritableMap;
 import com.facebook.react.bridge.Arguments;
 
+
 import java.util.HashMap;
 import java.util.Random;
 import java.io.File;
@@ -68,10 +71,24 @@ public class RNLlama implements LifecycleEventListener {
     promise.resolve(null);
   }
 
+  private String getContentFileDescriptor(String modelName) {
+    if (!modelName.startsWith("content://")) return modelName;
+    Uri uri = Uri.parse(modelName);
+    try {
+      ParcelFileDescriptor pfd = reactContext.getApplicationContext().getContentResolver().openFileDescriptor(uri, "r");
+      return "" + pfd.getFd();
+    } catch (Exception e) {
+      Log.e(NAME, "Failed to convert to FD!");
+    }
+    return modelName;
+  }
+
   public void modelInfo(final String model, final ReadableArray skip, final Promise promise) {
+    final String modelPath = getContentFileDescriptor(model);
+
     new AsyncTask<Void, Void, WritableMap>() {
       private Exception exception;
-
+
       @Override
       protected WritableMap doInBackground(Void... voids) {
         try {
@@ -79,7 +96,7 @@ public class RNLlama implements LifecycleEventListener {
           for (int i = 0; i < skip.size(); i++) {
             skipArray[i] = skip.getString(i);
           }
-          return LlamaContext.modelInfo(model, skipArray);
+          return LlamaContext.modelInfo(modelPath, skipArray);
         } catch (Exception e) {
           exception = e;
         }
@@ -322,7 +339,7 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "stopCompletion-" + contextId);
   }
 
-  public void tokenizeAsync(double id, final String text, final Promise promise) {
+  public void tokenizeAsync(double id, final String text, final ReadableArray media_paths, final Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
       private Exception exception;
@@ -334,7 +351,7 @@ public class RNLlama implements LifecycleEventListener {
           if (context == null) {
             throw new Exception("Context not found");
           }
-          return context.tokenize(text);
+          return context.tokenize(text, media_paths);
         } catch (Exception e) {
           exception = e;
         }
@@ -354,13 +371,13 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "tokenize-" + contextId);
   }
 
-  public WritableMap tokenizeSync(double id, final String text) {
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray image_paths) {
     int contextId = (int) id;
     LlamaContext context = contexts.get(contextId);
     if (context == null) {
       return Arguments.createMap();
     }
-    return context.tokenize(text);
+    return context.tokenize(text, image_paths);
   }
 
   public void getCpuFeatures(Promise promise) {
@@ -597,6 +614,141 @@ public class RNLlama implements LifecycleEventListener {
     tasks.put(task, "getLoadedLoraAdapters-" + contextId);
   }
 
+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (context.isPredicting()) {
+            throw new Exception("Context is busy");
+          }
+          return context.initMultimodal(params);
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "initMultimodal-" + contextId);
+  }
+
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Boolean>() {
+      private Exception exception;
+
+      @Override
+      protected Boolean doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          return context.isMultimodalEnabled();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return false;
+      }
+
+      @Override
+      protected void onPostExecute(Boolean result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "isMultimodalEnabled" + contextId);
+  }
+
+  public void getMultimodalSupport(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, WritableMap>() {
+      private Exception exception;
+
+      @Override
+      protected WritableMap doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          if (!context.isMultimodalEnabled()) {
+            throw new Exception("Multimodal is not enabled");
+          }
+          return context.getMultimodalSupport();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(WritableMap result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(result);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "getMultimodalSupport-" + contextId);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    final int contextId = (int) id;
+    AsyncTask task = new AsyncTask<Void, Void, Void>() {
+      private Exception exception;
+
+      @Override
+      protected Void doInBackground(Void... voids) {
+        try {
+          LlamaContext context = contexts.get(contextId);
+          if (context == null) {
+            throw new Exception("Context not found");
+          }
+          context.releaseMultimodal();
+        } catch (Exception e) {
+          exception = e;
+        }
+        return null;
+      }
+
+      @Override
+      protected void onPostExecute(Void result) {
+        if (exception != null) {
+          promise.reject(exception);
+          return;
+        }
+        promise.resolve(null);
+        tasks.remove(this);
+      }
+    }.executeOnExecutor(AsyncTask.THREAD_POOL_EXECUTOR);
+    tasks.put(task, "releaseMultimodal" + id);
+  }
+
   public void releaseContext(double id, Promise promise) {
     final int contextId = (int) id;
     AsyncTask task = new AsyncTask<Void, Void, Void>() {

package/android/src/main/jni.cpp

@@ -600,6 +600,12 @@ Java_com_rnllama_LlamaContext_loadSession(
   llama->embd.resize(n_token_count_out);
   env->ReleaseStringUTFChars(path, path_chars);
 
+  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+  auto null_token_iter = std::find(llama->embd.begin(), llama->embd.end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != llama->embd.end()) {
+    llama->embd.resize(std::distance(llama->embd.begin(), null_token_iter));
+  }
+
   const std::string text = rnllama::tokens_to_str(llama->ctx, llama->embd.cbegin(), llama->embd.cend());
   putInt(env, result, "tokens_loaded", n_token_count_out);
   putString(env, result, "prompt", text.c_str());
@@ -620,6 +626,13 @@ Java_com_rnllama_LlamaContext_saveSession(
   const char *path_chars = env->GetStringUTFChars(path, nullptr);
 
   std::vector<llama_token> session_tokens = llama->embd;
+
+  // Find LLAMA_TOKEN_NULL in the tokens and resize the array to the index of the null token
+  auto null_token_iter = std::find(session_tokens.begin(), session_tokens.end(), LLAMA_TOKEN_NULL);
+  if (null_token_iter != session_tokens.end()) {
+    session_tokens.resize(std::distance(session_tokens.begin(), null_token_iter));
+  }
+
   int default_size = session_tokens.size();
   int save_size = size > 0 && size <= default_size ? size : default_size;
   if (!llama_state_save_file(llama->ctx, path_chars, session_tokens.data(), save_size)) {
@@ -694,6 +707,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
   jint dry_penalty_last_n,
   jfloat top_n_sigma,
   jobjectArray dry_sequence_breakers,
+  jobjectArray media_paths,
   jobject partial_completion_callback
 ) {
   UNUSED(thiz);
@@ -703,8 +717,32 @@ Java_com_rnllama_LlamaContext_doCompletion(
 
   //llama_reset_timings(llama->ctx);
 
-  auto prompt_chars = env->GetStringUTFChars(prompt, nullptr);
+  const char *prompt_chars = env->GetStringUTFChars(prompt, nullptr);
+
+  // Set the prompt parameter
   llama->params.prompt = prompt_chars;
+
+  // Process image paths if provided
+  std::vector<std::string> media_paths_vector;
+
+  jint media_paths_size = env->GetArrayLength(media_paths);
+  if (media_paths_size > 0) {
+    // Check if multimodal is enabled
+    if (!llama->isMultimodalEnabled()) {
+      auto result = createWriteableMap(env);
+      putString(env, result, "error", "Multimodal support not enabled. Call initMultimodal first.");
+      env->ReleaseStringUTFChars(prompt, prompt_chars);
+      return reinterpret_cast<jobject>(result);
+    }
+
+    for (jint i = 0; i < media_paths_size; i++) {
+      jstring image_path = (jstring) env->GetObjectArrayElement(media_paths, i);
+      const char *image_path_chars = env->GetStringUTFChars(image_path, nullptr);
+      media_paths_vector.push_back(image_path_chars);
+      env->ReleaseStringUTFChars(image_path, image_path_chars);
+    }
+  }
+
   llama->params.sampling.seed = (seed == -1) ? time(NULL) : seed;
 
   int max_threads = std::thread::hardware_concurrency();
@@ -861,10 +899,19 @@ Java_com_rnllama_LlamaContext_doCompletion(
     putString(env, result, "error", "Failed to initialize sampling");
     return reinterpret_cast<jobject>(result);
   }
+
   llama->beginCompletion();
-  llama->loadPrompt();
+  try {
+    llama->loadPrompt(media_paths_vector);
+  } catch (const std::exception &e) {
+    llama->endCompletion();
+    auto result = createWriteableMap(env);
+    putString(env, result, "error", e.what());
+    return reinterpret_cast<jobject>(result);
+  }
 
   if (llama->context_full) {
+    llama->endCompletion();
     auto result = createWriteableMap(env);
     putString(env, result, "error", "Context is full");
     return reinterpret_cast<jobject>(result);
@@ -931,9 +978,14 @@ Java_com_rnllama_LlamaContext_doCompletion(
   }
 
   env->ReleaseStringUTFChars(grammar, grammar_chars);
-  env->ReleaseStringUTFChars(prompt, prompt_chars);
+
+  // Release prompt_chars if it's still allocated
+  if (prompt_chars != nullptr) {
+    env->ReleaseStringUTFChars(prompt, prompt_chars);
+  }
+
   llama_perf_context_print(llama->ctx);
-  llama->is_predicting = false;
+  llama->endCompletion();
 
   auto toolCalls = createWritableArray(env);
   std::string reasoningContent = "";
@@ -1023,22 +1075,47 @@ Java_com_rnllama_LlamaContext_isPredicting(
 
 JNIEXPORT jobject JNICALL
 Java_com_rnllama_LlamaContext_tokenize(
-  JNIEnv *env, jobject thiz, jlong context_ptr, jstring text) {
+  JNIEnv *env, jobject thiz, jlong context_ptr, jstring text, jobjectArray media_paths) {
   UNUSED(thiz);
   auto llama = context_map[(long) context_ptr];
 
   const char *text_chars = env->GetStringUTFChars(text, nullptr);
+  std::vector<std::string> media_paths_vector;
+  for (int i = 0; i < env->GetArrayLength(media_paths); i++) {
+    jstring image_path = (jstring) env->GetObjectArrayElement(media_paths, i);
+    const char *image_path_chars = env->GetStringUTFChars(image_path, nullptr);
+    media_paths_vector.push_back(image_path_chars);
+    env->ReleaseStringUTFChars(image_path, image_path_chars);
+  }
+  auto tokenize_result = llama->tokenize(text_chars, media_paths_vector);
+
+  auto result = createWriteableMap(env);
+
+  auto tokens = createWritableArray(env);
+  for (const auto &tok : tokenize_result.tokens) {
+    pushInt(env, tokens, tok);
+  }
+  putArray(env, result, "tokens", tokens);
+
+  putBoolean(env, result, "has_media", tokenize_result.has_media);
 
-  const std::vector<llama_token> toks = common_tokenize(
-    llama->ctx,
-    text_chars,
-    false
-  );
+  auto bitmap_hashes = createWritableArray(env);
+  for (const auto &hash : tokenize_result.bitmap_hashes) {
+    pushString(env, bitmap_hashes, hash.c_str());
+  }
+  putArray(env, result, "bitmap_hashes", bitmap_hashes);
+
+  auto chunk_pos = createWritableArray(env);
+  for (const auto &pos : tokenize_result.chunk_pos) {
+    pushInt(env, chunk_pos, pos);
+  }
+  putArray(env, result, "chunk_pos", chunk_pos);
 
-  jobject result = createWritableArray(env);
-  for (const auto &tok : toks) {
-    pushInt(env, result, tok);
+  auto chunk_pos_media = createWritableArray(env);
+  for (const auto &pos : tokenize_result.chunk_pos_media) {
+    pushInt(env, chunk_pos_media, pos);
   }
+  putArray(env, result, "chunk_pos_media", chunk_pos_media);
 
   env->ReleaseStringUTFChars(text, text_chars);
   return result;
@@ -1107,7 +1184,12 @@ Java_com_rnllama_LlamaContext_embedding(
   }
 
   llama->beginCompletion();
-  llama->loadPrompt();
+  try {
+    llama->loadPrompt({});
+  } catch (const std::exception &e) {
+    putString(env, result, "error", e.what());
+    return reinterpret_cast<jobject>(result);
+  }
   llama->doCompletion();
 
   std::vector<float> embedding = llama->getEmbedding(embdParams);
@@ -1276,4 +1358,61 @@ Java_com_rnllama_LlamaContext_unsetLog(JNIEnv *env, jobject thiz) {
   llama_log_set(rnllama_log_callback_default, NULL);
 }
 
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_initMultimodal(
+  JNIEnv *env,
+  jobject thiz,
+  jlong context_ptr,
+  jstring mmproj_path,
+  jboolean mmproj_use_gpu
+) {
+  UNUSED(thiz);
+  auto llama = context_map[(long) context_ptr];
+
+  const char *mmproj_path_chars = env->GetStringUTFChars(mmproj_path, nullptr);
+  bool result = llama->initMultimodal(mmproj_path_chars, mmproj_use_gpu);
+  env->ReleaseStringUTFChars(mmproj_path, mmproj_path_chars);
+
+  return result;
+}
+
+JNIEXPORT jboolean JNICALL
+Java_com_rnllama_LlamaContext_isMultimodalEnabled(
+  JNIEnv *env,
+  jobject thiz,
+  jlong context_ptr
+) {
+  UNUSED(env);
+  UNUSED(thiz);
+  auto llama = context_map[(long) context_ptr];
+  return llama->isMultimodalEnabled();
+}
+
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_getMultimodalSupport(
+  JNIEnv *env,
+  jobject thiz,
+  jlong context_ptr
+) {
+  UNUSED(env);
+  UNUSED(thiz);
+  auto llama = context_map[(long) context_ptr];
+  auto result = createWriteableMap(env);
+  putBoolean(env, result, "vision", llama->isMultimodalSupportVision());
+  putBoolean(env, result, "audio", llama->isMultimodalSupportAudio());
+  return result;
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnllama_LlamaContext_releaseMultimodal(
+  JNIEnv *env,
+  jobject thiz,
+  jlong context_ptr
+) {
+  UNUSED(env);
+  UNUSED(thiz);
+  auto llama = context_map[(long) context_ptr];
+  llama->releaseMultimodal();
+}
+
 } // extern "C"
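
The reworked tokenize entry point in the jni.cpp hunks above now returns a map rather than a bare token array. A minimal sketch of reading that map back on the Java side, assuming a context created elsewhere; the helper class and media path are hypothetical, and the key names are the ones written by the JNI code.

import com.facebook.react.bridge.Arguments;
import com.facebook.react.bridge.ReadableArray;
import com.facebook.react.bridge.WritableMap;
import com.rnllama.LlamaContext;

import java.util.Arrays;

public class TokenizeResultSketch {
  static void inspect(LlamaContext ctx) {
    ReadableArray media = Arguments.fromList(
        Arrays.asList("/sdcard/DCIM/example.jpg"));           // hypothetical media path

    WritableMap res = ctx.tokenize("What is in this picture?", media);

    ReadableArray tokens   = res.getArray("tokens");          // token ids
    boolean hasMedia       = res.getBoolean("has_media");     // true when media chunks are present
    ReadableArray hashes   = res.getArray("bitmap_hashes");   // hashes of the decoded media bitmaps
    ReadableArray chunks   = res.getArray("chunk_pos");       // chunk start positions
    ReadableArray mediaPos = res.getArray("chunk_pos_media"); // media chunk start positions
  }
}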

package/android/src/newarch/java/com/rnllama/RNLlamaModule.java

@@ -53,6 +53,26 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     rnllama.initContext(id, params, promise);
   }
 
+  @ReactMethod
+  public void initMultimodal(double id, final ReadableMap params, final Promise promise) {
+    rnllama.initMultimodal(id, params, promise);
+  }
+
+  @ReactMethod
+  public void isMultimodalEnabled(double id, final Promise promise) {
+    rnllama.isMultimodalEnabled(id, promise);
+  }
+
+  @ReactMethod
+  public void getMultimodalSupport(double id, final Promise promise) {
+    rnllama.getMultimodalSupport(id, promise);
+  }
+
+  @ReactMethod
+  public void releaseMultimodal(double id, final Promise promise) {
+    rnllama.releaseMultimodal(id, promise);
+  }
+
   @ReactMethod
   public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
     rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
@@ -79,13 +99,13 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
   }
 
   @ReactMethod
-  public void tokenizeAsync(double id, final String text, final Promise promise) {
-    rnllama.tokenizeAsync(id, text, promise);
+  public void tokenizeAsync(double id, final String text, final ReadableArray media_paths, final Promise promise) {
+    rnllama.tokenizeAsync(id, text, media_paths, promise);
   }
 
   @ReactMethod(isBlockingSynchronousMethod=true)
-  public WritableMap tokenizeSync(double id, final String text) {
-    return rnllama.tokenizeSync(id, text);
+  public WritableMap tokenizeSync(double id, final String text, final ReadableArray media_paths) {
+    return rnllama.tokenizeSync(id, text, media_paths);
  }
 
   @ReactMethod