cui-llama.rn 1.4.3 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/README.md +93 -114
  2. package/android/src/main/CMakeLists.txt +5 -0
  3. package/android/src/main/build-arm64/CMakeCache.txt +429 -0
  4. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCCompiler.cmake +21 -21
  5. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeCXXCompiler.cmake +101 -0
  6. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_C.bin +0 -0
  7. package/android/src/main/build-arm64/CMakeFiles/3.31.4/CMakeDetermineCompilerABI_CXX.bin +0 -0
  8. package/android/src/main/build-arm64/CMakeFiles/CMakeConfigureLog.yaml +376 -0
  9. package/android/src/main/build-arm64/CMakeFiles/CMakeDirectoryInformation.cmake +16 -0
  10. package/android/src/main/build-arm64/CMakeFiles/Makefile.cmake +165 -0
  11. package/android/src/main/build-arm64/CMakeFiles/Makefile2 +297 -0
  12. package/android/src/main/build-arm64/CMakeFiles/Progress/1 +1 -0
  13. package/android/src/main/build-arm64/CMakeFiles/Progress/2 +1 -0
  14. package/android/src/main/build-arm64/CMakeFiles/Progress/3 +1 -0
  15. package/android/src/main/build-arm64/CMakeFiles/Progress/4 +1 -0
  16. package/android/src/main/build-arm64/CMakeFiles/Progress/5 +1 -0
  17. package/android/src/main/build-arm64/CMakeFiles/Progress/6 +1 -0
  18. package/android/src/main/build-arm64/CMakeFiles/Progress/count.txt +1 -0
  19. package/android/src/main/build-arm64/CMakeFiles/TargetDirectories.txt +8 -0
  20. package/android/src/main/build-arm64/CMakeFiles/cmake.check_cache +1 -0
  21. package/android/src/main/build-arm64/CMakeFiles/progress.marks +1 -0
  22. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o +0 -0
  23. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-alloc.c.o.d +58 -0
  24. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o +0 -0
  25. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend-reg.cpp.o.d +756 -0
  26. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o +0 -0
  27. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-backend.cpp.o.d +709 -0
  28. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o +0 -0
  29. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-aarch64.cpp.o.d +714 -0
  30. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o +0 -0
  31. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-quants.c.o.d +62 -0
  32. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o +0 -0
  33. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu-traits.cpp.o.d +708 -0
  34. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o +0 -0
  35. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.c.o.d +113 -0
  36. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o +0 -0
  37. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-cpu.cpp.o.d +713 -0
  38. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o +0 -0
  39. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-opt.cpp.o.d +763 -0
  40. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o +0 -0
  41. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-quants.c.o.d +61 -0
  42. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o +0 -0
  43. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml-threading.cpp.o.d +707 -0
  44. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o +0 -0
  45. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/ggml.c.o.d +104 -0
  46. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o +0 -0
  47. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/gguf.cpp.o.d +714 -0
  48. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o +0 -0
  49. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/D_/dev/react-native/cui-llama.rn/cpp/log.cpp.o.d +723 -0
  50. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/DependInfo.cmake +62 -0
  51. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/build.make +722 -0
  52. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/cmake_clean.cmake +89 -0
  53. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.make +2 -0
  54. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/compiler_depend.ts +2 -0
  55. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/depend.make +2 -0
  56. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/flags.make +17 -0
  57. package/android/src/main/build-arm64/CMakeFiles/rnllama.dir/progress.make +41 -0
  58. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/DependInfo.cmake +62 -0
  59. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/build.make +722 -0
  60. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/cmake_clean.cmake +89 -0
  61. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.make +2 -0
  62. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/compiler_depend.ts +2 -0
  63. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/depend.make +2 -0
  64. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/flags.make +17 -0
  65. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8.dir/progress.make +41 -0
  66. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/DependInfo.cmake +62 -0
  67. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/build.make +722 -0
  68. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/cmake_clean.cmake +89 -0
  69. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.make +2 -0
  70. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/compiler_depend.ts +2 -0
  71. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/depend.make +2 -0
  72. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/flags.make +17 -0
  73. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2.dir/progress.make +41 -0
  74. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/DependInfo.cmake +62 -0
  75. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/build.make +722 -0
  76. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/cmake_clean.cmake +89 -0
  77. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.make +2 -0
  78. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/compiler_depend.ts +2 -0
  79. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/depend.make +2 -0
  80. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/flags.make +17 -0
  81. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod.dir/progress.make +41 -0
  82. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/DependInfo.cmake +62 -0
  83. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/build.make +722 -0
  84. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/cmake_clean.cmake +89 -0
  85. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.make +2 -0
  86. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/compiler_depend.ts +2 -0
  87. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/depend.make +2 -0
  88. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/flags.make +17 -0
  89. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_dotprod_i8mm.dir/progress.make +41 -0
  90. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/DependInfo.cmake +62 -0
  91. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/build.make +722 -0
  92. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/cmake_clean.cmake +89 -0
  93. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.make +2 -0
  94. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/compiler_depend.ts +2 -0
  95. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/depend.make +2 -0
  96. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/flags.make +17 -0
  97. package/android/src/main/build-arm64/CMakeFiles/rnllama_v8_2_i8mm.dir/progress.make +41 -0
  98. package/android/src/main/build-arm64/Makefile +1862 -0
  99. package/android/src/main/build-arm64/cmake_install.cmake +66 -0
  100. package/android/src/main/java/com/rnllama/LlamaContext.java +91 -17
  101. package/android/src/main/java/com/rnllama/RNLlama.java +37 -4
  102. package/android/src/main/jni-utils.h +6 -0
  103. package/android/src/main/jni.cpp +287 -31
  104. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  105. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  106. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  107. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  108. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  109. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  110. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  111. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  112. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +7 -2
  113. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +7 -2
  114. package/cpp/chat-template.hpp +529 -0
  115. package/cpp/chat.cpp +1085 -0
  116. package/cpp/chat.hpp +55 -0
  117. package/cpp/common.cpp +159 -36
  118. package/cpp/common.h +64 -19
  119. package/cpp/ggml-alloc.c +1 -13
  120. package/cpp/ggml-common.h +0 -2
  121. package/cpp/ggml-cpu-impl.h +6 -12
  122. package/cpp/ggml-cpu-quants.c +937 -340
  123. package/cpp/ggml-cpu.c +207 -113
  124. package/cpp/ggml-cpu.cpp +4 -6
  125. package/cpp/ggml-cpu.h +1 -1
  126. package/cpp/ggml-metal.h +66 -66
  127. package/cpp/ggml-metal.m +141 -23
  128. package/cpp/ggml.c +24 -14
  129. package/cpp/ggml.h +2 -2
  130. package/cpp/json-schema-to-grammar.cpp +46 -66
  131. package/cpp/json-schema-to-grammar.h +15 -1
  132. package/cpp/llama-arch.cpp +7 -2
  133. package/cpp/llama-arch.h +3 -1
  134. package/cpp/llama-chat.cpp +10 -1
  135. package/cpp/llama-chat.h +1 -0
  136. package/cpp/llama-grammar.cpp +86 -6
  137. package/cpp/llama-grammar.h +22 -1
  138. package/cpp/llama-impl.h +6 -6
  139. package/cpp/llama-kv-cache.h +1 -1
  140. package/cpp/llama-mmap.h +1 -0
  141. package/cpp/llama-model-loader.cpp +1 -1
  142. package/cpp/llama-model.cpp +32 -6
  143. package/cpp/llama-sampling.cpp +178 -61
  144. package/cpp/llama-vocab.cpp +8 -3
  145. package/cpp/llama.cpp +188 -128
  146. package/cpp/llama.h +27 -10
  147. package/cpp/log.cpp +32 -10
  148. package/cpp/log.h +12 -1
  149. package/cpp/minja.hpp +2883 -0
  150. package/cpp/rn-llama.cpp +82 -5
  151. package/cpp/rn-llama.h +16 -1
  152. package/cpp/sampling.cpp +68 -41
  153. package/cpp/sampling.h +3 -0
  154. package/cpp/sgemm.cpp +9 -8
  155. package/cpp/unicode.cpp +9 -2
  156. package/ios/CMakeLists.txt +6 -0
  157. package/ios/RNLlama.h +0 -8
  158. package/ios/RNLlama.mm +27 -3
  159. package/ios/RNLlamaContext.h +10 -1
  160. package/ios/RNLlamaContext.mm +269 -57
  161. package/jest/mock.js +21 -2
  162. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  163. package/lib/commonjs/grammar.js +3 -0
  164. package/lib/commonjs/grammar.js.map +1 -1
  165. package/lib/commonjs/index.js +87 -13
  166. package/lib/commonjs/index.js.map +1 -1
  167. package/lib/module/NativeRNLlama.js.map +1 -1
  168. package/lib/module/grammar.js +3 -0
  169. package/lib/module/grammar.js.map +1 -1
  170. package/lib/module/index.js +86 -13
  171. package/lib/module/index.js.map +1 -1
  172. package/lib/typescript/NativeRNLlama.d.ts +107 -2
  173. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  174. package/lib/typescript/grammar.d.ts.map +1 -1
  175. package/lib/typescript/index.d.ts +32 -7
  176. package/lib/typescript/index.d.ts.map +1 -1
  177. package/llama-rn.podspec +1 -1
  178. package/package.json +3 -2
  179. package/src/NativeRNLlama.ts +115 -3
  180. package/src/grammar.ts +3 -0
  181. package/src/index.ts +138 -21
--- a/package/android/src/main/jni.cpp
+++ b/package/android/src/main/jni.cpp
@@ -9,12 +9,12 @@
 #include <string>
 #include <thread>
 #include <unordered_map>
+#include "json-schema-to-grammar.h"
 #include "llama.h"
 #include "llama-impl.h"
 #include "ggml.h"
 #include "rn-llama.h"
 #include "jni-utils.h"
-
 #define UNUSED(x) (void)(x)
 #define TAG "RNLLAMA_ANDROID_JNI"
 
@@ -25,7 +25,7 @@ static inline int min(int a, int b) {
     return (a < b) ? a : b;
 }
 
-static void log_callback(lm_ggml_log_level level, const char * fmt, void * data) {
+static void rnllama_log_callback_default(lm_ggml_log_level level, const char * fmt, void * data) {
     if (level == LM_GGML_LOG_LEVEL_ERROR) __android_log_print(ANDROID_LOG_ERROR, TAG, fmt, data);
     else if (level == LM_GGML_LOG_LEVEL_INFO) __android_log_print(ANDROID_LOG_INFO, TAG, fmt, data);
     else if (level == LM_GGML_LOG_LEVEL_WARN) __android_log_print(ANDROID_LOG_WARN, TAG, fmt, data);
@@ -230,6 +230,8 @@ Java_com_rnllama_LlamaContext_initContext(
     JNIEnv *env,
     jobject thiz,
     jstring model_path_str,
+    jstring chat_template,
+    jstring reasoning_format,
     jboolean embedding,
     jint embd_normalize,
     jint n_ctx,
@@ -262,7 +264,17 @@ Java_com_rnllama_LlamaContext_initContext(
 
     const char *model_path_chars = env->GetStringUTFChars(model_path_str, nullptr);
     defaultParams.model = model_path_chars;
-
+
+    const char *chat_template_chars = env->GetStringUTFChars(chat_template, nullptr);
+    defaultParams.chat_template = chat_template_chars;
+
+    const char *reasoning_format_chars = env->GetStringUTFChars(reasoning_format, nullptr);
+    if (strcmp(reasoning_format_chars, "deepseek") == 0) {
+        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+    } else {
+        defaultParams.reasoning_format = COMMON_REASONING_FORMAT_NONE;
+    }
+
     defaultParams.n_ctx = n_ctx;
     defaultParams.n_batch = n_batch;
     defaultParams.n_ubatch = n_ubatch;
@@ -329,6 +341,8 @@ Java_com_rnllama_LlamaContext_initContext(
     bool is_model_loaded = llama->loadModel(defaultParams);
 
     env->ReleaseStringUTFChars(model_path_str, model_path_chars);
+    env->ReleaseStringUTFChars(chat_template, chat_template_chars);
+    env->ReleaseStringUTFChars(reasoning_format, reasoning_format_chars);
     env->ReleaseStringUTFChars(cache_type_k, cache_type_k_chars);
     env->ReleaseStringUTFChars(cache_type_v, cache_type_v_chars);
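With these hunks, the chat template override and the reasoning format are fixed at context creation rather than per request. A minimal sketch of the JS side, assuming the fork keeps llama.rn's `initLlama` entry point and that the TS option names mirror the JNI parameters (`chat_template`, `reasoning_format` — both inferred, not confirmed against src/index.ts):

```ts
import { initLlama } from 'cui-llama.rn'

// Sketch only. Per the JNI code above, reasoning_format === 'deepseek'
// maps to COMMON_REASONING_FORMAT_DEEPSEEK; any other value falls back to NONE.
const context = await initLlama({
  model: '/data/local/tmp/model.gguf',
  n_ctx: 2048,
  chat_template: '',            // empty = use the template embedded in the GGUF
  reasoning_format: 'deepseek', // extract <think>-style reasoning into reasoning_content
})
```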
 
@@ -418,52 +432,136 @@ Java_com_rnllama_LlamaContext_loadModelDetails(
 
     char desc[1024];
     llama_model_desc(llama->model, desc, sizeof(desc));
+
     putString(env, result, "desc", desc);
     putDouble(env, result, "size", llama_model_size(llama->model));
     putDouble(env, result, "nEmbd", llama_model_n_embd(llama->model));
     putDouble(env, result, "nParams", llama_model_n_params(llama->model));
-    putBoolean(env, result, "isChatTemplateSupported", llama->validateModelChatTemplate());
+    auto chat_templates = createWriteableMap(env);
+    putBoolean(env, chat_templates, "llamaChat", llama->validateModelChatTemplate(false, nullptr));
+
+    auto minja = createWriteableMap(env);
+    putBoolean(env, minja, "default", llama->validateModelChatTemplate(true, nullptr));
+
+    auto default_caps = createWriteableMap(env);
+
+    auto default_tmpl = llama->templates.template_default.get();
+    auto default_tmpl_caps = default_tmpl->original_caps();
+    putBoolean(env, default_caps, "tools", default_tmpl_caps.supports_tools);
+    putBoolean(env, default_caps, "toolCalls", default_tmpl_caps.supports_tool_calls);
+    putBoolean(env, default_caps, "parallelToolCalls", default_tmpl_caps.supports_parallel_tool_calls);
+    putBoolean(env, default_caps, "toolResponses", default_tmpl_caps.supports_tool_responses);
+    putBoolean(env, default_caps, "systemRole", default_tmpl_caps.supports_system_role);
+    putBoolean(env, default_caps, "toolCallId", default_tmpl_caps.supports_tool_call_id);
+    putMap(env, minja, "defaultCaps", default_caps);
+
+    putBoolean(env, minja, "toolUse", llama->validateModelChatTemplate(true, "tool_use"));
+    auto tool_use_tmpl = llama->templates.template_tool_use.get();
+    if (tool_use_tmpl != nullptr) {
+        auto tool_use_caps = createWriteableMap(env);
+        auto tool_use_tmpl_caps = tool_use_tmpl->original_caps();
+        putBoolean(env, tool_use_caps, "tools", tool_use_tmpl_caps.supports_tools);
+        putBoolean(env, tool_use_caps, "toolCalls", tool_use_tmpl_caps.supports_tool_calls);
+        putBoolean(env, tool_use_caps, "parallelToolCalls", tool_use_tmpl_caps.supports_parallel_tool_calls);
+        putBoolean(env, tool_use_caps, "systemRole", tool_use_tmpl_caps.supports_system_role);
+        putBoolean(env, tool_use_caps, "toolResponses", tool_use_tmpl_caps.supports_tool_responses);
+        putBoolean(env, tool_use_caps, "toolCallId", tool_use_tmpl_caps.supports_tool_call_id);
+        putMap(env, minja, "toolUseCaps", tool_use_caps);
+    }
+
+    putMap(env, chat_templates, "minja", minja);
     putMap(env, result, "metadata", meta);
+    putMap(env, result, "chatTemplates", chat_templates);
+
+    // deprecated
+    putBoolean(env, result, "isChatTemplateSupported", llama->validateModelChatTemplate(false, nullptr));
 
     return reinterpret_cast<jobject>(result);
 }
 
 JNIEXPORT jobject JNICALL
-Java_com_rnllama_LlamaContext_getFormattedChat(
+Java_com_rnllama_LlamaContext_getFormattedChatWithJinja(
     JNIEnv *env,
     jobject thiz,
     jlong context_ptr,
-    jobjectArray messages,
-    jstring chat_template
+    jstring messages,
+    jstring chat_template,
+    jstring json_schema,
+    jstring tools,
+    jboolean parallel_tool_calls,
+    jstring tool_choice
 ) {
     UNUSED(thiz);
     auto llama = context_map[(long) context_ptr];
 
-    std::vector<common_chat_msg> chat;
-
-    int messages_len = env->GetArrayLength(messages);
-    for (int i = 0; i < messages_len; i++) {
-        jobject msg = env->GetObjectArrayElement(messages, i);
-        jclass msgClass = env->GetObjectClass(msg);
-
-        jmethodID getRoleMethod = env->GetMethodID(msgClass, "getString", "(Ljava/lang/String;)Ljava/lang/String;");
-        jstring roleKey = env->NewStringUTF("role");
-        jstring contentKey = env->NewStringUTF("content");
+    const char *messages_chars = env->GetStringUTFChars(messages, nullptr);
+    const char *tmpl_chars = env->GetStringUTFChars(chat_template, nullptr);
+    const char *json_schema_chars = env->GetStringUTFChars(json_schema, nullptr);
+    const char *tools_chars = env->GetStringUTFChars(tools, nullptr);
+    const char *tool_choice_chars = env->GetStringUTFChars(tool_choice, nullptr);
 
-        jstring role_str = (jstring) env->CallObjectMethod(msg, getRoleMethod, roleKey);
-        jstring content_str = (jstring) env->CallObjectMethod(msg, getRoleMethod, contentKey);
+    auto result = createWriteableMap(env);
+    try {
+        auto formatted = llama->getFormattedChatWithJinja(
+            messages_chars,
+            tmpl_chars,
+            json_schema_chars,
+            tools_chars,
+            parallel_tool_calls,
+            tool_choice_chars
+        );
+        putString(env, result, "prompt", formatted.prompt.get<std::string>().c_str());
+        putInt(env, result, "chat_format", static_cast<int>(formatted.format));
+        putString(env, result, "grammar", formatted.grammar.c_str());
+        putBoolean(env, result, "grammar_lazy", formatted.grammar_lazy);
+        auto grammar_triggers = createWritableArray(env);
+        for (const auto &trigger : formatted.grammar_triggers) {
+            auto trigger_map = createWriteableMap(env);
+            putString(env, trigger_map, "word", trigger.word.c_str());
+            putBoolean(env, trigger_map, "at_start", trigger.at_start);
+            pushMap(env, grammar_triggers, trigger_map);
+        }
+        putArray(env, result, "grammar_triggers", grammar_triggers);
+        auto preserved_tokens = createWritableArray(env);
+        for (const auto &token : formatted.preserved_tokens) {
+            pushString(env, preserved_tokens, token.c_str());
+        }
+        putArray(env, result, "preserved_tokens", preserved_tokens);
+        auto additional_stops = createWritableArray(env);
+        for (const auto &stop : formatted.additional_stops) {
+            pushString(env, additional_stops, stop.c_str());
+        }
+        putArray(env, result, "additional_stops", additional_stops);
+    } catch (const std::runtime_error &e) {
+        LOGI("[RNLlama] Error: %s", e.what());
+        putString(env, result, "_error", e.what());
+    }
+    env->ReleaseStringUTFChars(tools, tools_chars);
+    env->ReleaseStringUTFChars(messages, messages_chars);
+    env->ReleaseStringUTFChars(chat_template, tmpl_chars);
+    env->ReleaseStringUTFChars(json_schema, json_schema_chars);
+    env->ReleaseStringUTFChars(tool_choice, tool_choice_chars);
+    return reinterpret_cast<jobject>(result);
+}
 
-        const char *role = env->GetStringUTFChars(role_str, nullptr);
-        const char *content = env->GetStringUTFChars(content_str, nullptr);
+JNIEXPORT jobject JNICALL
+Java_com_rnllama_LlamaContext_getFormattedChat(
+    JNIEnv *env,
+    jobject thiz,
+    jlong context_ptr,
+    jstring messages,
+    jstring chat_template
+) {
+    UNUSED(thiz);
+    auto llama = context_map[(long) context_ptr];
 
-        chat.push_back({ role, content });
+    const char *messages_chars = env->GetStringUTFChars(messages, nullptr);
+    const char *tmpl_chars = env->GetStringUTFChars(chat_template, nullptr);
 
-        env->ReleaseStringUTFChars(role_str, role);
-        env->ReleaseStringUTFChars(content_str, content);
-    }
+    std::string formatted_chat = llama->getFormattedChat(messages_chars, tmpl_chars);
 
-    const char *tmpl_chars = env->GetStringUTFChars(chat_template, nullptr);
-    std::string formatted_chat = common_chat_apply_template(llama->model, tmpl_chars, chat, true);
+    env->ReleaseStringUTFChars(messages, messages_chars);
+    env->ReleaseStringUTFChars(chat_template, tmpl_chars);
 
     return env->NewStringUTF(formatted_chat.c_str());
 }
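The new getFormattedChatWithJinja entry point renders the conversation through the bundled minja engine (cpp/minja.hpp, added in this release) and returns grammar metadata alongside the rendered prompt. A hedged sketch of driving it from TypeScript — the option names (`jinja`, `tools`, `parallel_tool_calls`, `tool_choice`) are inferred from the JNI signature, not confirmed against the TS sources:

```ts
// Sketch: format a tool-calling conversation via the Jinja template path.
// On success the result carries prompt, chat_format, grammar, grammar_lazy,
// grammar_triggers, preserved_tokens and additional_stops; on failure the
// native side reports the exception through an `_error` field instead of throwing.
const formatted = await context.getFormattedChat(
  [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'What is the weather in Tokyo?' },
  ],
  undefined, // chat template override; undefined = model default
  {
    jinja: true,
    tools: [/* OpenAI-style function definitions */],
    parallel_tool_calls: false,
    tool_choice: 'auto',
  },
)
```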
@@ -552,7 +650,12 @@ Java_com_rnllama_LlamaContext_doCompletion(
     jobject thiz,
     jlong context_ptr,
     jstring prompt,
+    jint chat_format,
     jstring grammar,
+    jstring json_schema,
+    jboolean grammar_lazy,
+    jobject grammar_triggers,
+    jobject preserved_tokens,
     jfloat temperature,
     jint n_threads,
     jint n_predict,
@@ -578,6 +681,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
     jfloat dry_base,
     jint dry_allowed_length,
     jint dry_penalty_last_n,
+    jfloat top_n_sigma,
     jobjectArray dry_sequence_breakers,
     jobject partial_completion_callback
 ) {
@@ -588,7 +692,8 @@ Java_com_rnllama_LlamaContext_doCompletion(
 
     //llama_reset_timings(llama->ctx);
 
-    llama->params.prompt = env->GetStringUTFChars(prompt, nullptr);
+    auto prompt_chars = env->GetStringUTFChars(prompt, nullptr);
+    llama->params.prompt = prompt_chars;
     llama->params.sampling.seed = (seed == -1) ? time(NULL) : seed;
 
     int max_threads = std::thread::hardware_concurrency();
@@ -613,13 +718,59 @@ Java_com_rnllama_LlamaContext_doCompletion(
     sparams.min_p = min_p;
     sparams.typ_p = typical_p;
     sparams.n_probs = n_probs;
-    sparams.grammar = env->GetStringUTFChars(grammar, nullptr);
     sparams.xtc_threshold = xtc_threshold;
     sparams.xtc_probability = xtc_probability;
     sparams.dry_multiplier = dry_multiplier;
     sparams.dry_base = dry_base;
     sparams.dry_allowed_length = dry_allowed_length;
     sparams.dry_penalty_last_n = dry_penalty_last_n;
+    sparams.top_n_sigma = top_n_sigma;
+
+    // grammar
+    auto grammar_chars = env->GetStringUTFChars(grammar, nullptr);
+    if (grammar_chars && grammar_chars[0] != '\0') {
+        sparams.grammar = grammar_chars;
+    }
+    sparams.grammar_lazy = grammar_lazy;
+    if (grammar_triggers != nullptr) {
+        int grammar_triggers_size = readablearray::size(env, grammar_triggers);
+        for (int i = 0; i < grammar_triggers_size; i++) {
+            common_grammar_trigger trigger;
+            auto trigger_map = readablearray::getMap(env, grammar_triggers, i);
+            jstring trigger_word = readablemap::getString(env, trigger_map, "word", nullptr);
+            jboolean trigger_at_start = readablemap::getBool(env, trigger_map, "at_start", false);
+            trigger.word = env->GetStringUTFChars(trigger_word, nullptr);
+            trigger.at_start = trigger_at_start;
+
+            auto ids = common_tokenize(llama->ctx, trigger.word, /* add_special= */ false, /* parse_special= */ true);
+            if (ids.size() == 1) {
+                sparams.grammar_trigger_tokens.push_back(ids[0]);
+                sparams.preserved_tokens.insert(ids[0]);
+                continue;
+            }
+            sparams.grammar_trigger_words.push_back(trigger);
+        }
+    }
+
+    auto json_schema_chars = env->GetStringUTFChars(json_schema, nullptr);
+    if ((!grammar_chars || grammar_chars[0] == '\0') && json_schema_chars && json_schema_chars[0] != '\0') {
+        auto schema = json::parse(json_schema_chars);
+        sparams.grammar = json_schema_to_grammar(schema);
+    }
+    env->ReleaseStringUTFChars(json_schema, json_schema_chars);
+
+    if (preserved_tokens != nullptr) {
+        int preserved_tokens_size = readablearray::size(env, preserved_tokens);
+        for (int i = 0; i < preserved_tokens_size; i++) {
+            jstring preserved_token = readablearray::getString(env, preserved_tokens, i);
+            auto ids = common_tokenize(llama->ctx, env->GetStringUTFChars(preserved_token, nullptr), /* add_special= */ false, /* parse_special= */ true);
+            if (ids.size() == 1) {
+                sparams.preserved_tokens.insert(ids[0]);
+            } else {
+                LOGI("[RNLlama] Not preserved because more than 1 token (wrong chat template override?): %s", env->GetStringUTFChars(preserved_token, nullptr));
+            }
+        }
+    }
 
     const llama_model * model = llama_get_model(llama->ctx);
     const llama_vocab * vocab = llama_model_get_vocab(model);
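Worth noting in the hunk above: an explicit grammar string takes precedence, and `json_schema` is compiled through json_schema_to_grammar() only when no grammar was supplied; single-token trigger words become grammar_trigger_tokens (and are preserved), while multi-token ones remain grammar_trigger_words. A sketch of the schema-constrained path from the JS side, assuming the completion params expose a `json_schema` field matching the JNI argument (an assumption, not verified against src/index.ts):

```ts
// Sketch: constrain decoding to JSON that validates against this schema.
// Per the native logic above, json_schema is ignored whenever `grammar` is also set.
const result = await context.completion({
  prompt: formatted.prompt,
  json_schema: JSON.stringify({
    type: 'object',
    properties: {
      city: { type: 'string' },
      temp_c: { type: 'number' },
    },
    required: ['city', 'temp_c'],
  }),
})
```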
@@ -744,11 +895,51 @@ Java_com_rnllama_LlamaContext_doCompletion(
         }
     }
 
+    env->ReleaseStringUTFChars(grammar, grammar_chars);
+    env->ReleaseStringUTFChars(prompt, prompt_chars);
     llama_perf_context_print(llama->ctx);
     llama->is_predicting = false;
 
+    auto toolCalls = createWritableArray(env);
+    std::string reasoningContent = "";
+    std::string *content = nullptr;
+    auto toolCallsSize = 0;
+    if (!llama->is_interrupted) {
+        try {
+            common_chat_msg message = common_chat_parse(llama->generated_text, static_cast<common_chat_format>(chat_format));
+            if (!message.reasoning_content.empty()) {
+                reasoningContent = message.reasoning_content;
+            }
+            content = &message.content;
+            for (const auto &tc : message.tool_calls) {
+                auto toolCall = createWriteableMap(env);
+                putString(env, toolCall, "type", "function");
+                auto functionMap = createWriteableMap(env);
+                putString(env, functionMap, "name", tc.name.c_str());
+                putString(env, functionMap, "arguments", tc.arguments.c_str());
+                putMap(env, toolCall, "function", functionMap);
+                if (!tc.id.empty()) {
+                    putString(env, toolCall, "id", tc.id.c_str());
+                }
+                pushMap(env, toolCalls, toolCall);
+                toolCallsSize++;
+            }
+        } catch (const std::exception &e) {
+            // LOGI("Error parsing tool calls: %s", e.what());
+        }
+    }
+
     auto result = createWriteableMap(env);
     putString(env, result, "text", llama->generated_text.c_str());
+    if (content) {
+        putString(env, result, "content", content->c_str());
+    }
+    if (!reasoningContent.empty()) {
+        putString(env, result, "reasoning_content", reasoningContent.c_str());
+    }
+    if (toolCallsSize > 0) {
+        putArray(env, result, "tool_calls", toolCalls);
+    }
     putArray(env, result, "completion_probabilities", tokenProbsToMap(env, llama, llama->generated_token_probs));
     putInt(env, result, "tokens_predicted", llama->num_tokens_predicted);
     putInt(env, result, "tokens_evaluated", llama->num_prompt_tokens);
@@ -977,11 +1168,76 @@ Java_com_rnllama_LlamaContext_freeContext(
     delete llama;
 }
 
+struct log_callback_context {
+    JavaVM *jvm;
+    jobject callback;
+};
+
+static void rnllama_log_callback_to_j(lm_ggml_log_level level, const char * text, void * data) {
+    auto level_c = "";
+    if (level == LM_GGML_LOG_LEVEL_ERROR) {
+        __android_log_print(ANDROID_LOG_ERROR, TAG, text, nullptr);
+        level_c = "error";
+    } else if (level == LM_GGML_LOG_LEVEL_INFO) {
+        __android_log_print(ANDROID_LOG_INFO, TAG, text, nullptr);
+        level_c = "info";
+    } else if (level == LM_GGML_LOG_LEVEL_WARN) {
+        __android_log_print(ANDROID_LOG_WARN, TAG, text, nullptr);
+        level_c = "warn";
+    } else {
+        __android_log_print(ANDROID_LOG_DEFAULT, TAG, text, nullptr);
+    }
+
+    log_callback_context *cb_ctx = (log_callback_context *) data;
+
+    JNIEnv *env;
+    bool need_detach = false;
+    int getEnvResult = cb_ctx->jvm->GetEnv((void**)&env, JNI_VERSION_1_6);
+
+    if (getEnvResult == JNI_EDETACHED) {
+        if (cb_ctx->jvm->AttachCurrentThread(&env, nullptr) == JNI_OK) {
+            need_detach = true;
+        } else {
+            return;
+        }
+    } else if (getEnvResult != JNI_OK) {
+        return;
+    }
+
+    jobject callback = cb_ctx->callback;
+    jclass cb_class = env->GetObjectClass(callback);
+    jmethodID emitNativeLog = env->GetMethodID(cb_class, "emitNativeLog", "(Ljava/lang/String;Ljava/lang/String;)V");
+
+    jstring level_str = env->NewStringUTF(level_c);
+    jstring text_str = env->NewStringUTF(text);
+    env->CallVoidMethod(callback, emitNativeLog, level_str, text_str);
+    env->DeleteLocalRef(level_str);
+    env->DeleteLocalRef(text_str);
+
+    if (need_detach) {
+        cb_ctx->jvm->DetachCurrentThread();
+    }
+}
+
+JNIEXPORT void JNICALL
+Java_com_rnllama_LlamaContext_setupLog(JNIEnv *env, jobject thiz, jobject logCallback) {
+    UNUSED(thiz);
+
+    log_callback_context *cb_ctx = new log_callback_context;
+
+    JavaVM *jvm;
+    env->GetJavaVM(&jvm);
+    cb_ctx->jvm = jvm;
+    cb_ctx->callback = env->NewGlobalRef(logCallback);
+
+    llama_log_set(rnllama_log_callback_to_j, cb_ctx);
+}
+
 JNIEXPORT void JNICALL
-Java_com_rnllama_LlamaContext_logToAndroid(JNIEnv *env, jobject thiz) {
+Java_com_rnllama_LlamaContext_unsetLog(JNIEnv *env, jobject thiz) {
     UNUSED(env);
     UNUSED(thiz);
-    llama_log_set(log_callback, NULL);
+    llama_log_set(rnllama_log_callback_default, NULL);
 }
 
 } // extern "C"
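setupLog/unsetLog replace the old one-way logToAndroid: logs still go to logcat, but are additionally forwarded to a Java callback (emitNativeLog) that can reach JS, with the usual GetEnv/AttachCurrentThread dance since llama.cpp may log from worker threads. A sketch of the JS-side wiring, assuming the fork exposes the same `toggleNativeLog`/`addNativeLogListener` helpers as upstream llama.rn (an assumption; check src/index.ts):

```ts
import { toggleNativeLog, addNativeLogListener } from 'cui-llama.rn'

// Sketch: mirror native llama.cpp logs into the JS console.
await toggleNativeLog(true) // assumed to route through setupLog() above
const subscription = addNativeLogListener((level, text) => {
  console.log(`[rnllama:${level}]`, text.trim())
})
// Later, stop listening and restore the default logcat-only callback:
// subscription.remove()
// await toggleNativeLog(false)
```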
--- a/package/android/src/newarch/java/com/rnllama/RNLlamaModule.java
+++ b/package/android/src/newarch/java/com/rnllama/RNLlamaModule.java
@@ -33,6 +33,11 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
     return NAME;
   }
 
+  @ReactMethod
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    rnllama.toggleNativeLog(enabled, promise);
+  }
+
   @ReactMethod
   public void setContextLimit(double limit, Promise promise) {
     rnllama.setContextLimit(limit, promise);
@@ -49,8 +54,8 @@ public class RNLlamaModule extends NativeRNLlamaSpec {
   }
 
   @ReactMethod
-  public void getFormattedChat(double id, ReadableArray messages, String chatTemplate, Promise promise) {
-    rnllama.getFormattedChat(id, messages, chatTemplate, promise);
+  public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
+    rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
   }
 
   @ReactMethod
--- a/package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java
+++ b/package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java
@@ -34,6 +34,11 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
     return NAME;
   }
 
+  @ReactMethod
+  public void toggleNativeLog(boolean enabled, Promise promise) {
+    rnllama.toggleNativeLog(enabled, promise);
+  }
+
   @ReactMethod
   public void setContextLimit(double limit, Promise promise) {
     rnllama.setContextLimit(limit, promise);
@@ -50,8 +55,8 @@ public class RNLlamaModule extends ReactContextBaseJavaModule {
   }
 
   @ReactMethod
-  public void getFormattedChat(double id, ReadableArray messages, String chatTemplate, Promise promise) {
-    rnllama.getFormattedChat(id, messages, chatTemplate, promise);
+  public void getFormattedChat(double id, String messages, String chatTemplate, ReadableMap params, Promise promise) {
+    rnllama.getFormattedChat(id, messages, chatTemplate, params, promise);
   }
 
   @ReactMethod