@novastera-oss/llamarn 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. package/RNLlamaCpp.podspec +3 -2
  2. package/android/CMakeLists.txt +6 -3
  3. package/android/src/main/cpp/include/llama.h +140 -38
  4. package/android/src/main/jniLibs/arm64-v8a/libggml-base.so +0 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libggml-cpu.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libggml.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libllama.so +0 -0
  8. package/android/src/main/jniLibs/x86_64/libggml-base.so +0 -0
  9. package/android/src/main/jniLibs/x86_64/libggml-cpu.so +0 -0
  10. package/android/src/main/jniLibs/x86_64/libggml.so +0 -0
  11. package/android/src/main/jniLibs/x86_64/libllama.so +0 -0
  12. package/cpp/LlamaCppModel.cpp +48 -67
  13. package/cpp/LlamaCppModel.h +8 -3
  14. package/cpp/PureCppImpl.cpp +1 -1
  15. package/cpp/PureCppImpl.h +2 -2
  16. package/cpp/build-info.cpp +2 -2
  17. package/cpp/llama.cpp/CMakeLists.txt +15 -4
  18. package/cpp/llama.cpp/Makefile +2 -2
  19. package/cpp/llama.cpp/README.md +33 -13
  20. package/cpp/llama.cpp/common/CMakeLists.txt +15 -28
  21. package/cpp/llama.cpp/common/arg.cpp +38 -12
  22. package/cpp/llama.cpp/common/build-info.cpp.in +2 -2
  23. package/cpp/llama.cpp/common/chat-parser.cpp +9 -3
  24. package/cpp/llama.cpp/common/chat-parser.h +4 -1
  25. package/cpp/llama.cpp/common/chat.cpp +16 -13
  26. package/cpp/llama.cpp/common/chat.h +1 -1
  27. package/cpp/llama.cpp/common/common.cpp +52 -40
  28. package/cpp/llama.cpp/common/common.h +5 -2
  29. package/cpp/llama.cpp/common/json-partial.cpp +5 -4
  30. package/cpp/llama.cpp/common/json-partial.h +2 -1
  31. package/cpp/llama.cpp/common/json-schema-to-grammar.cpp +2 -1
  32. package/cpp/llama.cpp/common/json-schema-to-grammar.h +4 -4
  33. package/cpp/llama.cpp/common/speculative.cpp +6 -4
  34. package/cpp/llama.cpp/convert_hf_to_gguf.py +128 -84
  35. package/cpp/llama.cpp/ggml/CMakeLists.txt +47 -2
  36. package/cpp/llama.cpp/ggml/cmake/common.cmake +1 -2
  37. package/cpp/llama.cpp/ggml/include/ggml.h +1 -3
  38. package/cpp/llama.cpp/ggml/src/CMakeLists.txt +49 -13
  39. package/cpp/llama.cpp/ggml/src/ggml-backend-reg.cpp +5 -0
  40. package/cpp/llama.cpp/ggml/src/ggml-backend.cpp +10 -5
  41. package/cpp/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +3 -3
  42. package/cpp/llama.cpp/ggml/src/ggml-cann/common.h +6 -1
  43. package/cpp/llama.cpp/ggml/src/ggml-cann/ggml-cann.cpp +33 -9
  44. package/cpp/llama.cpp/ggml/src/ggml-common.h +4 -0
  45. package/cpp/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +93 -24
  46. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +1 -1
  47. package/cpp/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +1 -1
  48. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp +94 -0
  49. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/quants.c +4113 -0
  50. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/arm/repack.cpp +2174 -0
  51. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/loongarch/quants.c +2638 -0
  52. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/powerpc/quants.c +2731 -0
  53. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/quants.c +2068 -0
  54. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/riscv/repack.cpp +396 -0
  55. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/s390/quants.c +1299 -0
  56. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/wasm/quants.c +1480 -0
  57. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch/x86/quants.c +4310 -0
  58. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-aarch64.cpp → arch/x86/repack.cpp} +59 -3206
  59. package/cpp/llama.cpp/ggml/src/ggml-cpu/arch-fallback.h +184 -0
  60. package/cpp/llama.cpp/ggml/src/ggml-cpu/common.h +1 -1
  61. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +7 -4
  62. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +33 -2
  63. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +8 -8
  64. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.cpp → hbm.cpp} +1 -1
  65. package/cpp/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +1 -1
  66. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +56 -7
  67. package/cpp/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.h +5 -0
  68. package/cpp/llama.cpp/ggml/src/ggml-cpu/ops.cpp +2 -2
  69. package/cpp/llama.cpp/ggml/src/ggml-cpu/quants.c +1157 -0
  70. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-quants.h → quants.h} +26 -0
  71. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.cpp +1555 -0
  72. package/cpp/llama.cpp/ggml/src/ggml-cpu/repack.h +98 -0
  73. package/cpp/llama.cpp/ggml/src/ggml-cpu/simd-mappings.h +2 -4
  74. package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.cpp → traits.cpp} +1 -1
  75. package/cpp/llama.cpp/ggml/src/ggml-cuda/common.cuh +6 -8
  76. package/cpp/llama.cpp/ggml/src/ggml-cuda/fattn-mma-f16.cuh +5 -2
  77. package/cpp/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu +25 -16
  78. package/cpp/llama.cpp/ggml/src/ggml-cuda/ssm-scan.cu +6 -4
  79. package/cpp/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +4 -0
  80. package/cpp/llama.cpp/ggml/src/ggml-impl.h +2 -0
  81. package/cpp/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +11 -10
  82. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.m +33 -8
  83. package/cpp/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal +135 -100
  84. package/cpp/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +7 -0
  85. package/cpp/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +908 -3
  86. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/concat.cl +109 -0
  87. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/mul_mv_id_q4_0_f32_8x_flat.cl +283 -0
  88. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/pad.cl +30 -0
  89. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/repeat.cl +39 -0
  90. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tanh.cl +63 -0
  91. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/tsembd.cl +48 -0
  92. package/cpp/llama.cpp/ggml/src/ggml-opencl/kernels/upscale.cl +121 -0
  93. package/cpp/llama.cpp/ggml/src/ggml-quants.c +0 -2
  94. package/cpp/llama.cpp/ggml/src/ggml-rpc/ggml-rpc.cpp +18 -15
  95. package/cpp/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +3 -3
  96. package/cpp/llama.cpp/ggml/src/ggml-sycl/common.hpp +19 -24
  97. package/cpp/llama.cpp/ggml/src/ggml-sycl/convert.cpp +21 -2
  98. package/cpp/llama.cpp/ggml/src/ggml-sycl/cpy.cpp +121 -4
  99. package/cpp/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +32 -0
  100. package/cpp/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +3 -0
  101. package/cpp/llama.cpp/ggml/src/ggml-sycl/getrows.cpp +2 -96
  102. package/cpp/llama.cpp/ggml/src/ggml-sycl/ggml-sycl.cpp +164 -46
  103. package/cpp/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +32 -8
  104. package/cpp/llama.cpp/ggml/src/ggml-sycl/quants.hpp +38 -10
  105. package/cpp/llama.cpp/ggml/src/ggml-sycl/rope.cpp +118 -11
  106. package/cpp/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +108 -16
  107. package/cpp/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +26 -29
  108. package/cpp/llama.cpp/ggml/src/ggml-vulkan/ggml-vulkan.cpp +432 -248
  109. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +0 -12
  110. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp +98 -0
  111. package/cpp/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +2 -0
  112. package/cpp/llama.cpp/ggml/src/ggml.c +9 -8
  113. package/cpp/llama.cpp/ggml/src/ggml.cpp +26 -0
  114. package/cpp/llama.cpp/ggml/src/gguf.cpp +19 -2
  115. package/cpp/llama.cpp/gguf-py/gguf/constants.py +57 -0
  116. package/cpp/llama.cpp/gguf-py/gguf/gguf_writer.py +4 -1
  117. package/cpp/llama.cpp/gguf-py/gguf/tensor_mapping.py +14 -3
  118. package/cpp/llama.cpp/include/llama.h +140 -38
  119. package/cpp/llama.cpp/requirements/requirements-compare-llama-bench.txt +1 -0
  120. package/cpp/llama.cpp/src/CMakeLists.txt +4 -1
  121. package/cpp/llama.cpp/src/llama-arch.cpp +95 -3
  122. package/cpp/llama.cpp/src/llama-arch.h +7 -1
  123. package/cpp/llama.cpp/src/llama-batch.cpp +289 -31
  124. package/cpp/llama.cpp/src/llama-batch.h +47 -17
  125. package/cpp/llama.cpp/src/llama-chat.cpp +19 -2
  126. package/cpp/llama.cpp/src/llama-chat.h +1 -0
  127. package/cpp/llama.cpp/src/llama-context.cpp +488 -313
  128. package/cpp/llama.cpp/src/llama-context.h +38 -17
  129. package/cpp/llama.cpp/src/llama-cparams.cpp +1 -1
  130. package/cpp/llama.cpp/src/llama-cparams.h +1 -1
  131. package/cpp/llama.cpp/src/llama-graph.cpp +275 -152
  132. package/cpp/llama.cpp/src/llama-graph.h +109 -52
  133. package/cpp/llama.cpp/src/llama-hparams.cpp +6 -2
  134. package/cpp/llama.cpp/src/llama-hparams.h +8 -2
  135. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.cpp +281 -0
  136. package/cpp/llama.cpp/src/llama-kv-cache-unified-iswa.h +133 -0
  137. package/cpp/llama.cpp/src/llama-kv-cache-unified.cpp +1835 -0
  138. package/cpp/llama.cpp/src/llama-kv-cache-unified.h +308 -0
  139. package/cpp/llama.cpp/src/llama-kv-cells.h +53 -17
  140. package/cpp/llama.cpp/src/llama-memory-hybrid.cpp +247 -0
  141. package/cpp/llama.cpp/src/llama-memory-hybrid.h +143 -0
  142. package/cpp/llama.cpp/src/llama-memory-recurrent.cpp +1116 -0
  143. package/cpp/llama.cpp/src/llama-memory-recurrent.h +188 -0
  144. package/cpp/llama.cpp/src/llama-memory.cpp +41 -0
  145. package/cpp/llama.cpp/src/llama-memory.h +89 -4
  146. package/cpp/llama.cpp/src/llama-mmap.cpp +1 -1
  147. package/cpp/llama.cpp/src/llama-model-loader.cpp +42 -17
  148. package/cpp/llama.cpp/src/llama-model.cpp +735 -143
  149. package/cpp/llama.cpp/src/llama-model.h +4 -0
  150. package/cpp/llama.cpp/src/llama-quant.cpp +2 -1
  151. package/cpp/llama.cpp/src/llama-vocab.cpp +39 -25
  152. package/cpp/llama.cpp/src/llama.cpp +11 -7
  153. package/cpp/llama.cpp/src/unicode.cpp +5 -0
  154. package/cpp/llama.cpp/vendor/cpp-httplib/httplib.h +10518 -0
  155. package/cpp/llama.cpp/vendor/miniaudio/miniaudio.h +93468 -0
  156. package/cpp/llama.cpp/{common → vendor}/minja/chat-template.hpp +1 -1
  157. package/cpp/llama.cpp/{common → vendor}/minja/minja.hpp +1 -1
  158. package/cpp/llama.cpp/{common → vendor/nlohmann}/json.hpp +3027 -2267
  159. package/cpp/llama.cpp/vendor/nlohmann/json_fwd.hpp +187 -0
  160. package/cpp/llama.cpp/vendor/stb/stb_image.h +7988 -0
  161. package/cpp/rn-completion.cpp +65 -10
  162. package/cpp/{rn-llama.hpp → rn-llama.h} +1 -1
  163. package/cpp/{rn-utils.hpp → rn-utils.h} +8 -1
  164. package/ios/include/chat.h +1 -1
  165. package/ios/include/common/minja/chat-template.hpp +1 -1
  166. package/ios/include/common/minja/minja.hpp +1 -1
  167. package/ios/include/common.h +5 -2
  168. package/ios/include/json-schema-to-grammar.h +4 -4
  169. package/ios/include/llama.h +140 -38
  170. package/ios/include/{common → nlohmann}/json.hpp +3027 -2267
  171. package/ios/libs/llama.xcframework/Info.plist +20 -20
  172. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  173. package/ios/libs/llama.xcframework/ios-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4617
  174. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/ggml.h +1 -3
  175. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/Headers/llama.h +140 -38
  176. package/ios/libs/llama.xcframework/ios-arm64/llama.framework/llama +0 -0
  177. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  178. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
  179. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3557
  180. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
  181. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
  182. package/ios/libs/llama.xcframework/ios-arm64_x86_64-simulator/llama.framework/llama +0 -0
  183. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  184. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4638
  185. package/ios/libs/llama.xcframework/macos-arm64_x86_64/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3744 -3559
  186. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/ggml.h +1 -3
  187. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Headers/llama.h +140 -38
  188. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/ggml.h +1 -3
  189. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/Headers/llama.h +140 -38
  190. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/A/llama +0 -0
  191. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/ggml.h +1 -3
  192. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/Headers/llama.h +140 -38
  193. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/Versions/Current/llama +0 -0
  194. package/ios/libs/llama.xcframework/macos-arm64_x86_64/llama.framework/llama +0 -0
  195. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  196. package/ios/libs/llama.xcframework/tvos-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4863 -4616
  197. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/ggml.h +1 -3
  198. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/Headers/llama.h +140 -38
  199. package/ios/libs/llama.xcframework/tvos-arm64/llama.framework/llama +0 -0
  200. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  201. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4834 -4637
  202. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3742 -3556
  203. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
  204. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
  205. package/ios/libs/llama.xcframework/tvos-arm64_x86_64-simulator/llama.framework/llama +0 -0
  206. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  207. package/ios/libs/llama.xcframework/xros-arm64/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4900 -4653
  208. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/ggml.h +1 -3
  209. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/Headers/llama.h +140 -38
  210. package/ios/libs/llama.xcframework/xros-arm64/llama.framework/llama +0 -0
  211. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/DWARF/llama +0 -0
  212. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/aarch64/llama.yml +4871 -4674
  213. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/dSYMs/llama.dSYM/Contents/Resources/Relocations/x86_64/llama.yml +3773 -3587
  214. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/ggml.h +1 -3
  215. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/Headers/llama.h +140 -38
  216. package/ios/libs/llama.xcframework/xros-arm64_x86_64-simulator/llama.framework/llama +0 -0
  217. package/package.json +1 -2
  218. package/cpp/llama.cpp/common/cmake/build-info-gen-cpp.cmake +0 -24
  219. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +0 -8
  220. package/cpp/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +0 -13891
  221. package/cpp/llama.cpp/src/llama-kv-cache.cpp +0 -2747
  222. package/cpp/llama.cpp/src/llama-kv-cache.h +0 -502
  223. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{cpu-feats-x86.cpp → arch/x86/cpu-feats.cpp} +0 -0
  224. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-hbm.h → hbm.h} +0 -0
  225. /package/cpp/llama.cpp/ggml/src/ggml-cpu/{ggml-cpu-traits.h → traits.h} +0 -0
@@ -39,7 +39,8 @@ Pod::Spec.new do |s|
39
39
  "cpp/llama.cpp/common/speculative.{h,cpp}",
40
40
  "cpp/llama.cpp/common/llguidance.{h,cpp}",
41
41
  "cpp/llama.cpp/common/*.hpp",
42
- "cpp/llama.cpp/common/minja/*.hpp"
42
+ "cpp/llama.cpp/vendor/minja/*.hpp"
43
+ "cpp/llama.cpp/vendor/nlohmann/*.hpp"
43
44
 
44
45
  # Include all necessary headers for compilation
45
46
  s.preserve_paths = "ios/include/**/*.h",
@@ -51,7 +52,7 @@ Pod::Spec.new do |s|
51
52
 
52
53
  # Compiler settings
53
54
  s.pod_target_xcconfig = {
54
- "HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
55
+ "HEADER_SEARCH_PATHS" => "\"$(PODS_TARGET_SRCROOT)/ios/include\" \"$(PODS_TARGET_SRCROOT)/cpp\" \"$(PODS_TARGET_SRCROOT)/ios/generated/RNLlamaCppSpec\" \"$(PODS_TARGET_SRCROOT)/ios/generated\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/ggml/include\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/common\" \"$(PODS_TARGET_SRCROOT)/cpp/llama.cpp/vendor\" \"$(PODS_ROOT)/boost\" \"$(PODS_ROOT)/Headers/Public/React-bridging\" \"$(PODS_ROOT)/Headers/Public/React\"",
55
56
  "OTHER_CPLUSPLUSFLAGS" => "-DFOLLY_NO_CONFIG -DFOLLY_MOBILE=1 -DFOLLY_USE_LIBCPP=1 -DLLAMA_METAL -DRCT_NEW_ARCH_ENABLED=1 -DFBJSRT_EXPORTED=1",
56
57
  "CLANG_CXX_LANGUAGE_STANDARD" => "c++17",
57
58
  "GCC_OPTIMIZATION_LEVEL" => "3", # Maximum optimization
@@ -141,7 +141,8 @@ target_include_directories(common PRIVATE
141
141
  ${LLAMA_CPP_DIR}/ggml/include
142
142
  ${LLAMA_CPP_DIR}/include
143
143
  ${LLAMA_CPP_DIR}/common
144
- ${LLAMA_CPP_DIR}/common/minja # Add this for chat-template.hpp
144
+ ${LLAMA_CPP_DIR}/vendor/minja
145
+ ${LLAMA_CPP_DIR}/vendor
145
146
  ${LLAMA_CPP_DIR}/src
146
147
  )
147
148
 
@@ -150,7 +151,8 @@ target_include_directories(RNLlamaCpp PRIVATE
150
151
  ${LLAMA_CPP_DIR}/ggml/include
151
152
  ${LLAMA_CPP_DIR}/include
152
153
  ${LLAMA_CPP_DIR}/common
153
- ${LLAMA_CPP_DIR}/common/minja # Add this for chat-template.hpp
154
+ ${LLAMA_CPP_DIR}/vendor/minja # Add this for chat-template.hpp
155
+ ${LLAMA_CPP_DIR}/vendor
154
156
  ${LLAMA_CPP_DIR}/src
155
157
  # Add the generated headers path
156
158
  ${MODULE_ROOT}/android/generated/jni
@@ -244,6 +246,7 @@ target_include_directories(RNLlamaCpp INTERFACE
244
246
  ${LLAMA_CPP_DIR}/ggml/include
245
247
  ${LLAMA_CPP_DIR}/include
246
248
  ${LLAMA_CPP_DIR}/common
247
- ${LLAMA_CPP_DIR}/common/minja
249
+ ${LLAMA_CPP_DIR}/vendor/minja
250
+ ${LLAMA_CPP_DIR}/vendor
248
251
  ${LLAMA_CPP_DIR}/src
249
252
  )
@@ -61,7 +61,10 @@ extern "C" {
61
61
  struct llama_model;
62
62
  struct llama_context;
63
63
  struct llama_sampler;
64
- struct llama_kv_cache;
64
+
65
+ typedef struct llama_memory_i * llama_memory_t;
66
+
67
+ struct llama_kv_cache; // DEPRECATED (use llama_memory instead)
65
68
 
66
69
  typedef int32_t llama_pos;
67
70
  typedef int32_t llama_token;
@@ -240,18 +243,21 @@ extern "C" {
240
243
 
241
244
  typedef bool (*llama_progress_callback)(float progress, void * user_data);
242
245
 
243
- // Input data for llama_decode
246
+ // Input data for llama_encode/llama_decode
244
247
  // A llama_batch object can contain input about one or many sequences
245
248
  // The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens
246
249
  //
247
250
  // - token : the token ids of the input (used when embd is NULL)
248
251
  // - embd : token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
249
252
  // - pos : the positions of the respective token in the sequence
250
- // (if set to NULL, the token position will be tracked automatically by llama_decode)
253
+ // (if set to NULL, the token position will be tracked automatically by llama_encode/llama_decode)
251
254
  // - seq_id : the sequence to which the respective token belongs
252
255
  // (if set to NULL, the sequence ID will be assumed to be 0)
253
256
  // - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output
254
- // (if set to NULL, only the logits for last token will be returned)
257
+ // (if set to NULL:
258
+ // - if embeddings: all tokens are output
259
+ // - if not: only the last token is output
260
+ // )
255
261
  //
256
262
  typedef struct llama_batch {
257
263
  int32_t n_tokens;
@@ -261,7 +267,7 @@ extern "C" {
261
267
  llama_pos * pos;
262
268
  int32_t * n_seq_id;
263
269
  llama_seq_id ** seq_id;
264
- int8_t * logits; // TODO: rename this to "output"
270
+ int8_t * logits; // TODO: rename this to "output"
265
271
  } llama_batch;
266
272
 
267
273
  enum llama_model_kv_override_type {
@@ -366,6 +372,8 @@ extern "C" {
366
372
  bool no_perf; // measure performance timings
367
373
  bool op_offload; // offload host tensor operations to device
368
374
  bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
375
+ // NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases
376
+ // ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573
369
377
  };
370
378
 
371
379
  // model quantization parameters
@@ -491,9 +499,11 @@ extern "C" {
491
499
  DEPRECATED(LLAMA_API int32_t llama_n_vocab (const struct llama_vocab * vocab), "use llama_vocab_n_tokens instead");
492
500
 
493
501
  LLAMA_API const struct llama_model * llama_get_model (const struct llama_context * ctx);
494
- LLAMA_API struct llama_kv_cache * llama_get_kv_self ( struct llama_context * ctx);
502
+ LLAMA_API llama_memory_t llama_get_memory (const struct llama_context * ctx);
495
503
  LLAMA_API enum llama_pooling_type llama_pooling_type(const struct llama_context * ctx); // TODO: rename to llama_get_pooling_type
496
504
 
505
+ DEPRECATED(LLAMA_API struct llama_kv_cache * llama_get_kv_self(struct llama_context * ctx), "use llama_get_memory instead");
506
+
497
507
  LLAMA_API const struct llama_vocab * llama_model_get_vocab(const struct llama_model * model);
498
508
  LLAMA_API enum llama_rope_type llama_model_rope_type(const struct llama_model * model);
499
509
 
@@ -502,10 +512,18 @@ extern "C" {
502
512
  LLAMA_API int32_t llama_model_n_layer (const struct llama_model * model);
503
513
  LLAMA_API int32_t llama_model_n_head (const struct llama_model * model);
504
514
  LLAMA_API int32_t llama_model_n_head_kv (const struct llama_model * model);
515
+ LLAMA_API int32_t llama_model_n_swa (const struct llama_model * model);
505
516
 
506
517
  // Get the model's RoPE frequency scaling factor
507
518
  LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);
508
519
 
520
+ // Returns the number of classifier outputs (only valid for classifier models)
521
+ // Undefined behavior for non-classifier models
522
+ LLAMA_API uint32_t llama_model_n_cls_out(const struct llama_model * model);
523
+
524
+ // Returns label of classifier output by index (<n_cls_out). Returns nullptr if no label provided
525
+ LLAMA_API const char * llama_model_cls_label(const struct llama_model * model, uint32_t i);
526
+
509
527
  LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_vocab * vocab);
510
528
 
511
529
  LLAMA_API int32_t llama_vocab_n_tokens(const struct llama_vocab * vocab);
@@ -606,7 +624,81 @@ extern "C" {
606
624
  int32_t il_end);
607
625
 
608
626
  //
609
- // KV cache
627
+ // Memory
628
+ //
629
+
630
+ // Clear the memory contents
631
+ // If data == true, the data buffers will also be cleared together with the metadata
632
+ LLAMA_API void llama_memory_clear(
633
+ llama_memory_t mem,
634
+ bool data);
635
+
636
+ // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
637
+ // Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
638
+ // seq_id < 0 : match any sequence
639
+ // p0 < 0 : [0, p1]
640
+ // p1 < 0 : [p0, inf)
641
+ LLAMA_API bool llama_memory_seq_rm(
642
+ llama_memory_t mem,
643
+ llama_seq_id seq_id,
644
+ llama_pos p0,
645
+ llama_pos p1);
646
+
647
+ // Copy all tokens that belong to the specified sequence to another sequence
648
+ // p0 < 0 : [0, p1]
649
+ // p1 < 0 : [p0, inf)
650
+ LLAMA_API void llama_memory_seq_cp(
651
+ llama_memory_t mem,
652
+ llama_seq_id seq_id_src,
653
+ llama_seq_id seq_id_dst,
654
+ llama_pos p0,
655
+ llama_pos p1);
656
+
657
+ // Removes all tokens that do not belong to the specified sequence
658
+ LLAMA_API void llama_memory_seq_keep(
659
+ llama_memory_t mem,
660
+ llama_seq_id seq_id);
661
+
662
+ // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
663
+ // p0 < 0 : [0, p1]
664
+ // p1 < 0 : [p0, inf)
665
+ LLAMA_API void llama_memory_seq_add(
666
+ llama_memory_t mem,
667
+ llama_seq_id seq_id,
668
+ llama_pos p0,
669
+ llama_pos p1,
670
+ llama_pos delta);
671
+
672
+ // Integer division of the positions by factor of `d > 1`
673
+ // p0 < 0 : [0, p1]
674
+ // p1 < 0 : [p0, inf)
675
+ LLAMA_API void llama_memory_seq_div(
676
+ llama_memory_t mem,
677
+ llama_seq_id seq_id,
678
+ llama_pos p0,
679
+ llama_pos p1,
680
+ int d);
681
+
682
+ // Returns the smallest position present in the memory for the specified sequence
683
+ // This is typically non-zero only for SWA caches
684
+ // Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the memory
685
+ // Return -1 if the sequence is empty
686
+ LLAMA_API llama_pos llama_memory_seq_pos_min(
687
+ llama_memory_t mem,
688
+ llama_seq_id seq_id);
689
+
690
+ // Returns the largest position present in the memory for the specified sequence
691
+ // Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the memory
692
+ // Return -1 if the sequence is empty
693
+ LLAMA_API llama_pos llama_memory_seq_pos_max(
694
+ llama_memory_t mem,
695
+ llama_seq_id seq_id);
696
+
697
+ // Check if the memory supports shifting
698
+ LLAMA_API bool llama_memory_can_shift(llama_memory_t mem);
699
+
700
+ //
701
+ // KV cache for self-attention (TODO: deprecate in favor of llama_memory)
610
702
  //
611
703
 
612
704
  // Returns the number of tokens in the KV cache (slow, use only for debug)
@@ -619,93 +711,103 @@ extern "C" {
619
711
  "Use llama_kv_self_seq_pos_max() and llama_kv_self_seq_pos_min() instead (https://github.com/ggml-org/llama.cpp/issues/13793)");
620
712
 
621
713
  // Clear the KV cache - both cell info is erased and KV data is zeroed
622
- LLAMA_API void llama_kv_self_clear(
623
- struct llama_context * ctx);
714
+ DEPRECATED(LLAMA_API void llama_kv_self_clear(
715
+ struct llama_context * ctx),
716
+ "Use llama_memory_clear() instead");
624
717
 
625
718
  // Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
626
719
  // Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails
627
720
  // seq_id < 0 : match any sequence
628
721
  // p0 < 0 : [0, p1]
629
722
  // p1 < 0 : [p0, inf)
630
- LLAMA_API bool llama_kv_self_seq_rm(
723
+ DEPRECATED(LLAMA_API bool llama_kv_self_seq_rm(
631
724
  struct llama_context * ctx,
632
725
  llama_seq_id seq_id,
633
726
  llama_pos p0,
634
- llama_pos p1);
727
+ llama_pos p1),
728
+ "Use llama_memory_seq_rm() instead");
635
729
 
636
730
  // Copy all tokens that belong to the specified sequence to another sequence
637
731
  // Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
638
732
  // p0 < 0 : [0, p1]
639
733
  // p1 < 0 : [p0, inf)
640
- LLAMA_API void llama_kv_self_seq_cp(
734
+ DEPRECATED(LLAMA_API void llama_kv_self_seq_cp(
641
735
  struct llama_context * ctx,
642
736
  llama_seq_id seq_id_src,
643
737
  llama_seq_id seq_id_dst,
644
738
  llama_pos p0,
645
- llama_pos p1);
739
+ llama_pos p1),
740
+ "Use llama_memory_seq_cp() instead");
646
741
 
647
742
  // Removes all tokens that do not belong to the specified sequence
648
- LLAMA_API void llama_kv_self_seq_keep(
743
+ DEPRECATED(LLAMA_API void llama_kv_self_seq_keep(
649
744
  struct llama_context * ctx,
650
- llama_seq_id seq_id);
745
+ llama_seq_id seq_id),
746
+ "Use llama_memory_seq_keep() instead");
651
747
 
652
748
  // Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
653
749
  // If the KV cache is RoPEd, the KV data is updated accordingly:
654
750
  // - lazily on next llama_decode()
655
- // - explicitly with llama_kv_self_update()
656
751
  // p0 < 0 : [0, p1]
657
752
  // p1 < 0 : [p0, inf)
658
- LLAMA_API void llama_kv_self_seq_add(
753
+ DEPRECATED(LLAMA_API void llama_kv_self_seq_add(
659
754
  struct llama_context * ctx,
660
755
  llama_seq_id seq_id,
661
756
  llama_pos p0,
662
757
  llama_pos p1,
663
- llama_pos delta);
758
+ llama_pos delta),
759
+ "Use llama_memory_seq_add() instead");
664
760
 
665
761
  // Integer division of the positions by factor of `d > 1`
666
762
  // If the KV cache is RoPEd, the KV data is updated accordingly:
667
763
  // - lazily on next llama_decode()
668
- // - explicitly with llama_kv_self_update()
669
764
  // p0 < 0 : [0, p1]
670
765
  // p1 < 0 : [p0, inf)
671
- LLAMA_API void llama_kv_self_seq_div(
766
+ DEPRECATED(void llama_kv_self_seq_div(
672
767
  struct llama_context * ctx,
673
768
  llama_seq_id seq_id,
674
769
  llama_pos p0,
675
770
  llama_pos p1,
676
- int d);
771
+ int d),
772
+ "Use llama_memory_seq_div() instead");
677
773
 
678
774
  // Returns the smallest position present in the KV cache for the specified sequence
679
775
  // This is typically non-zero only for SWA caches
776
+ // Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the KV cache
680
777
  // Return -1 if the sequence is empty
681
- LLAMA_API llama_pos llama_kv_self_seq_pos_min(
778
+ DEPRECATED(LLAMA_API llama_pos llama_kv_self_seq_pos_min(
682
779
  struct llama_context * ctx,
683
- llama_seq_id seq_id);
780
+ llama_seq_id seq_id),
781
+ "Use llama_memory_seq_pos_min() instead");
684
782
 
685
783
  // Returns the largest position present in the KV cache for the specified sequence
784
+ // Note that all positions in the range [pos_min, pos_max] are guaranteed to be present in the KV cache
686
785
  // Return -1 if the sequence is empty
687
- LLAMA_API llama_pos llama_kv_self_seq_pos_max(
786
+ DEPRECATED(LLAMA_API llama_pos llama_kv_self_seq_pos_max(
688
787
  struct llama_context * ctx,
689
- llama_seq_id seq_id);
788
+ llama_seq_id seq_id),
789
+ "Use llama_memory_seq_pos_max() instead");
690
790
 
691
791
  // Defragment the KV cache
692
792
  // This will be applied:
693
793
  // - lazily on next llama_decode()
694
- // - explicitly with llama_kv_self_update()
695
- LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx);
794
+ DEPRECATED(LLAMA_API void llama_kv_self_defrag(struct llama_context * ctx),
795
+ "simply remove this call, the context will automatically decide when to do a defragmentation based on 'defrag_thold'");
696
796
 
697
797
  // Check if the context supports KV cache shifting
698
- LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx);
798
+ DEPRECATED(LLAMA_API bool llama_kv_self_can_shift(const struct llama_context * ctx),
799
+ "use llama_memory_can_shift() instead");
699
800
 
700
801
  // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
701
- LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
802
+ DEPRECATED(LLAMA_API void llama_kv_self_update(struct llama_context * ctx),
803
+ "simply remove this call, updates are applied lazily on the next llama_decode()");
702
804
 
703
805
  //
704
806
  // State / sessions
705
807
  //
706
808
 
707
809
  // Returns the *actual* size in bytes of the state
708
- // (logits, embedding and kv_cache)
810
+ // (logits, embedding and memory)
709
811
  // Only use when saving the state, not when restoring it, otherwise the size may be too small.
710
812
  LLAMA_API size_t llama_state_get_size(struct llama_context * ctx);
711
813
  LLAMA_API DEPRECATED(size_t llama_get_state_size(struct llama_context * ctx),
@@ -761,12 +863,12 @@ extern "C" {
761
863
  size_t n_token_count),
762
864
  "use llama_state_save_file instead");
763
865
 
764
- // Get the exact size needed to copy the KV cache of a single sequence
866
+ // Get the exact size needed to copy the state of a single sequence
765
867
  LLAMA_API size_t llama_state_seq_get_size(
766
868
  struct llama_context * ctx,
767
869
  llama_seq_id seq_id);
768
870
 
769
- // Copy the KV cache of a single sequence into the specified buffer
871
+ // Copy the state of a single sequence into the specified buffer
770
872
  LLAMA_API size_t llama_state_seq_get_data(
771
873
  struct llama_context * ctx,
772
874
  uint8_t * dst,
@@ -832,16 +934,16 @@ extern "C" {
832
934
  // For encode-decoder contexts, processes the batch using the encoder.
833
935
  // Can store the encoder output internally for later use by the decoder's cross-attention layers.
834
936
  // 0 - success
835
- // < 0 - error. the KV cache state is restored to the state before this call
937
+ // < 0 - error. the memory state is restored to the state before this call
836
938
  LLAMA_API int32_t llama_encode(
837
939
  struct llama_context * ctx,
838
940
  struct llama_batch batch);
839
941
 
840
942
  // Process a batch of tokens.
841
- // Requires KV cache.
943
+ // Requires the context to have a memory.
842
944
  // For encode-decoder contexts, processes the batch using the decoder.
843
945
  // Positive return values does not mean a fatal error, but rather a warning.
844
- // Upon non-zero return values, the KV cache state is restored to the state before this call
946
+ // Upon non-zero return values, the memory state is restored to the state before this call
845
947
  // 0 - success
846
948
  // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
847
949
  // 2 - aborted
@@ -862,8 +964,8 @@ extern "C" {
862
964
  // Get the number of threads used for prompt and batch processing (multiple token).
863
965
  LLAMA_API int32_t llama_n_threads_batch(struct llama_context * ctx);
864
966
 
865
- // Set whether the model is in embeddings mode or not
866
- // If true, embeddings will be returned but logits will not
967
+ // Set whether the context outputs embeddings or not
968
+ // TODO: rename to avoid confusion with llama_get_embeddings()
867
969
  LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings);
868
970
 
869
971
  // Set whether to use causal attention or not
@@ -912,7 +1014,7 @@ extern "C" {
912
1014
 
913
1015
  // Get the embeddings for a sequence id
914
1016
  // Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
915
- // when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[1] with the rank of the sequence
1017
+ // when pooling_type == LLAMA_POOLING_TYPE_RANK, returns float[n_cls_out] with the rank(s) of the sequence
916
1018
  // otherwise: float[n_embd] (1-dimensional)
917
1019
  LLAMA_API float * llama_get_embeddings_seq(struct llama_context * ctx, llama_seq_id seq_id);
918
1020
 
@@ -17,8 +17,8 @@
17
17
  #include <memory>
18
18
 
19
19
  // Include rn-completion integration
20
- #include "rn-utils.hpp"
21
- #include "rn-llama.hpp"
20
+ #include "rn-utils.h"
21
+ #include "rn-llama.h"
22
22
 
23
23
  // Include llama.cpp headers
24
24
  #include "llama.h"
@@ -242,38 +242,12 @@ CompletionOptions LlamaCppModel::parseCompletionOptions(jsi::Runtime& rt, const
242
242
  auto paramsVal = fnObj.getProperty(rt, "parameters");
243
243
  if (paramsVal.isObject()) {
244
244
  try {
245
- // Convert the JSI object directly to nlohmann::json
246
- auto paramsObj = paramsVal.getObject(rt);
247
- json fnParams = json::object();
248
-
249
- // Extract properties directly from the JSI object
250
- jsi::Array propNames = paramsObj.getPropertyNames(rt);
251
- size_t propCount = propNames.size(rt);
252
- for (size_t i = 0; i < propCount; i++) {
253
- jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
254
- std::string key = propName.utf8(rt);
255
- auto value = paramsObj.getProperty(rt, propName);
256
-
257
- if (value.isString()) {
258
- fnParams[key] = value.asString(rt).utf8(rt);
259
- } else if (value.isNumber()) {
260
- fnParams[key] = value.asNumber();
261
- } else if (value.isBool()) {
262
- fnParams[key] = value.getBool();
263
- } else if (value.isNull()) {
264
- fnParams[key] = nullptr;
265
- } else if (value.isObject()) {
266
- if (value.getObject(rt).isArray(rt)) {
267
- fnParams[key] = json::array();
268
- } else {
269
- fnParams[key] = json::object();
270
- }
271
- }
272
- }
273
-
274
- fnJson["parameters"] = fnParams;
275
- } catch (const std::exception&) {
276
- fnJson["parameters"] = json::object();
245
+ // Convert the JSI object directly to nlohmann::json using the new helper
246
+ fnJson["parameters"] = jsiValueToJson(rt, paramsVal);
247
+ } catch (const std::exception& e) {
248
+ // Log error or handle as appropriate
249
+ fprintf(stderr, "Failed to parse tool parameters: %s\n", e.what());
250
+ fnJson["parameters"] = json::object(); // Fallback to empty object
277
251
  }
278
252
  }
279
253
  }
@@ -336,39 +310,12 @@ CompletionOptions LlamaCppModel::parseCompletionOptions(jsi::Runtime& rt, const
336
310
  auto paramsVal = fnObj.getProperty(rt, "parameters");
337
311
  if (paramsVal.isObject()) {
338
312
  try {
339
- // Convert the JSI object directly to nlohmann::json
340
- auto paramsObj = paramsVal.getObject(rt);
341
- json fnParams = json::object();
342
-
343
- // Extract properties directly from the JSI object
344
- jsi::Array propNames = paramsObj.getPropertyNames(rt);
345
- size_t propCount = propNames.size(rt);
346
- for (size_t i = 0; i < propCount; i++) {
347
- jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
348
- std::string key = propName.utf8(rt);
349
- auto value = paramsObj.getProperty(rt, propName);
350
-
351
- if (value.isString()) {
352
- fnParams[key] = value.asString(rt).utf8(rt);
353
- } else if (value.isNumber()) {
354
- fnParams[key] = value.asNumber();
355
- } else if (value.isBool()) {
356
- fnParams[key] = value.getBool();
357
- } else if (value.isNull()) {
358
- fnParams[key] = nullptr;
359
- } else if (value.isObject()) {
360
- // For nested objects, we use a simplified approach
361
- if (value.getObject(rt).isArray(rt)) {
362
- fnParams[key] = json::array();
363
- } else {
364
- fnParams[key] = json::object();
365
- }
366
- }
367
- }
368
-
369
- fnJson["parameters"] = fnParams;
370
- } catch (const std::exception&) {
371
- fnJson["parameters"] = json::object();
313
+ // Convert the JSI object directly to nlohmann::json using the new helper
314
+ fnJson["parameters"] = jsiValueToJson(rt, paramsVal);
315
+ } catch (const std::exception& e) {
316
+ // Log error or handle as appropriate
317
+ fprintf(stderr, "Failed to parse tool parameters: %s\n", e.what());
318
+ fnJson["parameters"] = json::object(); // Fallback to empty object
372
319
  }
373
320
  }
374
321
  }
@@ -553,6 +500,40 @@ jsi::Value LlamaCppModel::jsonToJsi(jsi::Runtime& rt, const json& j) {
553
500
  return jsi::Value::undefined();
554
501
  }
555
502
 
503
+ // Helper to convert JSI Value to nlohmann::json
504
+ json LlamaCppModel::jsiValueToJson(jsi::Runtime& rt, const jsi::Value& val) {
505
+ if (val.isUndefined() || val.isNull()) {
506
+ return nullptr;
507
+ } else if (val.isBool()) {
508
+ return val.getBool();
509
+ } else if (val.isNumber()) {
510
+ return val.getNumber();
511
+ } else if (val.isString()) {
512
+ return val.getString(rt).utf8(rt);
513
+ } else if (val.isObject()) {
514
+ jsi::Object jsiObj = val.getObject(rt);
515
+ if (jsiObj.isArray(rt)) {
516
+ jsi::Array jsiArr = jsiObj.getArray(rt);
517
+ json jsonArr = json::array();
518
+ for (size_t i = 0; i < jsiArr.size(rt); ++i) {
519
+ jsonArr.push_back(jsiValueToJson(rt, jsiArr.getValueAtIndex(rt, i)));
520
+ }
521
+ return jsonArr;
522
+ } else {
523
+ json jsonObj = json::object();
524
+ jsi::Array propNames = jsiObj.getPropertyNames(rt);
525
+ for (size_t i = 0; i < propNames.size(rt); ++i) {
526
+ jsi::String propName = propNames.getValueAtIndex(rt, i).asString(rt);
527
+ std::string key = propName.utf8(rt);
528
+ jsonObj[key] = jsiValueToJson(rt, jsiObj.getProperty(rt, propName));
529
+ }
530
+ return jsonObj;
531
+ }
532
+ }
533
+ // Should not happen for valid JSON-like structures
534
+ return nullptr;
535
+ }
536
+
556
537
  // JSI method for completions (synchronous - kept for compatibility)
557
538
  jsi::Value LlamaCppModel::completionJsi(jsi::Runtime& rt, const jsi::Value* args, size_t count) {
558
539
  if (count < 1 || !args[0].isObject()) {
@@ -17,9 +17,12 @@
17
17
  #include "chat.h" // For chat format handling and templates
18
18
  #include "json-schema-to-grammar.h"
19
19
 
20
- // Include rn-utils.hpp which has the CompletionResult definition
21
- #include "rn-utils.hpp"
22
- #include "rn-llama.hpp"
20
+ // Include rn-utils.h which has the CompletionResult definition
21
+ #include "rn-utils.h"
22
+ #include "rn-llama.h"
23
+
24
+ // Include json.hpp for json handling
25
+ #include "nlohmann/json.hpp"
23
26
 
24
27
  namespace facebook::react {
25
28
 
@@ -166,6 +169,8 @@ private:
166
169
 
167
170
  // Add CallInvoker for async operations
168
171
  std::shared_ptr<CallInvoker> jsInvoker_;
172
+
173
+ static json jsiValueToJson(jsi::Runtime& rt, const jsi::Value& val); // Declaration of new helper
169
174
  };
170
175
 
171
176
  } // namespace facebook::react
@@ -10,7 +10,7 @@
10
10
  #include <thread>
11
11
  #include "SystemUtils.h"
12
12
  // Include our custom headers - this was missing!
13
- #include "rn-llama.hpp"
13
+ #include "rn-llama.h"
14
14
  #include "LlamaCppModel.h"
15
15
  // Include the llama.cpp common headers
16
16
  #include "chat.h"
package/cpp/PureCppImpl.h CHANGED
@@ -9,7 +9,7 @@
9
9
  #include <mutex>
10
10
 
11
11
  // Include the header with the full definition of rn_llama_context
12
- #include "rn-llama.hpp"
12
+ #include "rn-llama.h"
13
13
 
14
14
  // Forward declarations for C++ only
15
15
  struct llama_model;
@@ -49,7 +49,7 @@ private:
49
49
  jsi::Object createModelObject(jsi::Runtime& runtime, struct rn_llama_context* rn_ctx);
50
50
 
51
51
  // Context for the currently loaded model, if any.
52
- // The actual definition of rn_llama_context should be in "rn-llama.hpp"
52
+ // The actual definition of rn_llama_context should be in "rn-llama.h"
53
53
  std::unique_ptr<struct rn_llama_context> rn_ctx_;
54
54
 
55
55
  // Mutex for thread safety when accessing rn_ctx_ or other shared resources
@@ -1,4 +1,4 @@
1
- int LLAMA_BUILD_NUMBER = 5541;
2
- char const *LLAMA_COMMIT = "07e4351c";
1
+ int LLAMA_BUILD_NUMBER = 5709;
2
+ char const *LLAMA_COMMIT = "d67341dc";
3
3
  char const *LLAMA_COMPILER = "unknown";
4
4
  char const *LLAMA_BUILD_TARGET = "unknown";