cui-llama.rn 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285)
  1. package/README.md +35 -7
  2. package/android/src/main/CMakeLists.txt +22 -11
  3. package/android/src/main/java/com/rnllama/LlamaContext.java +42 -6
  4. package/android/src/main/java/com/rnllama/RNLlama.java +139 -4
  5. package/android/src/main/jni.cpp +173 -18
  6. package/android/src/main/jniLibs/arm64-v8a/librnllama.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_dotprod_i8mm.so +0 -0
  11. package/android/src/main/jniLibs/arm64-v8a/librnllama_v8_2_i8mm.so +0 -0
  12. package/android/src/main/jniLibs/x86_64/librnllama.so +0 -0
  13. package/android/src/main/jniLibs/x86_64/librnllama_x86_64.so +0 -0
  14. package/android/src/newarch/java/com/rnllama/RNLlamaModule.java +24 -4
  15. package/android/src/oldarch/java/com/rnllama/RNLlamaModule.java +22 -2
  16. package/cpp/LICENSE +21 -0
  17. package/cpp/chat.cpp +129 -107
  18. package/cpp/chat.h +2 -0
  19. package/cpp/common.cpp +58 -78
  20. package/cpp/common.h +29 -21
  21. package/cpp/ggml-alloc.c +4 -1
  22. package/cpp/ggml-backend.cpp +9 -5
  23. package/cpp/ggml-backend.h +4 -4
  24. package/cpp/ggml-cpp.h +1 -1
  25. package/cpp/ggml-cpu/amx/amx.cpp +221 -0
  26. package/cpp/ggml-cpu/amx/amx.h +8 -0
  27. package/cpp/ggml-cpu/amx/common.h +91 -0
  28. package/cpp/ggml-cpu/amx/mmq.cpp +2511 -0
  29. package/cpp/ggml-cpu/amx/mmq.h +10 -0
  30. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/binary-ops.h +1 -1
  31. package/cpp/ggml-cpu/common.h +72 -0
  32. package/cpp/{ggml-cpu-aarch64.cpp → ggml-cpu/ggml-cpu-aarch64.cpp} +809 -103
  33. package/cpp/{ggml-cpu-quants.c → ggml-cpu/ggml-cpu-quants.c} +306 -6
  34. package/cpp/{ggml-cpu.c → ggml-cpu/ggml-cpu.c} +114 -55
  35. package/cpp/{ggml-cpu.cpp → ggml-cpu/ggml-cpu.cpp} +32 -16
  36. package/cpp/{ops.cpp → ggml-cpu/ops.cpp} +353 -173
  37. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/ops.h +2 -20
  38. package/cpp/{sgemm.cpp → ggml-cpu/sgemm.cpp} +501 -0
  39. package/{ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers → cpp/ggml-cpu}/simd-mappings.h +7 -3
  40. package/{ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/unary-ops.h +1 -1
  41. package/cpp/{vec.cpp → ggml-cpu/vec.cpp} +0 -6
  42. package/{ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers → cpp/ggml-cpu}/vec.h +16 -0
  43. package/cpp/ggml-cpu.h +5 -0
  44. package/cpp/ggml-impl.h +16 -9
  45. package/cpp/ggml-llama-sim.metallib +0 -0
  46. package/cpp/ggml-llama.metallib +0 -0
  47. package/cpp/ggml-metal-impl.h +36 -11
  48. package/cpp/ggml-metal.m +810 -176
  49. package/cpp/ggml-opt.cpp +373 -190
  50. package/cpp/ggml-opt.h +49 -28
  51. package/cpp/ggml-quants.c +0 -6
  52. package/cpp/ggml.c +227 -282
  53. package/cpp/ggml.h +82 -101
  54. package/cpp/gguf.cpp +33 -33
  55. package/cpp/json-schema-to-grammar.cpp +3 -0
  56. package/cpp/llama-adapter.cpp +6 -0
  57. package/cpp/llama-arch.cpp +49 -17
  58. package/cpp/llama-arch.h +9 -0
  59. package/cpp/llama-batch.cpp +8 -2
  60. package/cpp/llama-batch.h +2 -1
  61. package/cpp/llama-chat.cpp +39 -16
  62. package/cpp/llama-chat.h +4 -2
  63. package/cpp/llama-context.cpp +440 -611
  64. package/cpp/llama-context.h +44 -33
  65. package/cpp/llama-cparams.h +1 -0
  66. package/cpp/llama-graph.cpp +214 -291
  67. package/cpp/llama-graph.h +69 -21
  68. package/cpp/llama-hparams.cpp +17 -1
  69. package/cpp/llama-hparams.h +39 -5
  70. package/cpp/llama-kv-cache.cpp +2067 -620
  71. package/cpp/llama-kv-cache.h +410 -108
  72. package/cpp/llama-memory.h +12 -1
  73. package/cpp/llama-model-loader.cpp +24 -15
  74. package/cpp/llama-model-saver.cpp +281 -0
  75. package/cpp/llama-model-saver.h +37 -0
  76. package/cpp/llama-model.cpp +1089 -359
  77. package/cpp/llama-model.h +19 -3
  78. package/cpp/llama-sampling.cpp +20 -7
  79. package/cpp/llama-vocab.cpp +54 -9
  80. package/cpp/llama-vocab.h +6 -0
  81. package/cpp/llama.cpp +14 -0
  82. package/cpp/llama.h +86 -142
  83. package/cpp/minja/chat-template.hpp +9 -5
  84. package/cpp/minja/minja.hpp +69 -36
  85. package/cpp/rn-llama.cpp +602 -190
  86. package/cpp/rn-llama.h +34 -8
  87. package/cpp/sampling.cpp +57 -50
  88. package/cpp/tools/mtmd/clip-impl.h +462 -0
  89. package/cpp/tools/mtmd/clip.cpp +4024 -0
  90. package/cpp/tools/mtmd/clip.h +101 -0
  91. package/cpp/tools/mtmd/miniaudio.h +93468 -0
  92. package/cpp/tools/mtmd/mtmd-audio.cpp +855 -0
  93. package/cpp/tools/mtmd/mtmd-audio.h +62 -0
  94. package/cpp/tools/mtmd/mtmd-helper.cpp +297 -0
  95. package/cpp/tools/mtmd/mtmd.cpp +942 -0
  96. package/cpp/tools/mtmd/mtmd.h +362 -0
  97. package/cpp/tools/mtmd/stb_image.h +7988 -0
  98. package/ios/CMakeLists.txt +20 -10
  99. package/ios/RNLlama.h +6 -0
  100. package/ios/RNLlama.mm +82 -3
  101. package/ios/RNLlamaContext.h +5 -1
  102. package/ios/RNLlamaContext.mm +131 -38
  103. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/chat.h +2 -0
  104. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/common.h +29 -21
  105. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  106. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  107. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  108. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  109. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  110. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  111. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml.h +82 -101
  112. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  113. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  114. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  115. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  116. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  117. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  118. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  119. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  120. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  121. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  122. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  123. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  124. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/llama.h +86 -142
  125. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  126. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  127. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  128. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Info.plist +0 -0
  129. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  130. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/rnllama +0 -0
  131. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  132. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  133. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  134. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  135. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  136. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  137. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  138. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  139. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  140. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  141. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  142. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  143. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  144. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  145. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  146. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  147. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  148. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  149. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  150. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  151. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  152. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  153. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  154. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  155. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  156. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  157. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  158. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  159. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  160. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/chat.h +2 -0
  161. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/common.h +29 -21
  162. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-backend.h +4 -4
  163. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpp.h +1 -1
  164. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu.h +5 -0
  165. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-impl.h +16 -9
  166. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  167. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-opt.h +49 -28
  168. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml.h +82 -101
  169. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-arch.h +9 -0
  170. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-batch.h +2 -1
  171. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-chat.h +4 -2
  172. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-context.h +44 -33
  173. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-cparams.h +1 -0
  174. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-graph.h +69 -21
  175. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-hparams.h +39 -5
  176. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  177. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-memory.h +12 -1
  178. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model-saver.h +37 -0
  179. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-model.h +19 -3
  180. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama-vocab.h +6 -0
  181. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/llama.h +86 -142
  182. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  183. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/minja/minja.hpp +69 -36
  184. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/rn-llama.h +34 -8
  185. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Info.plist +0 -0
  186. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/ggml-llama.metallib +0 -0
  187. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/rnllama +0 -0
  188. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/chat.h +2 -0
  189. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/common.h +29 -21
  190. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-backend.h +4 -4
  191. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpp.h +1 -1
  192. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu.h +5 -0
  193. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-impl.h +16 -9
  194. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-metal-impl.h +36 -11
  195. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-opt.h +49 -28
  196. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml.h +82 -101
  197. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-arch.h +9 -0
  198. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-batch.h +2 -1
  199. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-chat.h +4 -2
  200. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-context.h +44 -33
  201. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-cparams.h +1 -0
  202. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-graph.h +69 -21
  203. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-hparams.h +39 -5
  204. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-kv-cache.h +410 -108
  205. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-memory.h +12 -1
  206. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model-saver.h +37 -0
  207. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-model.h +19 -3
  208. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama-vocab.h +6 -0
  209. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/llama.h +86 -142
  210. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/chat-template.hpp +9 -5
  211. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/minja/minja.hpp +69 -36
  212. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/rn-llama.h +34 -8
  213. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Info.plist +0 -0
  214. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/_CodeSignature/CodeResources +1 -1
  215. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/ggml-llama-sim.metallib +0 -0
  216. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/rnllama +0 -0
  217. package/jest/mock.js +33 -7
  218. package/lib/commonjs/NativeRNLlama.js.map +1 -1
  219. package/lib/commonjs/index.js +153 -21
  220. package/lib/commonjs/index.js.map +1 -1
  221. package/lib/module/NativeRNLlama.js.map +1 -1
  222. package/lib/module/index.js +152 -20
  223. package/lib/module/index.js.map +1 -1
  224. package/lib/typescript/NativeRNLlama.d.ts +54 -4
  225. package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
  226. package/lib/typescript/index.d.ts +72 -6
  227. package/lib/typescript/index.d.ts.map +1 -1
  228. package/package.json +1 -1
  229. package/src/NativeRNLlama.ts +72 -4
  230. package/src/index.ts +212 -38
  231. package/cpp/binary-ops.h +0 -16
  232. package/cpp/ops.h +0 -128
  233. package/cpp/simd-mappings.h +0 -888
  234. package/cpp/unary-ops.h +0 -28
  235. package/cpp/vec.h +0 -802
  236. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  237. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  238. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  239. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  240. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  241. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/ops.h +0 -128
  242. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  243. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  244. package/ios/rnllama.xcframework/ios-arm64/rnllama.framework/Headers/vec.h +0 -802
  245. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  246. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  247. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  248. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  249. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  250. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  251. package/ios/rnllama.xcframework/ios-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  252. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/binary-ops.h +0 -16
  253. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  254. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  255. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  256. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  257. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/ops.h +0 -128
  258. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/sgemm.h +0 -14
  259. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/simd-mappings.h +0 -888
  260. package/ios/rnllama.xcframework/tvos-arm64/rnllama.framework/Headers/unary-ops.h +0 -28
  261. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/binary-ops.h +0 -16
  262. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-aarch64.h +0 -8
  263. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-impl.h +0 -512
  264. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-quants.h +0 -63
  265. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ggml-cpu-traits.h +0 -38
  266. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/ops.h +0 -128
  267. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/sgemm.h +0 -14
  268. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/simd-mappings.h +0 -888
  269. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/unary-ops.h +0 -28
  270. package/ios/rnllama.xcframework/tvos-arm64_x86_64-simulator/rnllama.framework/Headers/vec.h +0 -802
  271. package/lib/commonjs/chat.js +0 -37
  272. package/lib/commonjs/chat.js.map +0 -1
  273. package/lib/module/chat.js +0 -33
  274. package/lib/module/chat.js.map +0 -1
  275. package/lib/typescript/chat.d.ts +0 -10
  276. package/lib/typescript/chat.d.ts.map +0 -1
  277. package/src/chat.ts +0 -44
  278. /package/cpp/{binary-ops.cpp → ggml-cpu/binary-ops.cpp} +0 -0
  279. /package/cpp/{ggml-cpu-aarch64.h → ggml-cpu/ggml-cpu-aarch64.h} +0 -0
  280. /package/cpp/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +0 -0
  281. /package/cpp/{ggml-cpu-quants.h → ggml-cpu/ggml-cpu-quants.h} +0 -0
  282. /package/cpp/{ggml-cpu-traits.cpp → ggml-cpu/ggml-cpu-traits.cpp} +0 -0
  283. /package/cpp/{ggml-cpu-traits.h → ggml-cpu/ggml-cpu-traits.h} +0 -0
  284. /package/cpp/{sgemm.h → ggml-cpu/sgemm.h} +0 -0
  285. /package/cpp/{unary-ops.cpp → ggml-cpu/unary-ops.cpp} +0 -0
@@ -36,14 +36,17 @@ enum llm_type {
36
36
  LLM_TYPE_335M,
37
37
  LLM_TYPE_410M,
38
38
  LLM_TYPE_450M,
39
+ LLM_TYPE_475M,
39
40
  LLM_TYPE_770M,
40
41
  LLM_TYPE_780M,
41
42
  LLM_TYPE_0_5B,
43
+ LLM_TYPE_0_6B,
42
44
  LLM_TYPE_1B,
43
45
  LLM_TYPE_1_3B,
44
46
  LLM_TYPE_1_4B,
45
47
  LLM_TYPE_1_5B,
46
48
  LLM_TYPE_1_6B,
49
+ LLM_TYPE_1_7B,
47
50
  LLM_TYPE_1_8B,
48
51
  LLM_TYPE_2B,
49
52
  LLM_TYPE_2_8B,
@@ -62,6 +65,7 @@ enum llm_type {
62
65
  LLM_TYPE_15B,
63
66
  LLM_TYPE_16B,
64
67
  LLM_TYPE_20B,
68
+ LLM_TYPE_27B,
65
69
  LLM_TYPE_30B,
66
70
  LLM_TYPE_32B,
67
71
  LLM_TYPE_34B,
@@ -70,7 +74,9 @@ enum llm_type {
70
74
  LLM_TYPE_65B,
71
75
  LLM_TYPE_70B,
72
76
  LLM_TYPE_236B,
77
+ LLM_TYPE_290B,
73
78
  LLM_TYPE_314B,
79
+ LLM_TYPE_405B,
74
80
  LLM_TYPE_671B,
75
81
  LLM_TYPE_SMALL,
76
82
  LLM_TYPE_MEDIUM,
@@ -84,12 +90,14 @@ enum llm_type {
84
90
  LLM_TYPE_16x3_8B,
85
91
  LLM_TYPE_10B_128x3_66B,
86
92
  LLM_TYPE_57B_A14B,
87
- LLM_TYPE_27B,
88
- LLM_TYPE_290B,
89
93
  LLM_TYPE_17B_16E, // llama4 Scout
90
94
  LLM_TYPE_17B_128E, // llama4 Maverick
95
+ LLM_TYPE_30B_A3B,
96
+ LLM_TYPE_235B_A22B,
91
97
  };
92
98
 
99
+ std::string llama_rope_scaling_type_name(llama_rope_scaling_type rope_scaling_type);
100
+
93
101
  struct llama_layer_posnet {
94
102
  // resnet
95
103
  struct lm_ggml_tensor * norm1 = nullptr;
@@ -171,6 +179,8 @@ struct llama_layer {
171
179
  struct lm_ggml_tensor * wq_b = nullptr;
172
180
  struct lm_ggml_tensor * wkv_a_mqa = nullptr;
173
181
  struct lm_ggml_tensor * wkv_b = nullptr;
182
+ struct lm_ggml_tensor * wk_b = nullptr;
183
+ struct lm_ggml_tensor * wv_b = nullptr;
174
184
  struct lm_ggml_tensor * wq_cross = nullptr;
175
185
  struct lm_ggml_tensor * wk_cross = nullptr;
176
186
  struct lm_ggml_tensor * wv_cross = nullptr;
@@ -388,8 +398,14 @@ struct llama_model {
388
398
 
389
399
  const struct lm_ggml_tensor * get_tensor(const char * name) const;
390
400
 
401
+ float get_rope_freq_base (const llama_cparams & cparams, int il) const;
402
+ float get_rope_freq_scale(const llama_cparams & cparams, int il) const;
403
+
404
+ lm_ggml_tensor * get_rope_factors(const llama_cparams & cparams, int il) const;
405
+
406
+ // note: can mutate `cparams`
391
407
  // TODO: move this to new llm_arch_model_i interface
392
- llama_memory_i * create_memory() const; // TODO: params
408
+ llama_memory_i * create_memory(const llama_memory_params & params, llama_cparams & cparams) const;
393
409
 
394
410
  // TODO: move this to new llm_arch_model_i interface
395
411
  llm_graph_result_ptr build_graph(
@@ -21,6 +21,9 @@ struct llama_vocab {
21
21
 
22
22
  void load(llama_model_loader & ml, const LLM_KV & kv);
23
23
 
24
+ std::string get_tokenizer_model() const;
25
+ std::string get_tokenizer_pre() const;
26
+
24
27
  enum llama_vocab_type get_type() const;
25
28
  enum llama_vocab_pre_type get_pre_type() const;
26
29
 
@@ -80,6 +83,9 @@ struct llama_vocab {
80
83
  int max_token_len() const;
81
84
 
82
85
  int find_bpe_rank(const std::string & token_left, const std::string & token_right) const;
86
+ std::vector<std::string> get_bpe_merges() const;
87
+
88
+ std::vector<char> get_precompiled_charsmap() const;
83
89
 
84
90
  int32_t tokenize(
85
91
  const char * text,
@@ -4,6 +4,7 @@
4
4
  #include "ggml.h"
5
5
  #include "ggml-cpu.h"
6
6
  #include "ggml-backend.h"
7
+ #include "ggml-opt.h"
7
8
 
8
9
  #include <stddef.h>
9
10
  #include <stdint.h>
@@ -112,6 +113,8 @@ extern "C" {
112
113
  LLAMA_VOCAB_PRE_TYPE_TRILLION = 31,
113
114
  LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32,
114
115
  LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33,
116
+ LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34,
117
+ LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35,
115
118
  };
116
119
 
117
120
  enum llama_rope_type {
@@ -343,7 +346,7 @@ extern "C" {
343
346
  float yarn_beta_fast; // YaRN low correction dim
344
347
  float yarn_beta_slow; // YaRN high correction dim
345
348
  uint32_t yarn_orig_ctx; // YaRN original context size
346
- float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
349
+ float defrag_thold; // defragment the KV cache if holes/size > thold, <= 0 disabled (default)
347
350
 
348
351
  lm_ggml_backend_sched_eval_callback cb_eval;
349
352
  void * cb_eval_user_data;
@@ -351,34 +354,35 @@ extern "C" {
351
354
  enum lm_ggml_type type_k; // data type for K cache [EXPERIMENTAL]
352
355
  enum lm_ggml_type type_v; // data type for V cache [EXPERIMENTAL]
353
356
 
354
- // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
355
- // TODO: move at the end of the struct
356
- bool logits_all; // the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
357
- bool embeddings; // if true, extract embeddings (together with logits)
358
- bool offload_kqv; // whether to offload the KQV ops (including the KV cache) to GPU
359
- bool flash_attn; // whether to use flash attention [EXPERIMENTAL]
360
- bool no_perf; // whether to measure performance timings
361
-
362
357
  // Abort callback
363
358
  // if it returns true, execution of llama_decode() will be aborted
364
359
  // currently works only with CPU execution
365
360
  lm_ggml_abort_callback abort_callback;
366
361
  void * abort_callback_data;
362
+
363
+ // Keep the booleans together and at the end of the struct to avoid misalignment during copy-by-value.
364
+ bool embeddings; // if true, extract embeddings (together with logits)
365
+ bool offload_kqv; // offload the KQV ops (including the KV cache) to GPU
366
+ bool flash_attn; // use flash attention [EXPERIMENTAL]
367
+ bool no_perf; // measure performance timings
368
+ bool op_offload; // offload host tensor operations to device
369
+ bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
367
370
  };
368
371
 
369
372
  // model quantization parameters
370
373
  typedef struct llama_model_quantize_params {
371
- int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
372
- enum llama_ftype ftype; // quantize to this llama_ftype
373
- enum lm_ggml_type output_tensor_type; // output tensor type
374
- enum lm_ggml_type token_embedding_type; // token embeddings tensor type
375
- bool allow_requantize; // allow quantizing non-f32/f16 tensors
376
- bool quantize_output_tensor; // quantize output.weight
377
- bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
378
- bool pure; // quantize all tensors to the default type
379
- bool keep_split; // quantize to the same number of shards
380
- void * imatrix; // pointer to importance matrix data
381
- void * kv_overrides; // pointer to vector containing overrides
374
+ int32_t nthread; // number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
375
+ enum llama_ftype ftype; // quantize to this llama_ftype
376
+ enum lm_ggml_type output_tensor_type; // output tensor type
377
+ enum lm_ggml_type token_embedding_type; // token embeddings tensor type
378
+ bool allow_requantize; // allow quantizing non-f32/f16 tensors
379
+ bool quantize_output_tensor; // quantize output.weight
380
+ bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
381
+ bool pure; // quantize all tensors to the default type
382
+ bool keep_split; // quantize to the same number of shards
383
+ void * imatrix; // pointer to importance matrix data
384
+ void * kv_overrides; // pointer to vector containing overrides
385
+ void * tensor_types; // pointer to vector containing tensor types
382
386
  } llama_model_quantize_params;
383
387
 
384
388
  typedef struct llama_logit_bias {
@@ -444,6 +448,10 @@ extern "C" {
444
448
  size_t n_paths,
445
449
  struct llama_model_params params);
446
450
 
451
+ LLAMA_API void llama_model_save_to_file(
452
+ const struct llama_model * model,
453
+ const char * path_model);
454
+
447
455
  DEPRECATED(LLAMA_API void llama_free_model(struct llama_model * model),
448
456
  "use llama_model_free instead");
449
457
 
@@ -601,71 +609,14 @@ extern "C" {
601
609
  // KV cache
602
610
  //
603
611
 
604
- // TODO: start using struct llama_kv_cache
605
-
606
- // Information associated with an individual cell in the KV cache view.
607
- struct llama_kv_cache_view_cell {
608
- // The position for this cell. Takes KV cache shifts into account.
609
- // May be negative if the cell is not populated.
610
- llama_pos pos;
611
- };
612
-
613
- // An updateable view of the KV cache.
614
- struct llama_kv_cache_view {
615
- // Number of KV cache cells. This will be the same as the context size.
616
- int32_t n_cells;
617
-
618
- // Maximum number of sequences that can exist in a cell. It's not an error
619
- // if there are more sequences in a cell than this value, however they will
620
- // not be visible in the view cells_sequences.
621
- int32_t n_seq_max;
622
-
623
- // Number of tokens in the cache. For example, if there are two populated
624
- // cells, the first with 1 sequence id in it and the second with 2 sequence
625
- // ids then you'll have 3 tokens.
626
- int32_t token_count;
627
-
628
- // Number of populated cache cells.
629
- int32_t used_cells;
630
-
631
- // Maximum contiguous empty slots in the cache.
632
- int32_t max_contiguous;
633
-
634
- // Index to the start of the max_contiguous slot range. Can be negative
635
- // when cache is full.
636
- int32_t max_contiguous_idx;
637
-
638
- // Information for an individual cell.
639
- struct llama_kv_cache_view_cell * cells;
640
-
641
- // The sequences for each cell. There will be n_seq_max items per cell.
642
- llama_seq_id * cells_sequences;
643
- };
644
-
645
- // Create an empty KV cache view. (use only for debugging purposes)
646
- LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
647
-
648
- // Free a KV cache view. (use only for debugging purposes)
649
- LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
650
-
651
- // Update the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)
652
- // TODO: change signature to llama_kv_cache_view_update(struct llama_kv_cache_view * view, const struct llama_context * ctx)
653
- LLAMA_API void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_kv_cache_view * view);
654
-
655
- ///
656
-
657
612
  // Returns the number of tokens in the KV cache (slow, use only for debug)
658
613
  // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
659
- LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx);
660
-
661
- DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx),
662
- "use llama_kv_self_n_tokens instead");
614
+ DEPRECATED(LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx),
615
+ "Use llama_kv_self_seq_pos_max() instead");
663
616
 
664
617
  // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
665
- LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx);
666
-
667
- DEPRECATED(LLAMA_API int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx),
668
- "use llama_kv_self_used_cells instead");
618
+ DEPRECATED(LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx),
619
+ "Use llama_kv_self_seq_pos_max() instead");
669
620
 
670
621
  // Clear the KV cache - both cell info is erased and KV data is zeroed
671
622
  LLAMA_API void llama_kv_self_clear(
@@ -724,10 +675,18 @@ extern "C" {
724
675
  llama_pos p1,
725
676
  int d);
726
677
 
678
+ // Returns the smallest position present in the KV cache for the specified sequence
679
+ // This is typically non-zero only for SWA caches
680
+ // Returns -1 if the sequence is empty
681
+ LLAMA_API llama_pos llama_kv_self_seq_pos_min(
682
+ struct llama_context * ctx,
683
+ llama_seq_id seq_id);
684
+
727
685
  // Returns the largest position present in the KV cache for the specified sequence
686
+ // Returns -1 if the sequence is empty
728
687
  LLAMA_API llama_pos llama_kv_self_seq_pos_max(
729
688
  struct llama_context * ctx,
730
- llama_seq_id seq_id);
689
+ llama_seq_id seq_id);
731
690
 
732
691
  // Defragment the KV cache
733
692
  // This will be applied:
@@ -741,61 +700,6 @@ extern "C" {
741
700
  // Apply the KV cache updates (such as K-shifts, defragmentation, etc.)
742
701
  LLAMA_API void llama_kv_self_update(struct llama_context * ctx);
743
702
 
744
- DEPRECATED(LLAMA_API void llama_kv_cache_clear(
745
- struct llama_context * ctx),
746
- "use llama_kv_self_clear instead");
747
-
748
- DEPRECATED(LLAMA_API bool llama_kv_cache_seq_rm(
749
- struct llama_context * ctx,
750
- llama_seq_id seq_id,
751
- llama_pos p0,
752
- llama_pos p1),
753
- "use llama_kv_self_seq_rm instead");
754
-
755
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_cp(
756
- struct llama_context * ctx,
757
- llama_seq_id seq_id_src,
758
- llama_seq_id seq_id_dst,
759
- llama_pos p0,
760
- llama_pos p1),
761
- "use llama_kv_self_seq_cp instead");
762
-
763
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_keep(
764
- struct llama_context * ctx,
765
- llama_seq_id seq_id),
766
- "use llama_kv_self_seq_keep instead");
767
-
768
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_add(
769
- struct llama_context * ctx,
770
- llama_seq_id seq_id,
771
- llama_pos p0,
772
- llama_pos p1,
773
- llama_pos delta),
774
- "use llama_kv_self_seq_add instead");
775
-
776
- DEPRECATED(LLAMA_API void llama_kv_cache_seq_div(
777
- struct llama_context * ctx,
778
- llama_seq_id seq_id,
779
- llama_pos p0,
780
- llama_pos p1,
781
- int d),
782
- "use llama_kv_self_seq_div instead");
783
-
784
- DEPRECATED(LLAMA_API llama_pos llama_kv_cache_seq_pos_max(
785
- struct llama_context * ctx,
786
- llama_seq_id seq_id),
787
- "use llama_kv_self_seq_pos_max instead");
788
-
789
- DEPRECATED(LLAMA_API void llama_kv_cache_defrag(struct llama_context * ctx),
790
- "use llama_kv_self_defrag instead");
791
-
792
- DEPRECATED(LLAMA_API bool llama_kv_cache_can_shift(const struct llama_context * ctx),
793
- "use llama_kv_self_can_shift instead");
794
-
795
- DEPRECATED(LLAMA_API void llama_kv_cache_update(struct llama_context * ctx),
796
- "use llama_kv_self_update instead");
797
-
798
-
799
703
  //
800
704
  // State / sessions
801
705
  //
@@ -923,18 +827,26 @@ extern "C" {
923
827
  // Frees a batch of tokens allocated with llama_batch_init()
924
828
  LLAMA_API void llama_batch_free(struct llama_batch batch);
925
829
 
926
- // Processes a batch of tokens with the ecoder part of the encoder-decoder model.
927
- // Stores the encoder output internally for later use by the decoder cross-attention layers.
830
+ // Process a batch of tokens.
831
+ // In contrast to llama_decode() - this call does not use KV cache.
832
+ // For encoder-decoder contexts, processes the batch using the encoder.
833
+ // Can store the encoder output internally for later use by the decoder's cross-attention layers.
928
834
  // 0 - success
929
835
  // < 0 - error. the KV cache state is restored to the state before this call
930
836
  LLAMA_API int32_t llama_encode(
931
837
  struct llama_context * ctx,
932
838
  struct llama_batch batch);
933
839
 
840
+ // Process a batch of tokens.
841
+ // Requires KV cache.
842
+ // For encoder-decoder contexts, processes the batch using the decoder.
934
843
  // Positive return values do not mean a fatal error, but rather a warning.
935
- // 0 - success
936
- // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
937
- // < 0 - error. the KV cache state is restored to the state before this call
844
+ // Upon non-zero return values, the KV cache state is restored to the state before this call
845
+ // 0 - success
846
+ // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
847
+ // 2 - aborted
848
+ // -1 - invalid input batch
849
+ // < -1 - error
938
850
  LLAMA_API int32_t llama_decode(
939
851
  struct llama_context * ctx,
940
852
  struct llama_batch batch);
@@ -1231,6 +1143,7 @@ extern "C" {
1231
1143
  "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");
1232
1144
 
1233
1145
  /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
1146
+ /// Setting k <= 0 makes this a noop
1234
1147
  LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);
1235
1148
 
1236
1149
  /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
@@ -1426,6 +1339,37 @@ extern "C" {
1426
1339
  LLAMA_API void llama_perf_sampler_print(const struct llama_sampler * chain);
1427
1340
  LLAMA_API void llama_perf_sampler_reset( struct llama_sampler * chain);
1428
1341
 
1342
+ //
1343
+ // training
1344
+ //
1345
+
1346
+ // function that returns whether or not a given tensor contains trainable parameters
1347
+ typedef bool (*llama_opt_param_filter)(const struct lm_ggml_tensor * tensor, void * userdata);
1348
+
1349
+ // always returns true
1350
+ LLAMA_API bool llama_opt_param_filter_all(const struct lm_ggml_tensor * tensor, void * userdata);
1351
+
1352
+ struct llama_opt_params {
1353
+ uint32_t n_ctx_train; // assumed context size post training, use context size specified in llama_context if 0
1354
+
1355
+ llama_opt_param_filter param_filter; // callback for determining which tensors contain trainable parameters
1356
+ void * param_filter_ud; // userdata for determining which tensors contain trainable parameters
1357
+
1358
+ lm_ggml_opt_get_optimizer_params get_opt_pars; // callback for calculating optimizer parameters
1359
+ void * get_opt_pars_ud; // userdata for calculating optimizer parameters
1360
+ };
1361
+
1362
+ LLAMA_API void llama_opt_init(struct llama_context * lctx, struct llama_model * model, struct llama_opt_params lopt_params);
1363
+
1364
+ LLAMA_API void llama_opt_epoch(
1365
+ struct llama_context * lctx,
1366
+ lm_ggml_opt_dataset_t dataset,
1367
+ lm_ggml_opt_result_t result_train,
1368
+ lm_ggml_opt_result_t result_eval,
1369
+ int64_t idata_split,
1370
+ lm_ggml_opt_epoch_callback callback_train,
1371
+ lm_ggml_opt_epoch_callback callback_eval);
1372
+
1429
1373
  #ifdef __cplusplus
1430
1374
  }
1431
1375
  #endif
@@ -13,10 +13,12 @@
13
13
  #include <chrono>
14
14
  #include <cstddef>
15
15
  #include <cstdio>
16
+ #include <ctime>
16
17
  #include <exception>
17
18
  #include <iomanip>
18
19
  #include <memory>
19
20
  #include <sstream>
21
+ #include <stdexcept>
20
22
  #include <string>
21
23
  #include <vector>
22
24
 
@@ -393,8 +395,8 @@ class chat_template {
393
395
 
394
396
  for (const auto & message_ : adjusted_messages) {
395
397
  auto message = message_;
396
- if (!message.contains("role") || !message.contains("content")) {
397
- throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
398
+ if (!message.contains("role") || (!message.contains("content") && !message.contains("tool_calls"))) {
399
+ throw std::runtime_error("message must have 'role' and one of 'content' or 'tool_calls' fields: " + message.dump());
398
400
  }
399
401
  std::string role = message.at("role");
400
402
 
@@ -415,7 +417,6 @@ class chat_template {
415
417
  }
416
418
  }
417
419
  if (polyfill_tool_calls) {
418
- auto content = message.at("content");
419
420
  auto tool_calls = json::array();
420
421
  for (const auto & tool_call : message.at("tool_calls")) {
421
422
  if (tool_call.at("type") != "function") {
@@ -434,8 +435,11 @@ class chat_template {
434
435
  auto obj = json {
435
436
  {"tool_calls", tool_calls},
436
437
  };
437
- if (!content.is_null() && !content.empty()) {
438
- obj["content"] = content;
438
+ if (message.contains("content")) {
439
+ auto content = message.at("content");
440
+ if (!content.is_null() && !content.empty()) {
441
+ obj["content"] = content;
442
+ }
439
443
  }
440
444
  message["content"] = obj.dump(2);
441
445
  message.erase("tool_calls");
@@ -11,6 +11,7 @@
11
11
  #include <algorithm>
12
12
  #include <cctype>
13
13
  #include <cstddef>
14
+ #include <cstdint>
14
15
  #include <cmath>
15
16
  #include <exception>
16
17
  #include <functional>
@@ -233,7 +234,7 @@ public:
233
234
  }
234
235
  } else if (is_object()) {
235
236
  if (!index.is_hashable())
236
- throw std::runtime_error("Unashable type: " + index.dump());
237
+ throw std::runtime_error("Unhashable type: " + index.dump());
237
238
  auto it = object_->find(index.primitive_);
238
239
  if (it == object_->end())
239
240
  throw std::runtime_error("Key not found: " + index.dump());
@@ -252,7 +253,7 @@ public:
252
253
  auto index = key.get<int>();
253
254
  return array_->at(index < 0 ? array_->size() + index : index);
254
255
  } else if (object_) {
255
- if (!key.is_hashable()) throw std::runtime_error("Unashable type: " + dump());
256
+ if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
256
257
  auto it = object_->find(key.primitive_);
257
258
  if (it == object_->end()) return Value();
258
259
  return it->second;
@@ -261,7 +262,7 @@ public:
261
262
  }
262
263
  void set(const Value& key, const Value& value) {
263
264
  if (!object_) throw std::runtime_error("Value is not an object: " + dump());
264
- if (!key.is_hashable()) throw std::runtime_error("Unashable type: " + dump());
265
+ if (!key.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
265
266
  (*object_)[key.primitive_] = value;
266
267
  }
267
268
  Value call(const std::shared_ptr<Context> & context, ArgumentsValue & args) const {
@@ -398,7 +399,7 @@ public:
398
399
  }
399
400
  return false;
400
401
  } else if (object_) {
401
- if (!value.is_hashable()) throw std::runtime_error("Unashable type: " + value.dump());
402
+ if (!value.is_hashable()) throw std::runtime_error("Unhashable type: " + value.dump());
402
403
  return object_->find(value.primitive_) != object_->end();
403
404
  } else {
404
405
  throw std::runtime_error("contains can only be called on arrays and objects: " + dump());
@@ -416,7 +417,7 @@ public:
416
417
  return const_cast<Value*>(this)->at(index);
417
418
  }
418
419
  Value& at(const Value & index) {
419
- if (!index.is_hashable()) throw std::runtime_error("Unashable type: " + dump());
420
+ if (!index.is_hashable()) throw std::runtime_error("Unhashable type: " + dump());
420
421
  if (is_array()) return array_->at(index.get<int>());
421
422
  if (is_object()) return object_->at(index.primitive_);
422
423
  throw std::runtime_error("Value is not an array or object: " + dump());
@@ -676,8 +677,8 @@ public:
676
677
  class VariableExpr : public Expression {
677
678
  std::string name;
678
679
  public:
679
- VariableExpr(const Location & location, const std::string& n)
680
- : Expression(location), name(n) {}
680
+ VariableExpr(const Location & loc, const std::string& n)
681
+ : Expression(loc), name(n) {}
681
682
  std::string get_name() const { return name; }
682
683
  Value do_evaluate(const std::shared_ptr<Context> & context) const override {
683
684
  if (!context->contains(name)) {
@@ -1200,9 +1201,9 @@ public:
1200
1201
 
1201
1202
  class SliceExpr : public Expression {
1202
1203
  public:
1203
- std::shared_ptr<Expression> start, end;
1204
- SliceExpr(const Location & loc, std::shared_ptr<Expression> && s, std::shared_ptr<Expression> && e)
1205
- : Expression(loc), start(std::move(s)), end(std::move(e)) {}
1204
+ std::shared_ptr<Expression> start, end, step;
1205
+ SliceExpr(const Location & loc, std::shared_ptr<Expression> && s, std::shared_ptr<Expression> && e, std::shared_ptr<Expression> && st = nullptr)
1206
+ : Expression(loc), start(std::move(s)), end(std::move(e)), step(std::move(st)) {}
1206
1207
  Value do_evaluate(const std::shared_ptr<Context> &) const override {
1207
1208
  throw std::runtime_error("SliceExpr not implemented");
1208
1209
  }
@@ -1219,18 +1220,35 @@ public:
1219
1220
  if (!index) throw std::runtime_error("SubscriptExpr.index is null");
1220
1221
  auto target_value = base->evaluate(context);
1221
1222
  if (auto slice = dynamic_cast<SliceExpr*>(index.get())) {
1222
- auto start = slice->start ? slice->start->evaluate(context).get<int64_t>() : 0;
1223
- auto end = slice->end ? slice->end->evaluate(context).get<int64_t>() : (int64_t) target_value.size();
1223
+ auto len = target_value.size();
1224
+ auto wrap = [len](int64_t i) -> int64_t {
1225
+ if (i < 0) {
1226
+ return i + len;
1227
+ }
1228
+ return i;
1229
+ };
1230
+ int64_t step = slice->step ? slice->step->evaluate(context).get<int64_t>() : 1;
1231
+ if (!step) {
1232
+ throw std::runtime_error("slice step cannot be zero");
1233
+ }
1234
+ int64_t start = slice->start ? wrap(slice->start->evaluate(context).get<int64_t>()) : (step < 0 ? len - 1 : 0);
1235
+ int64_t end = slice->end ? wrap(slice->end->evaluate(context).get<int64_t>()) : (step < 0 ? -1 : len);
1224
1236
  if (target_value.is_string()) {
1225
1237
  std::string s = target_value.get<std::string>();
1226
- if (start < 0) start = s.size() + start;
1227
- if (end < 0) end = s.size() + end;
1228
- return s.substr(start, end - start);
1238
+
1239
+ std::string result;
1240
+ if (start < end && step == 1) {
1241
+ result = s.substr(start, end - start);
1242
+ } else {
1243
+ for (int64_t i = start; step > 0 ? i < end : i > end; i += step) {
1244
+ result += s[i];
1245
+ }
1246
+ }
1247
+ return result;
1248
+
1229
1249
  } else if (target_value.is_array()) {
1230
- if (start < 0) start = target_value.size() + start;
1231
- if (end < 0) end = target_value.size() + end;
1232
1250
  auto result = Value::array();
1233
- for (auto i = start; i < end; ++i) {
1251
+ for (int64_t i = start; step > 0 ? i < end : i > end; i += step) {
1234
1252
  result.push_back(target_value.at(i));
1235
1253
  }
1236
1254
  return result;
@@ -1305,6 +1323,8 @@ public:
1305
1323
  if (name == "iterable") return l.is_iterable();
1306
1324
  if (name == "sequence") return l.is_array();
1307
1325
  if (name == "defined") return !l.is_null();
1326
+ if (name == "true") return l.to_bool();
1327
+ if (name == "false") return !l.to_bool();
1308
1328
  throw std::runtime_error("Unknown type for 'is' operator: " + name);
1309
1329
  };
1310
1330
  auto value = eval();
@@ -1520,6 +1540,10 @@ public:
1520
1540
  vargs.expectArgs("endswith method", {1, 1}, {0, 0});
1521
1541
  auto suffix = vargs.args[0].get<std::string>();
1522
1542
  return suffix.length() <= str.length() && std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
1543
+ } else if (method->get_name() == "startswith") {
1544
+ vargs.expectArgs("startswith method", {1, 1}, {0, 0});
1545
+ auto prefix = vargs.args[0].get<std::string>();
1546
+ return prefix.length() <= str.length() && std::equal(prefix.begin(), prefix.end(), str.begin());
1523
1547
  } else if (method->get_name() == "title") {
1524
1548
  vargs.expectArgs("title method", {0, 0}, {0, 0});
1525
1549
  auto res = str;
@@ -2082,28 +2106,37 @@ private:
2082
2106
 
2083
2107
  while (it != end && consumeSpaces() && peekSymbols({ "[", "." })) {
2084
2108
  if (!consumeToken("[").empty()) {
2085
- std::shared_ptr<Expression> index;
2109
+ std::shared_ptr<Expression> index;
2110
+ auto slice_loc = get_location();
2111
+ std::shared_ptr<Expression> start, end, step;
2112
+ bool has_first_colon = false, has_second_colon = false;
2113
+
2114
+ if (!peekSymbols({ ":" })) {
2115
+ start = parseExpression();
2116
+ }
2117
+
2118
+ if (!consumeToken(":").empty()) {
2119
+ has_first_colon = true;
2120
+ if (!peekSymbols({ ":", "]" })) {
2121
+ end = parseExpression();
2122
+ }
2086
2123
  if (!consumeToken(":").empty()) {
2087
- auto slice_end = parseExpression();
2088
- index = std::make_shared<SliceExpr>(slice_end->location, nullptr, std::move(slice_end));
2089
- } else {
2090
- auto slice_start = parseExpression();
2091
- if (!consumeToken(":").empty()) {
2092
- consumeSpaces();
2093
- if (peekSymbols({ "]" })) {
2094
- index = std::make_shared<SliceExpr>(slice_start->location, std::move(slice_start), nullptr);
2095
- } else {
2096
- auto slice_end = parseExpression();
2097
- index = std::make_shared<SliceExpr>(slice_start->location, std::move(slice_start), std::move(slice_end));
2098
- }
2099
- } else {
2100
- index = std::move(slice_start);
2124
+ has_second_colon = true;
2125
+ if (!peekSymbols({ "]" })) {
2126
+ step = parseExpression();
2101
2127
  }
2102
2128
  }
2103
- if (!index) throw std::runtime_error("Empty index in subscript");
2104
- if (consumeToken("]").empty()) throw std::runtime_error("Expected closing bracket in subscript");
2129
+ }
2130
+
2131
+ if ((has_first_colon || has_second_colon) && (start || end || step)) {
2132
+ index = std::make_shared<SliceExpr>(slice_loc, std::move(start), std::move(end), std::move(step));
2133
+ } else {
2134
+ index = std::move(start);
2135
+ }
2136
+ if (!index) throw std::runtime_error("Empty index in subscript");
2137
+ if (consumeToken("]").empty()) throw std::runtime_error("Expected closing bracket in subscript");
2105
2138
 
2106
- value = std::make_shared<SubscriptExpr>(value->location, std::move(value), std::move(index));
2139
+ value = std::make_shared<SubscriptExpr>(value->location, std::move(value), std::move(index));
2107
2140
  } else if (!consumeToken(".").empty()) {
2108
2141
  auto identifier = parseIdentifier();
2109
2142
  if (!identifier) throw std::runtime_error("Expected identifier in subscript");