cactus-react-native 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272) hide show
  1. package/README.md +230 -0
  2. package/android/build.gradle +104 -0
  3. package/android/gradle.properties +5 -0
  4. package/android/src/main/AndroidManifest.xml +4 -0
  5. package/android/src/main/CMakeLists.txt +104 -0
  6. package/android/src/main/java/com/cactus/Cactus.java +646 -0
  7. package/android/src/main/java/com/cactus/CactusPackage.java +48 -0
  8. package/android/src/main/java/com/cactus/LlamaContext.java +579 -0
  9. package/android/src/main/jni-utils.h +100 -0
  10. package/android/src/main/jni.cpp +1254 -0
  11. package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
  12. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
  13. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
  14. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
  15. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
  16. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
  17. package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
  18. package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
  19. package/android/src/newarch/java/com/cactus/CactusModule.java +124 -0
  20. package/android/src/oldarch/java/com/cactus/CactusModule.java +125 -0
  21. package/cactus-react-native.podspec +45 -0
  22. package/ios/CMakeLists.txt +109 -0
  23. package/ios/Cactus.h +6 -0
  24. package/ios/Cactus.mm +405 -0
  25. package/ios/CactusContext.h +57 -0
  26. package/ios/CactusContext.mm +835 -0
  27. package/ios/cactus.xcframework/info.plist +74 -0
  28. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus.h +133 -0
  29. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/chat.h +143 -0
  30. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/common.h +683 -0
  31. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-alloc.h +76 -0
  32. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-backend-impl.h +255 -0
  33. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-backend.h +354 -0
  34. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-common.h +1851 -0
  35. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpp.h +39 -0
  36. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-aarch64.h +8 -0
  37. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-impl.h +531 -0
  38. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-quants.h +63 -0
  39. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu-traits.h +38 -0
  40. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-cpu.h +138 -0
  41. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-impl.h +567 -0
  42. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-metal-impl.h +530 -0
  43. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-metal.h +66 -0
  44. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-opt.h +216 -0
  45. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-quants.h +100 -0
  46. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml-threading.h +14 -0
  47. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/ggml.h +2221 -0
  48. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/gguf.h +202 -0
  49. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/json-schema-to-grammar.h +21 -0
  50. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/json.hpp +24766 -0
  51. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-adapter.h +76 -0
  52. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-arch.h +421 -0
  53. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-batch.h +88 -0
  54. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-chat.h +53 -0
  55. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-context.h +265 -0
  56. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-cparams.h +38 -0
  57. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-cpp.h +30 -0
  58. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-grammar.h +173 -0
  59. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-graph.h +574 -0
  60. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-hparams.h +148 -0
  61. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-impl.h +61 -0
  62. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-io.h +35 -0
  63. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-kv-cache.h +287 -0
  64. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-memory.h +21 -0
  65. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-mmap.h +68 -0
  66. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model-loader.h +167 -0
  67. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-model.h +403 -0
  68. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-sampling.h +32 -0
  69. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama-vocab.h +125 -0
  70. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/llama.h +1416 -0
  71. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/log.h +103 -0
  72. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/chat-template.hpp +529 -0
  73. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/minja/minja.hpp +2915 -0
  74. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/sampling.h +107 -0
  75. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/sgemm.h +14 -0
  76. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/unicode-data.h +20 -0
  77. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/unicode.h +66 -0
  78. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Info.plist +0 -0
  79. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  80. package/ios/cactus.xcframework/ios-arm64/cactus.framework/ggml-llama.metallib +0 -0
  81. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +133 -0
  82. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +143 -0
  83. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/common.h +683 -0
  84. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-alloc.h +76 -0
  85. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend-impl.h +255 -0
  86. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +354 -0
  87. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +1851 -0
  88. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +39 -0
  89. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-aarch64.h +8 -0
  90. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-impl.h +531 -0
  91. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-quants.h +63 -0
  92. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-traits.h +38 -0
  93. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +138 -0
  94. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +567 -0
  95. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +530 -0
  96. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal.h +66 -0
  97. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +216 -0
  98. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-quants.h +100 -0
  99. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml-threading.h +14 -0
  100. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +2221 -0
  101. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/gguf.h +202 -0
  102. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/json-schema-to-grammar.h +21 -0
  103. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/json.hpp +24766 -0
  104. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-adapter.h +76 -0
  105. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +421 -0
  106. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +88 -0
  107. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +53 -0
  108. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +265 -0
  109. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +38 -0
  110. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-cpp.h +30 -0
  111. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-grammar.h +173 -0
  112. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +574 -0
  113. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +148 -0
  114. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-impl.h +61 -0
  115. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-io.h +35 -0
  116. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +287 -0
  117. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +21 -0
  118. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-mmap.h +68 -0
  119. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +167 -0
  120. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +403 -0
  121. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-sampling.h +32 -0
  122. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +125 -0
  123. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +1416 -0
  124. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/log.h +103 -0
  125. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +529 -0
  126. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +2915 -0
  127. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/sampling.h +107 -0
  128. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/sgemm.h +14 -0
  129. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/unicode-data.h +20 -0
  130. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/unicode.h +66 -0
  131. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Info.plist +0 -0
  132. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/_CodeSignature/CodeResources +101 -0
  133. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  134. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
  135. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus.h +133 -0
  136. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/chat.h +143 -0
  137. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/common.h +683 -0
  138. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-alloc.h +76 -0
  139. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-backend-impl.h +255 -0
  140. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-backend.h +354 -0
  141. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-common.h +1851 -0
  142. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpp.h +39 -0
  143. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-aarch64.h +8 -0
  144. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-impl.h +531 -0
  145. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-quants.h +63 -0
  146. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu-traits.h +38 -0
  147. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-cpu.h +138 -0
  148. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-impl.h +567 -0
  149. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-metal-impl.h +530 -0
  150. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-metal.h +66 -0
  151. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-opt.h +216 -0
  152. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-quants.h +100 -0
  153. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml-threading.h +14 -0
  154. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/ggml.h +2221 -0
  155. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/gguf.h +202 -0
  156. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/json-schema-to-grammar.h +21 -0
  157. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/json.hpp +24766 -0
  158. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-adapter.h +76 -0
  159. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-arch.h +421 -0
  160. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-batch.h +88 -0
  161. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-chat.h +53 -0
  162. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-context.h +265 -0
  163. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-cparams.h +38 -0
  164. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-cpp.h +30 -0
  165. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-grammar.h +173 -0
  166. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-graph.h +574 -0
  167. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-hparams.h +148 -0
  168. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-impl.h +61 -0
  169. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-io.h +35 -0
  170. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-kv-cache.h +287 -0
  171. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-memory.h +21 -0
  172. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-mmap.h +68 -0
  173. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model-loader.h +167 -0
  174. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-model.h +403 -0
  175. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-sampling.h +32 -0
  176. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama-vocab.h +125 -0
  177. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/llama.h +1416 -0
  178. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/log.h +103 -0
  179. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/chat-template.hpp +529 -0
  180. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/minja/minja.hpp +2915 -0
  181. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/sampling.h +107 -0
  182. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/sgemm.h +14 -0
  183. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/unicode-data.h +20 -0
  184. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/unicode.h +66 -0
  185. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Info.plist +0 -0
  186. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
  187. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/ggml-llama.metallib +0 -0
  188. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus.h +133 -0
  189. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/chat.h +143 -0
  190. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/common.h +683 -0
  191. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-alloc.h +76 -0
  192. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend-impl.h +255 -0
  193. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-backend.h +354 -0
  194. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-common.h +1851 -0
  195. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpp.h +39 -0
  196. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-aarch64.h +8 -0
  197. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-impl.h +531 -0
  198. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-quants.h +63 -0
  199. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu-traits.h +38 -0
  200. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-cpu.h +138 -0
  201. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-impl.h +567 -0
  202. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal-impl.h +530 -0
  203. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-metal.h +66 -0
  204. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-opt.h +216 -0
  205. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-quants.h +100 -0
  206. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml-threading.h +14 -0
  207. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/ggml.h +2221 -0
  208. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/gguf.h +202 -0
  209. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/json-schema-to-grammar.h +21 -0
  210. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/json.hpp +24766 -0
  211. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-adapter.h +76 -0
  212. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-arch.h +421 -0
  213. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-batch.h +88 -0
  214. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-chat.h +53 -0
  215. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-context.h +265 -0
  216. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-cparams.h +38 -0
  217. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-cpp.h +30 -0
  218. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-grammar.h +173 -0
  219. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-graph.h +574 -0
  220. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-hparams.h +148 -0
  221. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-impl.h +61 -0
  222. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-io.h +35 -0
  223. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-kv-cache.h +287 -0
  224. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-memory.h +21 -0
  225. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-mmap.h +68 -0
  226. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model-loader.h +167 -0
  227. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-model.h +403 -0
  228. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-sampling.h +32 -0
  229. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama-vocab.h +125 -0
  230. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/llama.h +1416 -0
  231. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/log.h +103 -0
  232. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/chat-template.hpp +529 -0
  233. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/minja/minja.hpp +2915 -0
  234. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/sampling.h +107 -0
  235. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/sgemm.h +14 -0
  236. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/unicode-data.h +20 -0
  237. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/unicode.h +66 -0
  238. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Info.plist +0 -0
  239. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/_CodeSignature/CodeResources +101 -0
  240. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  241. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/ggml-llama-sim.metallib +0 -0
  242. package/lib/commonjs/NativeCactus.js +9 -0
  243. package/lib/commonjs/NativeCactus.js.map +1 -0
  244. package/lib/commonjs/chat.js +37 -0
  245. package/lib/commonjs/chat.js.map +1 -0
  246. package/lib/commonjs/grammar.js +560 -0
  247. package/lib/commonjs/grammar.js.map +1 -0
  248. package/lib/commonjs/index.js +300 -0
  249. package/lib/commonjs/index.js.map +1 -0
  250. package/lib/commonjs/package.json +1 -0
  251. package/lib/module/NativeCactus.js +5 -0
  252. package/lib/module/NativeCactus.js.map +1 -0
  253. package/lib/module/chat.js +33 -0
  254. package/lib/module/chat.js.map +1 -0
  255. package/lib/module/grammar.js +553 -0
  256. package/lib/module/grammar.js.map +1 -0
  257. package/lib/module/index.js +277 -0
  258. package/lib/module/index.js.map +1 -0
  259. package/lib/module/package.json +1 -0
  260. package/lib/typescript/NativeCactus.d.ts +357 -0
  261. package/lib/typescript/NativeCactus.d.ts.map +1 -0
  262. package/lib/typescript/chat.d.ts +10 -0
  263. package/lib/typescript/chat.d.ts.map +1 -0
  264. package/lib/typescript/grammar.d.ts +37 -0
  265. package/lib/typescript/grammar.d.ts.map +1 -0
  266. package/lib/typescript/index.d.ts +96 -0
  267. package/lib/typescript/index.d.ts.map +1 -0
  268. package/package.json +223 -0
  269. package/src/NativeCactus.ts +418 -0
  270. package/src/chat.ts +44 -0
  271. package/src/grammar.ts +854 -0
  272. package/src/index.ts +482 -0
@@ -0,0 +1,76 @@
1
+ #pragma once
2
+
3
+ #include "llama.h"
4
+
5
+ #include "ggml-cpp.h"
6
+
7
+ #include <string>
8
+ #include <unordered_map>
9
+ #include <vector>
10
+
11
+ // TODO: pimpl
12
+
13
+ //
14
+ // llama_adapter_cvec
15
+ //
16
+
17
+ struct llama_adapter_cvec { // adapter state declared here; member functions are only declared, definitions live elsewhere
18
+ lm_ggml_tensor * tensor_for(int il) const; // returns a tensor pointer for index il (presumably a layer index - confirm against the definition)
19
+
20
+ lm_ggml_tensor * apply_to(lm_ggml_context * ctx, lm_ggml_tensor * cur, int il) const; // const: does not modify this object; returns a tensor pointer derived from ctx/cur/il
21
+
22
+ bool apply( // non-const entry point; returns bool (presumably success/failure - confirm against the definition)
23
+ const llama_model & model,
24
+ const float * data, // read-only float buffer
25
+ size_t len, // NOTE(review): presumably the element count of data - confirm
26
+ int32_t n_embd,
27
+ int32_t il_start, // NOTE(review): whether il_start/il_end are inclusive is not visible here
28
+ int32_t il_end);
29
+
30
+ private:
31
+ bool init(const llama_model & model); // private helper; only the declaration is visible here
32
+
33
+ int32_t layer_start = -1; // defaults to -1 until assigned
34
+ int32_t layer_end = -1; // defaults to -1 until assigned
35
+
36
+ std::vector<lm_ggml_context_ptr> ctxs; // smart-pointer handles to ggml contexts held by this adapter
37
+ std::vector<lm_ggml_backend_buffer_ptr> bufs; // smart-pointer handles to backend buffers held by this adapter
38
+
39
+ std::vector<lm_ggml_tensor *> tensors; // per layer
40
+ };
41
+
42
+ //
43
+ // llama_adapter_lora
44
+ //
45
+
46
+ struct llama_adapter_lora_weight { // pair of tensor pointers (a, b) plus a helper that derives the effective scale
46
+ lm_ggml_tensor * a = nullptr; // raw pointer, null by default
47
+ lm_ggml_tensor * b = nullptr; // raw pointer, null by default; get_scale() dereferences it
48
+
49
+ // get actual scale based on rank and alpha
50
+ float get_scale(float alpha, float adapter_scale) const { // NOTE(review): b must be non-null here; a default-constructed weight would dereference nullptr
51
+ const float rank = (float) b->ne[0]; // rank is taken from the first dimension of b
52
+ const float scale = alpha ? adapter_scale * alpha / rank : adapter_scale; // alpha == 0 means "no alpha": use adapter_scale unchanged; NOTE(review): rank == 0 would divide by zero
53
+ return scale;
54
+ }
55
+
56
+ llama_adapter_lora_weight() = default; // leaves a and b as nullptr
57
+ llama_adapter_lora_weight(lm_ggml_tensor * a, lm_ggml_tensor * b) : a(a), b(b) {} // stores both pointers as given; no null check
58
+ };
60
+
61
+ struct llama_adapter_lora { // one adapter: named weight pairs plus the handles that back them
61
+ // map tensor name to lora_a_b
62
+ std::unordered_map<std::string, llama_adapter_lora_weight> ab_map; // keyed by tensor name string
63
+
64
+ std::vector<lm_ggml_context_ptr> ctxs; // smart-pointer handles to ggml contexts
65
+ std::vector<lm_ggml_backend_buffer_ptr> bufs; // smart-pointer handles to backend buffers
66
+
67
+ float alpha; // NOTE(review): no default member initializer; confirm it is always assigned before being read
68
+
69
+ llama_adapter_lora() = default;
70
+ ~llama_adapter_lora() = default;
71
+
72
+ llama_adapter_lora_weight * get_weight(lm_ggml_tensor * w); // declaration only; presumably returns nullptr when w has no entry - confirm against the definition
73
+ };
75
+
76
+ using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>; // keyed by non-owning adapter pointer; the float is presumably the per-adapter scale - confirm at call sites
@@ -0,0 +1,421 @@
1
+ #pragma once
2
+
3
+ #include "ggml.h" // lm_ggml_op
4
+
5
+ #include <string>
6
+
7
+ //
8
+ // gguf constants (sync with gguf.py)
9
+ //
10
+
11
+ enum llm_arch { // unscoped enum of architecture identifiers; values are implicit, starting at 0 in declaration order
12
+ LLM_ARCH_LLAMA,
13
+ LLM_ARCH_DECI,
14
+ LLM_ARCH_FALCON,
15
+ LLM_ARCH_BAICHUAN,
16
+ LLM_ARCH_GROK,
17
+ LLM_ARCH_GPT2,
18
+ LLM_ARCH_GPTJ,
19
+ LLM_ARCH_GPTNEOX,
20
+ LLM_ARCH_MPT,
21
+ LLM_ARCH_STARCODER,
22
+ LLM_ARCH_REFACT,
23
+ LLM_ARCH_BERT,
24
+ LLM_ARCH_NOMIC_BERT,
25
+ LLM_ARCH_JINA_BERT_V2,
26
+ LLM_ARCH_BLOOM,
27
+ LLM_ARCH_STABLELM,
28
+ LLM_ARCH_QWEN,
29
+ LLM_ARCH_QWEN2,
30
+ LLM_ARCH_QWEN2MOE,
31
+ LLM_ARCH_QWEN2VL,
32
+ LLM_ARCH_PHI2,
33
+ LLM_ARCH_PHI3,
34
+ LLM_ARCH_PHIMOE,
35
+ LLM_ARCH_PLAMO,
36
+ LLM_ARCH_CODESHELL,
37
+ LLM_ARCH_ORION,
38
+ LLM_ARCH_INTERNLM2,
39
+ LLM_ARCH_MINICPM,
40
+ LLM_ARCH_MINICPM3,
41
+ LLM_ARCH_GEMMA,
42
+ LLM_ARCH_GEMMA2,
43
+ LLM_ARCH_GEMMA3,
44
+ LLM_ARCH_STARCODER2,
45
+ LLM_ARCH_MAMBA,
46
+ LLM_ARCH_XVERSE,
47
+ LLM_ARCH_COMMAND_R,
48
+ LLM_ARCH_COHERE2,
49
+ LLM_ARCH_DBRX,
50
+ LLM_ARCH_OLMO,
51
+ LLM_ARCH_OLMO2,
52
+ LLM_ARCH_OLMOE,
53
+ LLM_ARCH_OPENELM,
54
+ LLM_ARCH_ARCTIC,
55
+ LLM_ARCH_DEEPSEEK,
56
+ LLM_ARCH_DEEPSEEK2,
57
+ LLM_ARCH_CHATGLM,
58
+ LLM_ARCH_BITNET,
59
+ LLM_ARCH_T5,
60
+ LLM_ARCH_T5ENCODER,
61
+ LLM_ARCH_JAIS,
62
+ LLM_ARCH_NEMOTRON,
63
+ LLM_ARCH_EXAONE,
64
+ LLM_ARCH_RWKV6,
65
+ LLM_ARCH_RWKV6QWEN2,
66
+ LLM_ARCH_RWKV7,
67
+ LLM_ARCH_ARWKV7,
68
+ LLM_ARCH_GRANITE,
69
+ LLM_ARCH_GRANITE_MOE,
70
+ LLM_ARCH_CHAMELEON,
71
+ LLM_ARCH_WAVTOKENIZER_DEC,
72
+ LLM_ARCH_UNKNOWN, // declared last, so it has the largest value; inserting entries above it shifts its numeric value
73
+ };
74
+
75
+ enum llm_kv { // unscoped enum of metadata key identifiers; values are implicit, starting at 0 in declaration order
76
+ LLM_KV_GENERAL_TYPE, // LLM_KV_GENERAL_* group starts here
77
+ LLM_KV_GENERAL_ARCHITECTURE,
78
+ LLM_KV_GENERAL_QUANTIZATION_VERSION,
79
+ LLM_KV_GENERAL_ALIGNMENT,
80
+ LLM_KV_GENERAL_NAME,
81
+ LLM_KV_GENERAL_AUTHOR,
82
+ LLM_KV_GENERAL_VERSION,
83
+ LLM_KV_GENERAL_URL,
84
+ LLM_KV_GENERAL_DESCRIPTION,
85
+ LLM_KV_GENERAL_LICENSE,
86
+ LLM_KV_GENERAL_SOURCE_URL,
87
+ LLM_KV_GENERAL_SOURCE_HF_REPO,
88
+ // keys without a shared family prefix (sizes, counts, expert settings, scales)
89
+ LLM_KV_VOCAB_SIZE,
90
+ LLM_KV_CONTEXT_LENGTH,
91
+ LLM_KV_EMBEDDING_LENGTH,
92
+ LLM_KV_FEATURES_LENGTH,
93
+ LLM_KV_BLOCK_COUNT,
94
+ LLM_KV_LEADING_DENSE_BLOCK_COUNT,
95
+ LLM_KV_FEED_FORWARD_LENGTH,
96
+ LLM_KV_EXPERT_FEED_FORWARD_LENGTH,
97
+ LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH,
98
+ LLM_KV_USE_PARALLEL_RESIDUAL,
99
+ LLM_KV_TENSOR_DATA_LAYOUT,
100
+ LLM_KV_EXPERT_COUNT,
101
+ LLM_KV_EXPERT_USED_COUNT,
102
+ LLM_KV_EXPERT_SHARED_COUNT,
103
+ LLM_KV_EXPERT_WEIGHTS_SCALE,
104
+ LLM_KV_EXPERT_WEIGHTS_NORM,
105
+ LLM_KV_EXPERT_GATING_FUNC,
106
+ LLM_KV_POOLING_TYPE,
107
+ LLM_KV_LOGIT_SCALE,
108
+ LLM_KV_DECODER_START_TOKEN_ID,
109
+ LLM_KV_ATTN_LOGIT_SOFTCAPPING,
110
+ LLM_KV_FINAL_LOGIT_SOFTCAPPING,
111
+ LLM_KV_SWIN_NORM,
112
+ LLM_KV_RESCALE_EVERY_N_LAYERS,
113
+ LLM_KV_TIME_MIX_EXTRA_DIM,
114
+ LLM_KV_TIME_DECAY_EXTRA_DIM,
115
+ LLM_KV_RESIDUAL_SCALE,
116
+ LLM_KV_EMBEDDING_SCALE,
117
+ LLM_KV_TOKEN_SHIFT_COUNT,
118
+ // LLM_KV_ATTENTION_* group
119
+ LLM_KV_ATTENTION_HEAD_COUNT,
120
+ LLM_KV_ATTENTION_HEAD_COUNT_KV,
121
+ LLM_KV_ATTENTION_MAX_ALIBI_BIAS,
122
+ LLM_KV_ATTENTION_CLAMP_KQV,
123
+ LLM_KV_ATTENTION_KEY_LENGTH,
124
+ LLM_KV_ATTENTION_VALUE_LENGTH,
125
+ LLM_KV_ATTENTION_LAYERNORM_EPS,
126
+ LLM_KV_ATTENTION_LAYERNORM_RMS_EPS,
127
+ LLM_KV_ATTENTION_GROUPNORM_EPS,
128
+ LLM_KV_ATTENTION_GROUPNORM_GROUPS,
129
+ LLM_KV_ATTENTION_CAUSAL,
130
+ LLM_KV_ATTENTION_Q_LORA_RANK,
131
+ LLM_KV_ATTENTION_KV_LORA_RANK,
132
+ LLM_KV_ATTENTION_DECAY_LORA_RANK,
133
+ LLM_KV_ATTENTION_ICLR_LORA_RANK,
134
+ LLM_KV_ATTENTION_VALUE_RESIDUAL_MIX_LORA_RANK,
135
+ LLM_KV_ATTENTION_GATE_LORA_RANK,
136
+ LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
137
+ LLM_KV_ATTENTION_SLIDING_WINDOW,
138
+ LLM_KV_ATTENTION_SCALE,
139
+ // LLM_KV_ROPE_* group
140
+ LLM_KV_ROPE_DIMENSION_COUNT,
141
+ LLM_KV_ROPE_DIMENSION_SECTIONS,
142
+ LLM_KV_ROPE_FREQ_BASE,
143
+ LLM_KV_ROPE_SCALE_LINEAR,
144
+ LLM_KV_ROPE_SCALING_TYPE,
145
+ LLM_KV_ROPE_SCALING_FACTOR,
146
+ LLM_KV_ROPE_SCALING_ATTN_FACTOR,
147
+ LLM_KV_ROPE_SCALING_ORIG_CTX_LEN,
148
+ LLM_KV_ROPE_SCALING_FINETUNED,
149
+ LLM_KV_ROPE_SCALING_YARN_LOG_MUL,
150
+ // LLM_KV_SPLIT_* group
151
+ LLM_KV_SPLIT_NO,
152
+ LLM_KV_SPLIT_COUNT,
153
+ LLM_KV_SPLIT_TENSORS_COUNT,
154
+ // LLM_KV_SSM_* group
155
+ LLM_KV_SSM_INNER_SIZE,
156
+ LLM_KV_SSM_CONV_KERNEL,
157
+ LLM_KV_SSM_STATE_SIZE,
158
+ LLM_KV_SSM_TIME_STEP_RANK,
159
+ LLM_KV_SSM_DT_B_C_RMS,
160
+ // LLM_KV_WKV_* group (single entry)
161
+ LLM_KV_WKV_HEAD_SIZE,
162
+ // LLM_KV_TOKENIZER_* group
163
+ LLM_KV_TOKENIZER_MODEL,
164
+ LLM_KV_TOKENIZER_PRE,
165
+ LLM_KV_TOKENIZER_LIST,
166
+ LLM_KV_TOKENIZER_TOKEN_TYPE,
167
+ LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT,
168
+ LLM_KV_TOKENIZER_SCORES,
169
+ LLM_KV_TOKENIZER_MERGES,
170
+ LLM_KV_TOKENIZER_BOS_ID,
171
+ LLM_KV_TOKENIZER_EOS_ID,
172
+ LLM_KV_TOKENIZER_EOT_ID,
173
+ LLM_KV_TOKENIZER_EOM_ID,
174
+ LLM_KV_TOKENIZER_UNK_ID,
175
+ LLM_KV_TOKENIZER_SEP_ID,
176
+ LLM_KV_TOKENIZER_PAD_ID,
177
+ LLM_KV_TOKENIZER_CLS_ID,
178
+ LLM_KV_TOKENIZER_MASK_ID,
179
+ LLM_KV_TOKENIZER_ADD_BOS,
180
+ LLM_KV_TOKENIZER_ADD_EOS,
181
+ LLM_KV_TOKENIZER_ADD_PREFIX,
182
+ LLM_KV_TOKENIZER_REMOVE_EXTRA_WS,
183
+ LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP,
184
+ LLM_KV_TOKENIZER_HF_JSON,
185
+ LLM_KV_TOKENIZER_RWKV,
186
+ LLM_KV_TOKENIZER_CHAT_TEMPLATE,
187
+ LLM_KV_TOKENIZER_CHAT_TEMPLATE_N,
188
+ LLM_KV_TOKENIZER_FIM_PRE_ID,
189
+ LLM_KV_TOKENIZER_FIM_SUF_ID,
190
+ LLM_KV_TOKENIZER_FIM_MID_ID,
191
+ LLM_KV_TOKENIZER_FIM_PAD_ID,
192
+ LLM_KV_TOKENIZER_FIM_REP_ID,
193
+ LLM_KV_TOKENIZER_FIM_SEP_ID,
194
+ // LLM_KV_ADAPTER_* group
195
+ LLM_KV_ADAPTER_TYPE,
196
+ LLM_KV_ADAPTER_LORA_ALPHA,
197
+ // LLM_KV_POSNET_* group
198
+ LLM_KV_POSNET_EMBEDDING_LENGTH,
199
+ LLM_KV_POSNET_BLOCK_COUNT,
200
+ // LLM_KV_CONVNEXT_* group
201
+ LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
202
+ LLM_KV_CONVNEXT_BLOCK_COUNT,
203
+
204
+ // deprecated:
205
+ LLM_KV_TOKENIZER_PREFIX_ID, // NOTE(review): presumably superseded by the FIM_* ids above - confirm before relying on it
206
+ LLM_KV_TOKENIZER_SUFFIX_ID,
207
+ LLM_KV_TOKENIZER_MIDDLE_ID,
208
+ };
209
+
210
+ enum llm_tensor { // logical tensor identifiers; LLM_TN pairs one with an llm_arch to build a tensor-name string, and llm_tensor_info_for() maps one to its llm_tensor_info
211
+ LLM_TENSOR_TOKEN_EMBD,
212
+ LLM_TENSOR_TOKEN_EMBD_NORM,
213
+ LLM_TENSOR_TOKEN_TYPES,
214
+ LLM_TENSOR_POS_EMBD,
215
+ LLM_TENSOR_OUTPUT,
216
+ LLM_TENSOR_OUTPUT_NORM,
217
+ LLM_TENSOR_ROPE_FREQS,
218
+ LLM_TENSOR_ROPE_FACTORS_LONG,
219
+ LLM_TENSOR_ROPE_FACTORS_SHORT,
220
+ LLM_TENSOR_ATTN_Q,
221
+ LLM_TENSOR_ATTN_K,
222
+ LLM_TENSOR_ATTN_V,
223
+ LLM_TENSOR_ATTN_QKV,
224
+ LLM_TENSOR_ATTN_OUT,
225
+ LLM_TENSOR_ATTN_NORM,
226
+ LLM_TENSOR_ATTN_NORM_2,
227
+ LLM_TENSOR_ATTN_OUT_NORM,
228
+ LLM_TENSOR_ATTN_POST_NORM,
229
+ LLM_TENSOR_ATTN_ROT_EMBD,
230
+ LLM_TENSOR_FFN_GATE_INP,
231
+ LLM_TENSOR_FFN_GATE_INP_SHEXP,
232
+ LLM_TENSOR_FFN_NORM,
233
+ LLM_TENSOR_FFN_POST_NORM,
234
+ LLM_TENSOR_FFN_GATE,
235
+ LLM_TENSOR_FFN_DOWN,
236
+ LLM_TENSOR_FFN_UP,
237
+ LLM_TENSOR_FFN_ACT,
238
+ LLM_TENSOR_FFN_DOWN_EXP, // split experts for backward compatibility (compare the merged *_EXPS entries below)
239
+ LLM_TENSOR_FFN_GATE_EXP,
240
+ LLM_TENSOR_FFN_UP_EXP,
241
+ LLM_TENSOR_FFN_NORM_EXPS,
242
+ LLM_TENSOR_FFN_DOWN_EXPS, // merged experts
243
+ LLM_TENSOR_FFN_GATE_EXPS,
244
+ LLM_TENSOR_FFN_UP_EXPS,
245
+ LLM_TENSOR_FFN_DOWN_SHEXP,
246
+ LLM_TENSOR_FFN_GATE_SHEXP,
247
+ LLM_TENSOR_FFN_UP_SHEXP,
248
+ LLM_TENSOR_FFN_EXP_PROBS_B,
249
+ LLM_TENSOR_ATTN_Q_NORM,
250
+ LLM_TENSOR_ATTN_K_NORM,
251
+ LLM_TENSOR_LAYER_OUT_NORM,
252
+ LLM_TENSOR_SSM_IN,
253
+ LLM_TENSOR_SSM_CONV1D,
254
+ LLM_TENSOR_SSM_X,
255
+ LLM_TENSOR_SSM_DT,
256
+ LLM_TENSOR_SSM_A,
257
+ LLM_TENSOR_SSM_D,
258
+ LLM_TENSOR_SSM_OUT,
259
+ LLM_TENSOR_TIME_MIX_W0,
260
+ LLM_TENSOR_TIME_MIX_W1,
261
+ LLM_TENSOR_TIME_MIX_W2,
262
+ LLM_TENSOR_TIME_MIX_A0,
263
+ LLM_TENSOR_TIME_MIX_A1,
264
+ LLM_TENSOR_TIME_MIX_A2,
265
+ LLM_TENSOR_TIME_MIX_V0,
266
+ LLM_TENSOR_TIME_MIX_V1,
267
+ LLM_TENSOR_TIME_MIX_V2,
268
+ LLM_TENSOR_TIME_MIX_G1,
269
+ LLM_TENSOR_TIME_MIX_G2,
270
+ LLM_TENSOR_TIME_MIX_K_K,
271
+ LLM_TENSOR_TIME_MIX_K_A,
272
+ LLM_TENSOR_TIME_MIX_R_K,
273
+ LLM_TENSOR_TIME_MIX_LERP_X,
274
+ LLM_TENSOR_TIME_MIX_LERP_W,
275
+ LLM_TENSOR_TIME_MIX_LERP_K,
276
+ LLM_TENSOR_TIME_MIX_LERP_V,
277
+ LLM_TENSOR_TIME_MIX_LERP_R,
278
+ LLM_TENSOR_TIME_MIX_LERP_G,
279
+ LLM_TENSOR_TIME_MIX_LERP_FUSED,
280
+ LLM_TENSOR_TIME_MIX_FIRST,
281
+ LLM_TENSOR_TIME_MIX_DECAY,
282
+ LLM_TENSOR_TIME_MIX_DECAY_W1,
283
+ LLM_TENSOR_TIME_MIX_DECAY_W2,
284
+ LLM_TENSOR_TIME_MIX_KEY,
285
+ LLM_TENSOR_TIME_MIX_VALUE,
286
+ LLM_TENSOR_TIME_MIX_RECEPTANCE,
287
+ LLM_TENSOR_TIME_MIX_GATE,
288
+ LLM_TENSOR_TIME_MIX_LN,
289
+ LLM_TENSOR_TIME_MIX_OUTPUT,
290
+ LLM_TENSOR_CHANNEL_MIX_LERP_K,
291
+ LLM_TENSOR_CHANNEL_MIX_LERP_R,
292
+ LLM_TENSOR_CHANNEL_MIX_KEY,
293
+ LLM_TENSOR_CHANNEL_MIX_RECEPTANCE,
294
+ LLM_TENSOR_CHANNEL_MIX_VALUE,
295
+ LLM_TENSOR_ATTN_Q_A,
296
+ LLM_TENSOR_ATTN_Q_B,
297
+ LLM_TENSOR_ATTN_KV_A_MQA,
298
+ LLM_TENSOR_ATTN_KV_B,
299
+ LLM_TENSOR_ATTN_Q_A_NORM,
300
+ LLM_TENSOR_ATTN_KV_A_NORM,
301
+ LLM_TENSOR_ATTN_SUB_NORM,
302
+ LLM_TENSOR_FFN_SUB_NORM,
303
+ LLM_TENSOR_DEC_ATTN_NORM,
304
+ LLM_TENSOR_DEC_ATTN_Q,
305
+ LLM_TENSOR_DEC_ATTN_K,
306
+ LLM_TENSOR_DEC_ATTN_V,
307
+ LLM_TENSOR_DEC_ATTN_OUT,
308
+ LLM_TENSOR_DEC_ATTN_REL_B,
309
+ LLM_TENSOR_DEC_CROSS_ATTN_NORM,
310
+ LLM_TENSOR_DEC_CROSS_ATTN_Q,
311
+ LLM_TENSOR_DEC_CROSS_ATTN_K,
312
+ LLM_TENSOR_DEC_CROSS_ATTN_V,
313
+ LLM_TENSOR_DEC_CROSS_ATTN_OUT,
314
+ LLM_TENSOR_DEC_CROSS_ATTN_REL_B,
315
+ LLM_TENSOR_DEC_FFN_NORM,
316
+ LLM_TENSOR_DEC_FFN_GATE,
317
+ LLM_TENSOR_DEC_FFN_DOWN,
318
+ LLM_TENSOR_DEC_FFN_UP,
319
+ LLM_TENSOR_DEC_OUTPUT_NORM,
320
+ LLM_TENSOR_ENC_ATTN_NORM,
321
+ LLM_TENSOR_ENC_ATTN_Q,
322
+ LLM_TENSOR_ENC_ATTN_K,
323
+ LLM_TENSOR_ENC_ATTN_V,
324
+ LLM_TENSOR_ENC_ATTN_OUT,
325
+ LLM_TENSOR_ENC_ATTN_REL_B,
326
+ LLM_TENSOR_ENC_FFN_NORM,
327
+ LLM_TENSOR_ENC_FFN_GATE,
328
+ LLM_TENSOR_ENC_FFN_DOWN,
329
+ LLM_TENSOR_ENC_FFN_UP,
330
+ LLM_TENSOR_ENC_OUTPUT_NORM,
331
+ LLM_TENSOR_CLS,
332
+ LLM_TENSOR_CLS_OUT,
333
+ LLM_TENSOR_CONV1D,
334
+ LLM_TENSOR_CONVNEXT_DW,
335
+ LLM_TENSOR_CONVNEXT_NORM,
336
+ LLM_TENSOR_CONVNEXT_PW1,
337
+ LLM_TENSOR_CONVNEXT_PW2,
338
+ LLM_TENSOR_CONVNEXT_GAMMA,
339
+ LLM_TENSOR_POS_NET_CONV1,
340
+ LLM_TENSOR_POS_NET_CONV2,
341
+ LLM_TENSOR_POS_NET_NORM,
342
+ LLM_TENSOR_POS_NET_NORM1,
343
+ LLM_TENSOR_POS_NET_NORM2,
344
+ LLM_TENSOR_POS_NET_ATTN_NORM,
345
+ LLM_TENSOR_POS_NET_ATTN_Q,
346
+ LLM_TENSOR_POS_NET_ATTN_K,
347
+ LLM_TENSOR_POS_NET_ATTN_V,
348
+ LLM_TENSOR_POS_NET_ATTN_OUT,
349
+ };
350
+
351
+ enum llm_tensor_layer { // layer category stored in llm_tensor_info::layer
352
+ LLM_TENSOR_LAYER_INPUT, // NOTE(review): presumably tensors used before the repeated blocks — confirm
353
+ LLM_TENSOR_LAYER_REPEATING, // NOTE(review): presumably the per-block tensors (the "blk.N." names) — confirm
354
+ LLM_TENSOR_LAYER_OUTPUT, // NOTE(review): presumably tensors used after the repeated blocks — confirm
355
+ };
356
+
357
+ struct LLM_KV { // callable helper: operator() turns an llm_kv enumerator into a std::string
358
+ LLM_KV(llm_arch arch, const char * suffix = nullptr); // not explicit, so a bare llm_arch converts implicitly; suffix is optional
359
+
360
+ llm_arch arch; // architecture this helper is bound to
361
+ const char * suffix; // raw pointer — presumably non-owning, so the pointed-to string must outlive this object; TODO confirm
362
+
363
+ std::string operator()(llm_kv kv) const; // defined out of line; the exact key format is not visible in this header
364
+ };
365
+
366
+ // helper to handle gguf constants
367
+ // usage:
368
+ //
369
+ // const auto tn = LLM_TN(LLM_ARCH_LLAMA);
370
+ //
371
+ // std::string name = tn(LLM_TENSOR_OUTPUT); -> "output"
372
+ // std::string name = tn(LLM_TENSOR_TOKEN_EMBD, "bias"); -> "token_embd.bias"
373
+ // std::string name = tn(LLM_TENSOR_ATTN_NORM, "weight", 3); -> "blk.3.attn_norm.weight"
374
+ //
375
+ struct LLM_TN_IMPL { // immutable description of one tensor name; produced by LLM_TN::operator()
376
+ const llm_arch arch; // architecture the name is built for
377
+ const llm_tensor tensor; // which logical tensor is being named
378
+ const char * const suffix; // optional suffix such as "weight" or "bias" (see usage above); LLM_TN passes nullptr when none is given
379
+ const int bid; // block index, e.g. 3 in "blk.3.attn_norm.weight"; LLM_TN defaults it to -1
380
+ const int xid; // second index, LLM_TN defaults it to -1 — NOTE(review): presumably an expert index; confirm in str()
381
+
382
+ std::string str() const; // builds the name string; defined out of line
383
+
384
+ operator std::string() const { // implicit conversion, so a LLM_TN_IMPL can be used wherever a std::string is expected
385
+ return str();
386
+ }
387
+
388
+ friend bool operator==(const std::string & str, const LLM_TN_IMPL & tn) { // only the string-on-the-left form is declared here
389
+ return str == tn.str();
390
+ }
391
+
392
+ friend bool operator!=(const std::string & str, const LLM_TN_IMPL & tn) { // negated counterpart of operator== above
393
+ return str != tn.str();
394
+ }
395
+ };
396
+
397
+ struct LLM_TN { // factory bound to one architecture; each call yields an LLM_TN_IMPL describing a tensor name
398
+ LLM_TN(llm_arch arch) : arch(arch) {} // not explicit, so a bare llm_arch converts implicitly
399
+
400
+ llm_arch arch; // architecture copied into every LLM_TN_IMPL this object creates
401
+
402
+ LLM_TN_IMPL operator()(llm_tensor tensor, const char * suffix, int bid = -1, int xid = -1) const { // overload with a suffix; the pointer is stored as-is, not copied
403
+ return { arch, tensor, suffix, bid, xid };
404
+ }
405
+
406
+ LLM_TN_IMPL operator()(llm_tensor tensor, int bid = -1, int xid = -1) const { // overload without a suffix
407
+ return { arch, tensor, nullptr, bid, xid };
408
+ }
409
+ };
410
+
411
+
412
+ struct llm_tensor_info { // per-tensor metadata record; llm_tensor_info_for() returns one by const reference
413
+ llm_tensor_layer layer; // layer category of the tensor
414
+ lm_ggml_op op; // NOTE(review): presumably the ggml operation this tensor is consumed by — confirm against the lookup table
415
+ };
416
+
417
+ const char * llm_arch_name(llm_arch arch); // returns a C string for the architecture; NOTE(review): ownership/lifetime of the string is not visible here — confirm
418
+
419
+ llm_arch llm_arch_from_string(const std::string & name); // presumably the inverse of llm_arch_name; behavior for unrecognized names is not visible here — TODO confirm
420
+
421
+ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor); // returns a reference, so the record must outlive the call — NOTE(review): presumably backed by a static table; confirm
@@ -0,0 +1,88 @@
1
+ #pragma once
2
+
3
+ #include "llama.h"
4
+
5
+ #include <array>
6
+ #include <vector>
7
+
8
+ // very similar to llama_batch,
9
+ // but has more metadata about sequences
10
+ struct llama_ubatch { // returned by llama_sbatch::reserve_ubatch() and the llama_sbatch::split_*() methods
11
+ bool equal_seqs; // NOTE(review): presumably true when every sequence in this ubatch has the same length (cf. split_equal) — confirm
12
+ // TODO: whole_seqs for embeddings?
13
+
14
+ uint32_t n_tokens; // total tokens (n_seq_tokens * n_seqs)
15
+ uint32_t n_seq_tokens; // tokens per sequence
16
+ uint32_t n_seqs; // number of sequences (n_tokens / n_seq_tokens, per the formula above)
17
+
18
+ llama_token * token; // [n_tokens]
19
+ float * embd; // [n_embd, n_tokens]
20
+ llama_pos * pos; // [n_tokens]
21
+ int32_t * n_seq_id; // [n_seqs]
22
+ llama_seq_id ** seq_id; // [n_seqs]
23
+ int8_t * output; // [n_tokens]
24
+ }; // NOTE(review): the raw pointers above presumably alias the ubatch_* buffers of llama_sbatch rather than owning memory — confirm
25
+
26
+ struct llama_sbatch_seq { // one entry of llama_sbatch::seq
27
+ int32_t n_seq_id; // NOTE(review): presumably the number of ids reachable through seq_id — confirm
28
+
29
+ llama_seq_id * seq_id; // raw pointer — presumably non-owning and pointing into the source batch; TODO confirm
30
+
31
+ size_t offset; // NOTE(review): presumably the start position of this run within llama_sbatch::ids — confirm
32
+ size_t length; // NOTE(review): presumably the number of tokens remaining in this run — confirm
33
+ };
34
+
35
+ // sequence-length-aware batch splitting
36
+ struct llama_sbatch { // holds a view of an input llama_batch plus the buffers used to hand out llama_ubatch slices
37
+ // tokens left in this batch
38
+ size_t n_tokens;
39
+
40
+ size_t n_embd; // embedding size, passed in through from_batch()
41
+
42
+ bool logits_all; // TODO: remove once lctx.logits_all is removed too
43
+
44
+ // sorted indices into the batch
45
+ std::vector<int64_t> ids;
46
+ // batch indices of the output
47
+ std::vector<int64_t> out_ids;
48
+ std::vector<llama_sbatch_seq> seq; // per-sequence bookkeeping entries
49
+
50
+ const llama_batch * batch = nullptr; // non-null only after a batch is attached — NOTE(review): presumably set by from_batch(); the batch is not owned and must outlive this object; confirm
51
+
52
+ // buffers for the ubatch
53
+ std::vector<llama_token> ubatch_token;
54
+ std::vector<float> ubatch_embd;
55
+ std::vector<llama_pos> ubatch_pos;
56
+ std::vector<int32_t> ubatch_n_seq_id;
57
+ std::vector<llama_seq_id *> ubatch_seq_id;
58
+ std::vector<int8_t> ubatch_output;
59
+
60
+ llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false); // NOTE(review): presumably sizes the ubatch_* buffers for n_ubatch tokens and returns an empty ubatch over them — confirm
61
+
62
+ void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length); // takes both arguments by mutable reference — NOTE(review): presumably moves `length` tokens of `seq` into `ubatch`; confirm
63
+
64
+ // simple split, unknown number of sequences of unequal lengths
65
+ llama_ubatch split_simple(size_t n_ubatch);
66
+
67
+ // make batches of equal-length sequences
68
+ llama_ubatch split_equal(size_t n_ubatch);
69
+
70
+ // sequence-wise split
71
+ llama_ubatch split_seq(size_t n_ubatch);
72
+
73
+ void from_batch(const llama_batch & batch, size_t n_embd, bool simple_split = false, bool logits_all = false); // initializes this splitter from an input batch; both flags default to false
74
+ };
75
+
76
+ // temporarily allocate memory for the input batch if needed
77
+ struct llama_batch_allocr {
78
+ struct llama_batch batch; // the batch held by this allocator (by value)
79
+
80
+ std::array<llama_seq_id, 1> seq_id_0 = { 0 }; // default sequence id
81
+ std::vector<llama_pos> pos; // backing storage — NOTE(review): presumably used only when the input batch lacks positions; confirm
82
+ std::vector<int32_t> n_seq_id; // backing storage — NOTE(review): presumably used only when the input batch lacks n_seq_id; confirm
83
+ std::vector<llama_seq_id *> seq_id; // backing storage — NOTE(review): presumably entries point at seq_id_0; confirm
84
+ std::vector<int8_t> logits; // backing storage — NOTE(review): presumably used only when the input batch lacks logits flags; confirm
85
+
86
+ // optionally fill in the batch returned by llama_batch_get_one
87
+ llama_batch_allocr(struct llama_batch in_batch, llama_pos p0); // in_batch is taken by value; NOTE(review): p0 is presumably the starting position for generated pos values — confirm
88
+ };
@@ -0,0 +1,53 @@
1
+ #pragma once
2
+
3
+ #include <string>
4
+ #include <vector>
5
+ #include <cstdint>
6
+
7
+ enum llm_chat_template { // chat template identifiers; produced by llm_chat_template_from_str() / llm_chat_detect_template() and consumed by llm_chat_apply_template()
8
+ LLM_CHAT_TEMPLATE_CHATML,
9
+ LLM_CHAT_TEMPLATE_LLAMA_2,
10
+ LLM_CHAT_TEMPLATE_LLAMA_2_SYS,
11
+ LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS,
12
+ LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP,
13
+ LLM_CHAT_TEMPLATE_MISTRAL_V1,
14
+ LLM_CHAT_TEMPLATE_MISTRAL_V3,
15
+ LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN,
16
+ LLM_CHAT_TEMPLATE_MISTRAL_V7,
17
+ LLM_CHAT_TEMPLATE_PHI_3,
18
+ LLM_CHAT_TEMPLATE_PHI_4,
19
+ LLM_CHAT_TEMPLATE_FALCON_3,
20
+ LLM_CHAT_TEMPLATE_ZEPHYR,
21
+ LLM_CHAT_TEMPLATE_MONARCH,
22
+ LLM_CHAT_TEMPLATE_GEMMA,
23
+ LLM_CHAT_TEMPLATE_ORION,
24
+ LLM_CHAT_TEMPLATE_OPENCHAT,
25
+ LLM_CHAT_TEMPLATE_VICUNA,
26
+ LLM_CHAT_TEMPLATE_VICUNA_ORCA,
27
+ LLM_CHAT_TEMPLATE_DEEPSEEK,
28
+ LLM_CHAT_TEMPLATE_DEEPSEEK_2,
29
+ LLM_CHAT_TEMPLATE_DEEPSEEK_3,
30
+ LLM_CHAT_TEMPLATE_COMMAND_R,
31
+ LLM_CHAT_TEMPLATE_LLAMA_3,
32
+ LLM_CHAT_TEMPLATE_CHATGML_3,
33
+ LLM_CHAT_TEMPLATE_CHATGML_4,
34
+ LLM_CHAT_TEMPLATE_GLMEDGE,
35
+ LLM_CHAT_TEMPLATE_MINICPM,
36
+ LLM_CHAT_TEMPLATE_EXAONE_3,
37
+ LLM_CHAT_TEMPLATE_RWKV_WORLD,
38
+ LLM_CHAT_TEMPLATE_GRANITE,
39
+ LLM_CHAT_TEMPLATE_GIGACHAT,
40
+ LLM_CHAT_TEMPLATE_MEGREZ,
41
+ LLM_CHAT_TEMPLATE_UNKNOWN, // last enumerator — NOTE(review): presumably the result when no known template matches; confirm
42
+ };
43
+
44
+ struct llama_chat_message; // forward declaration only; this header uses the type solely through pointers
45
+
46
+ llm_chat_template llm_chat_template_from_str(const std::string & name); // maps a template name to its enumerator; handling of unrecognized names is not visible here — TODO confirm
47
+
48
+ llm_chat_template llm_chat_detect_template(const std::string & tmpl); // NOTE(review): presumably inspects the template text itself to pick an enumerator — confirm
49
+
50
+ int32_t llm_chat_apply_template( // formats `chat` according to `tmpl`, with the result delivered through `dest`
51
+ llm_chat_template tmpl,
52
+ const std::vector<const llama_chat_message *> & chat, // messages are passed as non-owning const pointers
53
+ std::string & dest, bool add_ass); // dest is an output parameter; NOTE(review): add_ass presumably appends the assistant turn prefix, and the meaning of the int32_t return is not visible here — confirm