@fugood/llama.node 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/CMakeLists.txt +1 -8
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +4 -2
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +10 -10
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +14 -17
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +5 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +137 -29
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +46 -34
  27. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  28. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  29. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  30. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  31. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  32. package/src/llama.cpp/CMakeLists.txt +26 -11
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/common/CMakeLists.txt +10 -10
  35. package/src/llama.cpp/common/arg.cpp +2041 -0
  36. package/src/llama.cpp/common/arg.h +77 -0
  37. package/src/llama.cpp/common/common.cpp +523 -1861
  38. package/src/llama.cpp/common/common.h +234 -106
  39. package/src/llama.cpp/common/console.cpp +3 -0
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  41. package/src/llama.cpp/common/log.cpp +401 -0
  42. package/src/llama.cpp/common/log.h +66 -698
  43. package/src/llama.cpp/common/ngram-cache.cpp +39 -36
  44. package/src/llama.cpp/common/ngram-cache.h +19 -19
  45. package/src/llama.cpp/common/sampling.cpp +356 -350
  46. package/src/llama.cpp/common/sampling.h +62 -139
  47. package/src/llama.cpp/common/stb_image.h +5990 -6398
  48. package/src/llama.cpp/docs/build.md +72 -17
  49. package/src/llama.cpp/examples/CMakeLists.txt +1 -2
  50. package/src/llama.cpp/examples/batched/batched.cpp +49 -65
  51. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  53. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
  54. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
  56. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
  58. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  59. package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  61. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  62. package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
  63. package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
  64. package/src/llama.cpp/examples/infill/infill.cpp +131 -192
  65. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
  66. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  67. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
  68. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  69. package/src/llama.cpp/examples/llava/clip.cpp +686 -150
  70. package/src/llama.cpp/examples/llava/clip.h +11 -2
  71. package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
  72. package/src/llama.cpp/examples/llava/llava.cpp +146 -26
  73. package/src/llama.cpp/examples/llava/llava.h +2 -3
  74. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  75. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  76. package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
  77. package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
  78. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  79. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
  80. package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
  81. package/src/llama.cpp/examples/main/main.cpp +216 -313
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
  83. package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
  84. package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  87. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
  88. package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
  89. package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
  90. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
  91. package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
  92. package/src/llama.cpp/examples/server/server.cpp +1347 -1531
  93. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  94. package/src/llama.cpp/examples/server/utils.hpp +396 -107
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/simple/simple.cpp +132 -106
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  99. package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
  100. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  101. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  102. package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
  103. package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
  104. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  105. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  106. package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
  107. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  108. package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
  109. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  110. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  111. package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
  112. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  113. package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
  114. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  115. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  116. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  117. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  118. package/src/llama.cpp/ggml/include/ggml.h +272 -505
  119. package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
  120. package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
  121. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  122. package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
  123. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  124. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  125. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  126. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  127. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  128. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
  129. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  130. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
  131. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  132. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
  133. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  134. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  135. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  136. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  137. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  138. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
  139. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  140. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  141. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  142. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  143. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  151. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
  152. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  153. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  155. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  156. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  157. package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
  158. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  159. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
  160. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  161. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  162. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  163. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  164. package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
  165. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  167. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  169. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
  172. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  173. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  174. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  175. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  176. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  177. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  178. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  179. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
  180. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  181. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  182. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  183. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
  184. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
  187. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
  188. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  192. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  195. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  197. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  198. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  199. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  200. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
  201. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
  202. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
  203. package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
  204. package/src/llama.cpp/include/llama.h +296 -285
  205. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  206. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  207. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  208. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  209. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  210. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  211. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  212. package/src/llama.cpp/src/llama-grammar.h +120 -15
  213. package/src/llama.cpp/src/llama-impl.h +156 -1
  214. package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
  215. package/src/llama.cpp/src/llama-sampling.h +39 -47
  216. package/src/llama.cpp/src/llama-vocab.cpp +390 -127
  217. package/src/llama.cpp/src/llama-vocab.h +60 -20
  218. package/src/llama.cpp/src/llama.cpp +6215 -3263
  219. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  220. package/src/llama.cpp/src/unicode-data.h +4 -4
  221. package/src/llama.cpp/src/unicode.cpp +15 -7
  222. package/src/llama.cpp/tests/CMakeLists.txt +4 -2
  223. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  224. package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
  225. package/src/llama.cpp/tests/test-barrier.cpp +94 -0
  226. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  227. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  228. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  229. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
  230. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  231. package/src/llama.cpp/tests/test-log.cpp +39 -0
  232. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  233. package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
  234. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  235. package/src/llama.cpp/tests/test-rope.cpp +2 -1
  236. package/src/llama.cpp/tests/test-sampling.cpp +226 -142
  237. package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
  238. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  239. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  240. package/patches/llama.patch +0 -22
  241. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  242. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  243. package/src/llama.cpp/common/grammar-parser.h +0 -29
  244. package/src/llama.cpp/common/train.cpp +0 -1513
  245. package/src/llama.cpp/common/train.h +0 -233
  246. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
  247. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  248. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
  249. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
  250. package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
  251. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  252. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
@@ -176,25 +176,15 @@
176
176
  #ifdef GGML_SHARED
177
177
  # if defined(_WIN32) && !defined(__MINGW32__)
178
178
  # ifdef GGML_BUILD
179
- # define GGML_API __declspec(dllexport)
179
+ # define GGML_API __declspec(dllexport) extern
180
180
  # else
181
- # define GGML_API __declspec(dllimport)
181
+ # define GGML_API __declspec(dllimport) extern
182
182
  # endif
183
183
  # else
184
- # define GGML_API __attribute__ ((visibility ("default")))
184
+ # define GGML_API __attribute__ ((visibility ("default"))) extern
185
185
  # endif
186
186
  #else
187
- # define GGML_API
188
- #endif
189
-
190
- #ifdef GGML_MULTIPLATFORM
191
- # if defined(_WIN32)
192
- # define GGML_CALL
193
- # else
194
- # define GGML_CALL __attribute__((__ms_abi__))
195
- # endif
196
- #else
197
- # define GGML_CALL
187
+ # define GGML_API extern
198
188
  #endif
199
189
 
200
190
  // TODO: support for clang
@@ -220,21 +210,24 @@
220
210
  #include <stdio.h>
221
211
 
222
212
  #define GGML_FILE_MAGIC 0x67676d6c // "ggml"
223
- #define GGML_FILE_VERSION 1
213
+ #define GGML_FILE_VERSION 2
224
214
 
225
215
  #define GGML_QNT_VERSION 2 // bump this on quantization format changes
226
216
  #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
227
217
 
228
218
  #define GGML_MAX_DIMS 4
229
219
  #define GGML_MAX_PARAMS 2048
230
- #define GGML_MAX_CONTEXTS 64
231
220
  #define GGML_MAX_SRC 10
221
+ #define GGML_MAX_N_THREADS 512
222
+ #define GGML_MAX_OP_PARAMS 64
223
+
232
224
  #ifndef GGML_MAX_NAME
233
- #define GGML_MAX_NAME 64
225
+ # define GGML_MAX_NAME 64
234
226
  #endif
235
- #define GGML_MAX_OP_PARAMS 64
227
+
236
228
  #define GGML_DEFAULT_N_THREADS 4
237
229
  #define GGML_DEFAULT_GRAPH_SIZE 2048
230
+
238
231
  #if UINTPTR_MAX == 0xFFFFFFFF
239
232
  #define GGML_MEM_ALIGN 4
240
233
  #else
@@ -244,6 +237,8 @@
244
237
  #define GGML_EXIT_SUCCESS 0
245
238
  #define GGML_EXIT_ABORTED 1
246
239
 
240
+ #define GGML_ROPE_TYPE_NEOX 2
241
+
247
242
  #define GGUF_MAGIC "GGUF"
248
243
 
249
244
  #define GGUF_VERSION 3
@@ -255,21 +250,21 @@
255
250
  #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
256
251
 
257
252
  #ifndef NDEBUG
258
- #define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
253
+ # define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
259
254
  #elif defined(__GNUC__)
260
- #define GGML_UNREACHABLE() __builtin_unreachable()
255
+ # define GGML_UNREACHABLE() __builtin_unreachable()
261
256
  #elif defined(_MSC_VER)
262
- #define GGML_UNREACHABLE() __assume(0)
257
+ # define GGML_UNREACHABLE() __assume(0)
263
258
  #else
264
- #define GGML_UNREACHABLE() ((void) 0)
259
+ # define GGML_UNREACHABLE() ((void) 0)
265
260
  #endif
266
261
 
267
262
  #ifdef __cplusplus
268
- #define GGML_NORETURN [[noreturn]]
263
+ # define GGML_NORETURN [[noreturn]]
269
264
  #elif defined(_MSC_VER)
270
- #define GGML_NORETURN __declspec(noreturn)
265
+ # define GGML_NORETURN __declspec(noreturn)
271
266
  #else
272
- #define GGML_NORETURN _Noreturn
267
+ # define GGML_NORETURN _Noreturn
273
268
  #endif
274
269
 
275
270
  #define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__)
@@ -334,7 +329,7 @@ extern "C" {
334
329
  };
335
330
 
336
331
  // get ggml_status name string
337
- GGML_API GGML_CALL const char * ggml_status_to_string(enum ggml_status status);
332
+ GGML_API const char * ggml_status_to_string(enum ggml_status status);
338
333
 
339
334
  // ieee 754-2008 half-precision float16
340
335
  // todo: make this not an integral type
@@ -349,10 +344,12 @@ extern "C" {
349
344
  GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
350
345
  GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
351
346
  GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
347
+ GGML_API void ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
352
348
  GGML_API void ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
353
349
 
354
350
  struct ggml_object;
355
351
  struct ggml_context;
352
+ struct ggml_cgraph;
356
353
 
357
354
  // NOTE: always add types at the end of the enum to keep backward compatibility
358
355
  enum ggml_type {
@@ -390,6 +387,8 @@ extern "C" {
390
387
  GGML_TYPE_Q4_0_4_4 = 31,
391
388
  GGML_TYPE_Q4_0_4_8 = 32,
392
389
  GGML_TYPE_Q4_0_8_8 = 33,
390
+ GGML_TYPE_TQ1_0 = 34,
391
+ GGML_TYPE_TQ2_0 = 35,
393
392
  GGML_TYPE_COUNT,
394
393
  };
395
394
 
@@ -450,10 +449,13 @@ extern "C" {
450
449
  GGML_OP_SQR,
451
450
  GGML_OP_SQRT,
452
451
  GGML_OP_LOG,
452
+ GGML_OP_SIN,
453
+ GGML_OP_COS,
453
454
  GGML_OP_SUM,
454
455
  GGML_OP_SUM_ROWS,
455
456
  GGML_OP_MEAN,
456
457
  GGML_OP_ARGMAX,
458
+ GGML_OP_COUNT_EQUAL,
457
459
  GGML_OP_REPEAT,
458
460
  GGML_OP_REPEAT_BACK,
459
461
  GGML_OP_CONCAT,
@@ -487,9 +489,11 @@ extern "C" {
487
489
  GGML_OP_CLAMP,
488
490
  GGML_OP_CONV_TRANSPOSE_1D,
489
491
  GGML_OP_IM2COL,
492
+ GGML_OP_IM2COL_BACK,
490
493
  GGML_OP_CONV_TRANSPOSE_2D,
491
494
  GGML_OP_POOL_1D,
492
495
  GGML_OP_POOL_2D,
496
+ GGML_OP_POOL_2D_BACK,
493
497
  GGML_OP_UPSCALE, // nearest interpolate
494
498
  GGML_OP_PAD,
495
499
  GGML_OP_ARANGE,
@@ -505,6 +509,7 @@ extern "C" {
505
509
  GGML_OP_WIN_UNPART,
506
510
  GGML_OP_GET_REL_POS,
507
511
  GGML_OP_ADD_REL_POS,
512
+ GGML_OP_RWKV_WKV6,
508
513
 
509
514
  GGML_OP_UNARY,
510
515
 
@@ -521,6 +526,7 @@ extern "C" {
521
526
 
522
527
  GGML_OP_CROSS_ENTROPY_LOSS,
523
528
  GGML_OP_CROSS_ENTROPY_LOSS_BACK,
529
+ GGML_OP_OPT_STEP_ADAMW,
524
530
 
525
531
  GGML_OP_COUNT,
526
532
  };
@@ -539,6 +545,7 @@ extern "C" {
539
545
  GGML_UNARY_OP_SILU,
540
546
  GGML_UNARY_OP_HARDSWISH,
541
547
  GGML_UNARY_OP_HARDSIGMOID,
548
+ GGML_UNARY_OP_EXP,
542
549
 
543
550
  GGML_UNARY_OP_COUNT,
544
551
  };
@@ -550,35 +557,32 @@ extern "C" {
550
557
  };
551
558
 
552
559
  enum ggml_log_level {
553
- GGML_LOG_LEVEL_ERROR = 2,
560
+ GGML_LOG_LEVEL_NONE = 0,
561
+ GGML_LOG_LEVEL_DEBUG = 1,
562
+ GGML_LOG_LEVEL_INFO = 2,
554
563
  GGML_LOG_LEVEL_WARN = 3,
555
- GGML_LOG_LEVEL_INFO = 4,
556
- GGML_LOG_LEVEL_DEBUG = 5
564
+ GGML_LOG_LEVEL_ERROR = 4,
565
+ GGML_LOG_LEVEL_CONT = 5, // continue previous log
557
566
  };
558
567
 
568
+ // this tensor...
559
569
  enum ggml_tensor_flag {
560
- GGML_TENSOR_FLAG_INPUT = 1,
561
- GGML_TENSOR_FLAG_OUTPUT = 2,
562
- GGML_TENSOR_FLAG_PARAM = 4,
570
+ GGML_TENSOR_FLAG_INPUT = 1, // ...is an input for the GGML compute graph
571
+ GGML_TENSOR_FLAG_OUTPUT = 2, // ...is an output for the GGML compute graph
572
+ GGML_TENSOR_FLAG_PARAM = 4, // ...contains trainable parameters
573
+ GGML_TENSOR_FLAG_LOSS = 8, // ...defines loss for numerical optimization (multiple loss tensors add up)
563
574
  };
564
575
 
565
- // ggml object
566
- struct ggml_object {
567
- size_t offs;
568
- size_t size;
569
-
570
- struct ggml_object * next;
571
-
572
- enum ggml_object_type type;
573
-
574
- char padding[4];
576
+ struct ggml_init_params {
577
+ // memory pool
578
+ size_t mem_size; // bytes
579
+ void * mem_buffer; // if NULL, memory will be allocated internally
580
+ bool no_alloc; // don't allocate memory for the tensor data
575
581
  };
576
582
 
577
- static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
578
-
579
583
  // n-dimensional tensor
580
584
  struct ggml_tensor {
581
- enum ggml_type type;
585
+ enum ggml_type type;
582
586
 
583
587
  GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
584
588
 
@@ -598,7 +602,6 @@ extern "C" {
598
602
 
599
603
  int32_t flags;
600
604
 
601
- struct ggml_tensor * grad;
602
605
  struct ggml_tensor * src[GGML_MAX_SRC];
603
606
 
604
607
  // source tensor and offset for views
@@ -611,7 +614,7 @@ extern "C" {
611
614
 
612
615
  void * extra; // extra things e.g. for ggml-cuda.cu
613
616
 
614
- // char padding[4];
617
+ char padding[8];
615
618
  };
616
619
 
617
620
  static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -621,71 +624,6 @@ extern "C" {
621
624
  // If it returns true, the computation is aborted
622
625
  typedef bool (*ggml_abort_callback)(void * data);
623
626
 
624
- // the compute plan that needs to be prepared for ggml_graph_compute()
625
- // since https://github.com/ggerganov/ggml/issues/287
626
- struct ggml_cplan {
627
- size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
628
- uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
629
-
630
- int n_threads;
631
-
632
- // abort ggml_graph_compute when true
633
- ggml_abort_callback abort_callback;
634
- void * abort_callback_data;
635
- };
636
-
637
- enum ggml_cgraph_eval_order {
638
- GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
639
- GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
640
- GGML_CGRAPH_EVAL_ORDER_COUNT
641
- };
642
-
643
- typedef uint32_t ggml_bitset_t;
644
-
645
- struct ggml_hash_set {
646
- size_t size;
647
- ggml_bitset_t * used;
648
- struct ggml_tensor ** keys;
649
- };
650
-
651
- // computation graph
652
- struct ggml_cgraph {
653
- int size;
654
- int n_nodes;
655
- int n_leafs;
656
-
657
- struct ggml_tensor ** nodes;
658
- struct ggml_tensor ** grads;
659
- struct ggml_tensor ** leafs;
660
-
661
- struct ggml_hash_set visited_hash_set;
662
-
663
- enum ggml_cgraph_eval_order order;
664
- };
665
-
666
- // scratch buffer
667
- struct ggml_scratch {
668
- size_t offs;
669
- size_t size;
670
- void * data;
671
- };
672
-
673
- struct ggml_init_params {
674
- // memory pool
675
- size_t mem_size; // bytes
676
- void * mem_buffer; // if NULL, memory will be allocated internally
677
- bool no_alloc; // don't allocate memory for the tensor data
678
- };
679
-
680
- // numa strategies
681
- enum ggml_numa_strategy {
682
- GGML_NUMA_STRATEGY_DISABLED = 0,
683
- GGML_NUMA_STRATEGY_DISTRIBUTE = 1,
684
- GGML_NUMA_STRATEGY_ISOLATE = 2,
685
- GGML_NUMA_STRATEGY_NUMACTL = 3,
686
- GGML_NUMA_STRATEGY_MIRROR = 4,
687
- GGML_NUMA_STRATEGY_COUNT
688
- };
689
627
 
690
628
  //
691
629
  // GUID
@@ -708,52 +646,49 @@ extern "C" {
708
646
  // accepts a UTF-8 path, even on Windows
709
647
  GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
710
648
 
711
- GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
712
- GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
713
-
714
649
  GGML_API void ggml_print_object (const struct ggml_object * obj);
715
650
  GGML_API void ggml_print_objects(const struct ggml_context * ctx);
716
651
 
717
- GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor);
718
- GGML_API GGML_CALL int64_t ggml_nrows (const struct ggml_tensor * tensor);
719
- GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor);
720
- GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
652
+ GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
653
+ GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
654
+ GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
655
+ GGML_API size_t ggml_nbytes_pad(const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
721
656
 
722
- GGML_API GGML_CALL int64_t ggml_blck_size(enum ggml_type type);
723
- GGML_API GGML_CALL size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
724
- GGML_API GGML_CALL size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
657
+ GGML_API int64_t ggml_blck_size(enum ggml_type type);
658
+ GGML_API size_t ggml_type_size(enum ggml_type type); // size in bytes for all elements in a block
659
+ GGML_API size_t ggml_row_size (enum ggml_type type, int64_t ne); // size in bytes for all elements in a row
725
660
 
726
661
  GGML_DEPRECATED(
727
662
  GGML_API double ggml_type_sizef(enum ggml_type type), // ggml_type_size()/ggml_blck_size() as float
728
663
  "use ggml_row_size() instead");
729
664
 
730
- GGML_API GGML_CALL const char * ggml_type_name(enum ggml_type type);
731
- GGML_API GGML_CALL const char * ggml_op_name (enum ggml_op op);
732
- GGML_API const char * ggml_op_symbol(enum ggml_op op);
665
+ GGML_API const char * ggml_type_name(enum ggml_type type);
666
+ GGML_API const char * ggml_op_name (enum ggml_op op);
667
+ GGML_API const char * ggml_op_symbol(enum ggml_op op);
733
668
 
734
- GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
735
- GGML_API GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
669
+ GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op);
670
+ GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name
736
671
 
737
- GGML_API GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor);
672
+ GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
738
673
 
739
- GGML_API GGML_CALL bool ggml_is_quantized(enum ggml_type type);
674
+ GGML_API bool ggml_is_quantized(enum ggml_type type);
740
675
 
741
676
  // TODO: temporary until model loading of ggml examples is refactored
742
677
  GGML_API enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype);
743
678
 
744
- GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
745
- GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
746
- GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
747
- GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
748
- GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
749
- GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
750
- GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
751
- GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
679
+ GGML_API bool ggml_is_transposed(const struct ggml_tensor * tensor);
680
+ GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
681
+ GGML_API bool ggml_is_empty (const struct ggml_tensor * tensor);
682
+ GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
683
+ GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
684
+ GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
685
+ GGML_API bool ggml_is_3d (const struct ggml_tensor * tensor);
686
+ GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
752
687
 
753
- GGML_API GGML_CALL bool ggml_is_contiguous (const struct ggml_tensor * tensor);
754
- GGML_API GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
755
- GGML_API GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
756
- GGML_API GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
688
+ GGML_API bool ggml_is_contiguous (const struct ggml_tensor * tensor);
689
+ GGML_API bool ggml_is_contiguous_0(const struct ggml_tensor * tensor); // same as ggml_is_contiguous()
690
+ GGML_API bool ggml_is_contiguous_1(const struct ggml_tensor * tensor); // contiguous for dims >= 1
691
+ GGML_API bool ggml_is_contiguous_2(const struct ggml_tensor * tensor); // contiguous for dims >= 2
757
692
 
758
693
  GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1);
759
694
  GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
@@ -767,12 +702,12 @@ extern "C" {
767
702
 
768
703
  // main
769
704
 
770
- GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
771
- GGML_API void ggml_free(struct ggml_context * ctx);
705
+ GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
706
+ GGML_API void ggml_reset(struct ggml_context * ctx);
707
+ GGML_API void ggml_free (struct ggml_context * ctx);
772
708
 
773
709
  GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
774
710
 
775
- GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
776
711
  GGML_API bool ggml_get_no_alloc(struct ggml_context * ctx);
777
712
  GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
778
713
 
@@ -812,8 +747,7 @@ extern "C" {
812
747
  int64_t ne2,
813
748
  int64_t ne3);
814
749
 
815
- GGML_API struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value);
816
- GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
750
+ GGML_API void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes);
817
751
 
818
752
  GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
819
753
  GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
@@ -823,35 +757,25 @@ extern "C" {
823
757
  GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
824
758
  GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
825
759
 
826
- GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
827
- GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
828
- GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
829
-
830
760
  // Converts a flat index into coordinates
831
- GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
761
+ GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
832
762
 
833
- GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
834
- GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
835
-
836
- GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
837
- GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
838
-
839
- GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
840
- GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
841
-
842
- GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
843
- GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
763
+ GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
844
764
 
845
765
  GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
846
766
  GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
847
767
 
848
- GGML_API GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
849
-
850
768
  GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
851
769
  GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
852
770
  GGML_ATTRIBUTE_FORMAT(2, 3)
853
771
  GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
854
772
 
773
+ // Tensor flags
774
+ GGML_API void ggml_set_input(struct ggml_tensor * tensor);
775
+ GGML_API void ggml_set_output(struct ggml_tensor * tensor);
776
+ GGML_API void ggml_set_param(struct ggml_context * ctx, struct ggml_tensor * tensor);
777
+ GGML_API void ggml_set_loss(struct ggml_tensor * tensor);
778
+
855
779
  //
856
780
  // operations on tensors with backpropagation
857
781
  //
@@ -966,6 +890,22 @@ extern "C" {
966
890
  struct ggml_context * ctx,
967
891
  struct ggml_tensor * a);
968
892
 
893
+ GGML_API struct ggml_tensor * ggml_sin(
894
+ struct ggml_context * ctx,
895
+ struct ggml_tensor * a);
896
+
897
+ GGML_API struct ggml_tensor * ggml_sin_inplace(
898
+ struct ggml_context * ctx,
899
+ struct ggml_tensor * a);
900
+
901
+ GGML_API struct ggml_tensor * ggml_cos(
902
+ struct ggml_context * ctx,
903
+ struct ggml_tensor * a);
904
+
905
+ GGML_API struct ggml_tensor * ggml_cos_inplace(
906
+ struct ggml_context * ctx,
907
+ struct ggml_tensor * a);
908
+
969
909
  // return scalar
970
910
  GGML_API struct ggml_tensor * ggml_sum(
971
911
  struct ggml_context * ctx,
@@ -986,6 +926,12 @@ extern "C" {
986
926
  struct ggml_context * ctx,
987
927
  struct ggml_tensor * a);
988
928
 
929
+ // count number of equal elements in a and b
930
+ GGML_API struct ggml_tensor * ggml_count_equal(
931
+ struct ggml_context * ctx,
932
+ struct ggml_tensor * a,
933
+ struct ggml_tensor * b);
934
+
989
935
  // if a is the same shape as b, and a is not parameter, return a
990
936
  // otherwise, return a new tensor: repeat(a) to fit in b
991
937
  GGML_API struct ggml_tensor * ggml_repeat(
@@ -1116,6 +1062,14 @@ extern "C" {
1116
1062
  struct ggml_context * ctx,
1117
1063
  struct ggml_tensor * a);
1118
1064
 
1065
+ GGML_API struct ggml_tensor * ggml_exp(
1066
+ struct ggml_context * ctx,
1067
+ struct ggml_tensor * a);
1068
+
1069
+ GGML_API struct ggml_tensor * ggml_exp_inplace(
1070
+ struct ggml_context * ctx,
1071
+ struct ggml_tensor * a);
1072
+
1119
1073
  // normalize along rows
1120
1074
  GGML_API struct ggml_tensor * ggml_norm(
1121
1075
  struct ggml_context * ctx,
@@ -1139,16 +1093,17 @@ extern "C" {
1139
1093
 
1140
1094
  // group normalize along ne0*ne1*n_groups
1141
1095
  // used in stable-diffusion
1142
- // TODO: eps is hardcoded to 1e-6 for now
1143
1096
  GGML_API struct ggml_tensor * ggml_group_norm(
1144
1097
  struct ggml_context * ctx,
1145
1098
  struct ggml_tensor * a,
1146
- int n_groups);
1099
+ int n_groups,
1100
+ float eps);
1147
1101
 
1148
1102
  GGML_API struct ggml_tensor * ggml_group_norm_inplace(
1149
1103
  struct ggml_context * ctx,
1150
1104
  struct ggml_tensor * a,
1151
- int n_groups);
1105
+ int n_groups,
1106
+ float eps);
1152
1107
 
1153
1108
  // a - x
1154
1109
  // b - dy
@@ -1210,7 +1165,7 @@ extern "C" {
1210
1165
  size_t nb1,
1211
1166
  size_t nb2,
1212
1167
  size_t nb3,
1213
- size_t offset);
1168
+ size_t offset); // in bytes
1214
1169
 
1215
1170
  // b -> view(a,offset,nb1,nb2,3), return view(a)
1216
1171
  GGML_API struct ggml_tensor * ggml_set_inplace(
@@ -1220,19 +1175,19 @@ extern "C" {
1220
1175
  size_t nb1,
1221
1176
  size_t nb2,
1222
1177
  size_t nb3,
1223
- size_t offset);
1178
+ size_t offset); // in bytes
1224
1179
 
1225
1180
  GGML_API struct ggml_tensor * ggml_set_1d(
1226
1181
  struct ggml_context * ctx,
1227
1182
  struct ggml_tensor * a,
1228
1183
  struct ggml_tensor * b,
1229
- size_t offset);
1184
+ size_t offset); // in bytes
1230
1185
 
1231
1186
  GGML_API struct ggml_tensor * ggml_set_1d_inplace(
1232
1187
  struct ggml_context * ctx,
1233
1188
  struct ggml_tensor * a,
1234
1189
  struct ggml_tensor * b,
1235
- size_t offset);
1190
+ size_t offset); // in bytes
1236
1191
 
1237
1192
  // b -> view(a,offset,nb1,nb2,3), return modified a
1238
1193
  GGML_API struct ggml_tensor * ggml_set_2d(
@@ -1240,7 +1195,7 @@ extern "C" {
1240
1195
  struct ggml_tensor * a,
1241
1196
  struct ggml_tensor * b,
1242
1197
  size_t nb1,
1243
- size_t offset);
1198
+ size_t offset); // in bytes
1244
1199
 
1245
1200
  // b -> view(a,offset,nb1,nb2,3), return view(a)
1246
1201
  GGML_API struct ggml_tensor * ggml_set_2d_inplace(
@@ -1248,7 +1203,7 @@ extern "C" {
1248
1203
  struct ggml_tensor * a,
1249
1204
  struct ggml_tensor * b,
1250
1205
  size_t nb1,
1251
- size_t offset);
1206
+ size_t offset); // in bytes
1252
1207
 
1253
1208
  // a -> b, return view(b)
1254
1209
  GGML_API struct ggml_tensor * ggml_cpy(
@@ -1383,14 +1338,14 @@ extern "C" {
1383
1338
  // supports 3D: a->ne[2] == b->ne[1]
1384
1339
  GGML_API struct ggml_tensor * ggml_get_rows(
1385
1340
  struct ggml_context * ctx,
1386
- struct ggml_tensor * a,
1387
- struct ggml_tensor * b);
1341
+ struct ggml_tensor * a, // data
1342
+ struct ggml_tensor * b); // row indices
1388
1343
 
1389
1344
  GGML_API struct ggml_tensor * ggml_get_rows_back(
1390
1345
  struct ggml_context * ctx,
1391
- struct ggml_tensor * a,
1392
- struct ggml_tensor * b,
1393
- struct ggml_tensor * c);
1346
+ struct ggml_tensor * a, // gradients of ggml_get_rows result
1347
+ struct ggml_tensor * b, // row indices
1348
+ struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape
1394
1349
 
1395
1350
  GGML_API struct ggml_tensor * ggml_diag(
1396
1351
  struct ggml_context * ctx,
@@ -1451,11 +1406,10 @@ extern "C" {
1451
1406
  struct ggml_tensor * b);
1452
1407
 
1453
1408
  // rotary position embedding
1454
- // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
1455
- // if mode & 2 == 1, GPT-NeoX style
1409
+ // if (mode & 1) - skip n_past elements (NOT SUPPORTED)
1410
+ // if (mode & GGML_ROPE_TYPE_NEOX) - GPT-NeoX style
1456
1411
  //
1457
1412
  // b is an int32 vector with size a->ne[2], it contains the positions
1458
- // c is freq factors (e.g. phi3-128k), (optional)
1459
1413
  GGML_API struct ggml_tensor * ggml_rope(
1460
1414
  struct ggml_context * ctx,
1461
1415
  struct ggml_tensor * a,
@@ -1472,6 +1426,7 @@ extern "C" {
1472
1426
  int mode);
1473
1427
 
1474
1428
  // custom RoPE
1429
+ // c is freq factors (e.g. phi3-128k), (optional)
1475
1430
  GGML_API struct ggml_tensor * ggml_rope_ext(
1476
1431
  struct ggml_context * ctx,
1477
1432
  struct ggml_tensor * a,
@@ -1534,16 +1489,16 @@ extern "C" {
1534
1489
  "use ggml_rope_ext_inplace instead");
1535
1490
 
1536
1491
  // compute correction dims for YaRN RoPE scaling
1537
- GGML_CALL void ggml_rope_yarn_corr_dims(
1492
+ GGML_API void ggml_rope_yarn_corr_dims(
1538
1493
  int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
1539
1494
 
1540
1495
  // rotary position embedding backward, i.e compute dx from dy
1541
1496
  // a - dy
1542
1497
  GGML_API struct ggml_tensor * ggml_rope_back(
1543
1498
  struct ggml_context * ctx,
1544
- struct ggml_tensor * a,
1545
- struct ggml_tensor * b,
1546
- struct ggml_tensor * c,
1499
+ struct ggml_tensor * a, // gradients of ggml_rope result
1500
+ struct ggml_tensor * b, // positions
1501
+ struct ggml_tensor * c, // freq factors
1547
1502
  int n_dims,
1548
1503
  int mode,
1549
1504
  int n_ctx_orig,
@@ -1562,34 +1517,49 @@ extern "C" {
1562
1517
  float min,
1563
1518
  float max);
1564
1519
 
1520
+ // im2col
1521
+ // converts data into a format that effectively results in a convolution when combined with matrix multiplication
1565
1522
  GGML_API struct ggml_tensor * ggml_im2col(
1566
1523
  struct ggml_context * ctx,
1567
- struct ggml_tensor * a,
1568
- struct ggml_tensor * b,
1569
- int s0,
1570
- int s1,
1571
- int p0,
1572
- int p1,
1573
- int d0,
1574
- int d1,
1575
- bool is_2D,
1576
- enum ggml_type dst_type);
1524
+ struct ggml_tensor * a, // convolution kernel
1525
+ struct ggml_tensor * b, // data
1526
+ int s0, // stride dimension 0
1527
+ int s1, // stride dimension 1
1528
+ int p0, // padding dimension 0
1529
+ int p1, // padding dimension 1
1530
+ int d0, // dilation dimension 0
1531
+ int d1, // dilation dimension 1
1532
+ bool is_2D,
1533
+ enum ggml_type dst_type);
1534
+
1535
+ GGML_API struct ggml_tensor * ggml_im2col_back(
1536
+ struct ggml_context * ctx,
1537
+ struct ggml_tensor * a, // convolution kernel
1538
+ struct ggml_tensor * b, // gradient of im2col output
1539
+ int64_t * ne, // shape of im2col input
1540
+ int s0, // stride dimension 0
1541
+ int s1, // stride dimension 1
1542
+ int p0, // padding dimension 0
1543
+ int p1, // padding dimension 1
1544
+ int d0, // dilation dimension 0
1545
+ int d1, // dilation dimension 1
1546
+ bool is_2D);
1577
1547
 
1578
1548
  GGML_API struct ggml_tensor * ggml_conv_depthwise_2d(
1579
1549
  struct ggml_context * ctx,
1580
- struct ggml_tensor * a,
1581
- struct ggml_tensor * b,
1582
- int s0,
1583
- int s1,
1584
- int p0,
1585
- int p1,
1586
- int d0,
1587
- int d1);
1550
+ struct ggml_tensor * a, // convolution kernel
1551
+ struct ggml_tensor * b, // data
1552
+ int s0, // stride dimension 0
1553
+ int s1, // stride dimension 1
1554
+ int p0, // padding dimension 0
1555
+ int p1, // padding dimension 1
1556
+ int d0, // dilation dimension 0
1557
+ int d1); // dilation dimension 1
1588
1558
 
1589
1559
  GGML_API struct ggml_tensor * ggml_conv_1d(
1590
1560
  struct ggml_context * ctx,
1591
- struct ggml_tensor * a,
1592
- struct ggml_tensor * b,
1561
+ struct ggml_tensor * a, // convolution kernel
1562
+ struct ggml_tensor * b, // data
1593
1563
  int s0, // stride
1594
1564
  int p0, // padding
1595
1565
  int d0); // dilation
@@ -1598,29 +1568,29 @@ extern "C" {
1598
1568
  // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
1599
1569
  GGML_API struct ggml_tensor* ggml_conv_1d_ph(
1600
1570
  struct ggml_context * ctx,
1601
- struct ggml_tensor * a,
1602
- struct ggml_tensor * b,
1603
- int s,
1604
- int d);
1571
+ struct ggml_tensor * a, // convolution kernel
1572
+ struct ggml_tensor * b, // data
1573
+ int s, // stride
1574
+ int d); // dilation
1605
1575
 
1606
1576
  GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
1607
1577
  struct ggml_context * ctx,
1608
- struct ggml_tensor * a,
1609
- struct ggml_tensor * b,
1610
- int s0,
1611
- int p0,
1612
- int d0);
1578
+ struct ggml_tensor * a, // convolution kernel
1579
+ struct ggml_tensor * b, // data
1580
+ int s0, // stride
1581
+ int p0, // padding
1582
+ int d0); // dilation
1613
1583
 
1614
1584
  GGML_API struct ggml_tensor * ggml_conv_2d(
1615
1585
  struct ggml_context * ctx,
1616
- struct ggml_tensor * a,
1617
- struct ggml_tensor * b,
1618
- int s0,
1619
- int s1,
1620
- int p0,
1621
- int p1,
1622
- int d0,
1623
- int d1);
1586
+ struct ggml_tensor * a, // convolution kernel
1587
+ struct ggml_tensor * b, // data
1588
+ int s0, // stride dimension 0
1589
+ int s1, // stride dimension 1
1590
+ int p0, // padding dimension 0
1591
+ int p1, // padding dimension 1
1592
+ int d0, // dilation dimension 0
1593
+ int d1); // dilation dimension 1
1624
1594
 
1625
1595
 
1626
1596
  // kernel size is a->ne[0] x a->ne[1]
@@ -1682,6 +1652,18 @@ extern "C" {
1682
1652
  float p0,
1683
1653
  float p1);
1684
1654
 
1655
+ GGML_API struct ggml_tensor * ggml_pool_2d_back(
1656
+ struct ggml_context * ctx,
1657
+ struct ggml_tensor * a,
1658
+ struct ggml_tensor * af, // "a"/input used in forward pass
1659
+ enum ggml_op_pool op,
1660
+ int k0,
1661
+ int k1,
1662
+ int s0,
1663
+ int s1,
1664
+ float p0,
1665
+ float p1);
1666
+
1685
1667
  // nearest interpolate
1686
1668
  // multiplies ne0 and ne1 by scale factor
1687
1669
  // used in stable-diffusion
@@ -1756,12 +1738,16 @@ extern "C" {
1756
1738
  struct ggml_tensor * v,
1757
1739
  struct ggml_tensor * mask,
1758
1740
  float scale,
1759
- float max_bias);
1741
+ float max_bias,
1742
+ float logit_softcap);
1760
1743
 
1761
1744
  GGML_API void ggml_flash_attn_ext_set_prec(
1762
1745
  struct ggml_tensor * a,
1763
1746
  enum ggml_prec prec);
1764
1747
 
1748
+ GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
1749
+ const struct ggml_tensor * a);
1750
+
1765
1751
  // TODO: needs to be adapted to ggml_flash_attn_ext
1766
1752
  GGML_API struct ggml_tensor * ggml_flash_attn_back(
1767
1753
  struct ggml_context * ctx,
@@ -1773,10 +1759,8 @@ extern "C" {
1773
1759
 
1774
1760
  GGML_API struct ggml_tensor * ggml_ssm_conv(
1775
1761
  struct ggml_context * ctx,
1776
- struct ggml_tensor * s,
1777
- struct ggml_tensor * x,
1778
- struct ggml_tensor * c,
1779
- struct ggml_tensor * sq);
1762
+ struct ggml_tensor * sx,
1763
+ struct ggml_tensor * c);
1780
1764
 
1781
1765
  GGML_API struct ggml_tensor * ggml_ssm_scan(
1782
1766
  struct ggml_context * ctx,
@@ -1785,8 +1769,7 @@ extern "C" {
1785
1769
  struct ggml_tensor * dt,
1786
1770
  struct ggml_tensor * A,
1787
1771
  struct ggml_tensor * B,
1788
- struct ggml_tensor * C,
1789
- struct ggml_tensor * sq);
1772
+ struct ggml_tensor * C);
1790
1773
 
1791
1774
  // partition into non-overlapping windows with padding if needed
1792
1775
  // example:
@@ -1838,6 +1821,15 @@ extern "C" {
1838
1821
  struct ggml_tensor * pw,
1839
1822
  struct ggml_tensor * ph);
1840
1823
 
1824
+ GGML_API struct ggml_tensor * ggml_rwkv_wkv6(
1825
+ struct ggml_context * ctx,
1826
+ struct ggml_tensor * k,
1827
+ struct ggml_tensor * v,
1828
+ struct ggml_tensor * r,
1829
+ struct ggml_tensor * tf,
1830
+ struct ggml_tensor * td,
1831
+ struct ggml_tensor * state);
1832
+
1841
1833
  // custom operators
1842
1834
 
1843
1835
  typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
@@ -1921,7 +1913,8 @@ extern "C" {
1921
1913
  typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
1922
1914
  typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
1923
1915
 
1924
- #define GGML_N_TASKS_MAX -1
1916
+ #define GGML_N_TASKS_MAX (-1)
1917
+ // n_tasks == GGML_N_TASKS_MAX means to use max number of tasks
1925
1918
 
1926
1919
  GGML_API struct ggml_tensor * ggml_map_custom1(
1927
1920
  struct ggml_context * ctx,
@@ -1974,49 +1967,59 @@ extern "C" {
1974
1967
  // loss function
1975
1968
 
1976
1969
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
1977
- struct ggml_context * ctx,
1978
- struct ggml_tensor * a,
1979
- struct ggml_tensor * b);
1970
+ struct ggml_context * ctx,
1971
+ struct ggml_tensor * a, // logits
1972
+ struct ggml_tensor * b); // labels
1980
1973
 
1981
1974
  GGML_API struct ggml_tensor * ggml_cross_entropy_loss_back(
1982
- struct ggml_context * ctx,
1983
- struct ggml_tensor * a,
1984
- struct ggml_tensor * b,
1985
- struct ggml_tensor * c);
1975
+ struct ggml_context * ctx,
1976
+ struct ggml_tensor * a, // logits
1977
+ struct ggml_tensor * b, // labels
1978
+ struct ggml_tensor * c); // gradients of cross_entropy_loss result
1979
+
1980
+ // AdamW optimizer step
1981
+ // Paper: https://arxiv.org/pdf/1711.05101v3.pdf
1982
+ // PyTorch: https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html
1983
+ GGML_API struct ggml_tensor * ggml_opt_step_adamw(
1984
+ struct ggml_context * ctx,
1985
+ struct ggml_tensor * a,
1986
+ struct ggml_tensor * grad,
1987
+ struct ggml_tensor * m,
1988
+ struct ggml_tensor * v,
1989
+ struct ggml_tensor * adamw_params); // parameters such a the learning rate
1986
1990
 
1987
1991
  //
1988
1992
  // automatic differentiation
1989
1993
  //
1990
1994
 
1991
- GGML_API void ggml_set_param(
1992
- struct ggml_context * ctx,
1993
- struct ggml_tensor * tensor);
1995
+ GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1996
+ GGML_API void ggml_build_backward_expand(
1997
+ struct ggml_context * ctx_static, // context for static gradients (loss + gradient accumulation)
1998
+ struct ggml_context * ctx_compute, // context for gradient computation
1999
+ struct ggml_cgraph * cgraph,
2000
+ bool accumulate); // whether or not gradients should be accumulated, requires static allocation of tensors in ctx_static
1994
2001
 
2002
+ // graph allocation in a context
2003
+ GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
2004
+ GGML_API struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads);
2005
+ GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
2006
+ GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
2007
+ GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // set regular grads + optimizer momenta to 0, set loss grad to 1
2008
+ GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
1995
2009
 
1996
- GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
1997
- GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
2010
+ GGML_API int ggml_graph_size (struct ggml_cgraph * cgraph);
2011
+ GGML_API struct ggml_tensor * ggml_graph_node (struct ggml_cgraph * cgraph, int i); // if i < 0, returns nodes[n_nodes + i]
2012
+ GGML_API struct ggml_tensor ** ggml_graph_nodes (struct ggml_cgraph * cgraph);
2013
+ GGML_API int ggml_graph_n_nodes(struct ggml_cgraph * cgraph);
1998
2014
 
1999
- // graph allocation in a context
2000
- GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
2001
- GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
2002
- GGML_API struct ggml_cgraph * ggml_graph_dup (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
2003
- GGML_API struct ggml_cgraph ggml_graph_view (struct ggml_cgraph * cgraph, int i0, int i1);
2004
- GGML_API void ggml_graph_cpy (struct ggml_cgraph * src, struct ggml_cgraph * dst);
2005
- GGML_API void ggml_graph_reset (struct ggml_cgraph * cgraph); // zero grads
2006
- GGML_API void ggml_graph_clear (struct ggml_cgraph * cgraph);
2015
+ GGML_API void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
2007
2016
 
2008
2017
  GGML_API size_t ggml_graph_overhead(void);
2009
2018
  GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
2010
2019
 
2011
- // ggml_graph_plan() has to be called before ggml_graph_compute()
2012
- // when plan.work_size > 0, caller must allocate memory for plan.work_data
2013
- GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
2014
- GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
2015
- // same as ggml_graph_compute() but the work data is allocated as a part of the context
2016
- // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
2017
- GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);
2018
-
2019
- GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
2020
+ GGML_API struct ggml_tensor * ggml_graph_get_tensor (const struct ggml_cgraph * cgraph, const char * name);
2021
+ GGML_API struct ggml_tensor * ggml_graph_get_grad (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
2022
+ GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
2020
2023
 
2021
2024
  GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
2022
2025
  GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
@@ -2027,197 +2030,14 @@ extern "C" {
  // dump the graph into a file using the dot format
  GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);

- // build gradient checkpointing backward graph gb for gf using provided checkpoints
- // gb_tmp will contain original backward graph with rewritten backward process nodes,
- // but without the second forward pass nodes.
- GGML_API void ggml_build_backward_gradient_checkpointing(
- struct ggml_context * ctx,
- struct ggml_cgraph * gf,
- struct ggml_cgraph * gb,
- struct ggml_cgraph * gb_tmp,
- struct ggml_tensor * * checkpoints,
- int n_checkpoints);
- //
- // optimization
- //
-
- // optimization methods
- enum ggml_opt_type {
- GGML_OPT_TYPE_ADAM,
- GGML_OPT_TYPE_LBFGS,
- };
-
- // linesearch methods
- enum ggml_linesearch {
- GGML_LINESEARCH_DEFAULT = 1,
-
- GGML_LINESEARCH_BACKTRACKING_ARMIJO = 0,
- GGML_LINESEARCH_BACKTRACKING_WOLFE = 1,
- GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 2,
- };
-
- // optimization return values
- enum ggml_opt_result {
- GGML_OPT_RESULT_OK = 0,
- GGML_OPT_RESULT_DID_NOT_CONVERGE,
- GGML_OPT_RESULT_NO_CONTEXT,
- GGML_OPT_RESULT_INVALID_WOLFE,
- GGML_OPT_RESULT_FAIL,
- GGML_OPT_RESULT_CANCEL,
-
- GGML_LINESEARCH_FAIL = -128,
- GGML_LINESEARCH_MINIMUM_STEP,
- GGML_LINESEARCH_MAXIMUM_STEP,
- GGML_LINESEARCH_MAXIMUM_ITERATIONS,
- GGML_LINESEARCH_INVALID_PARAMETERS,
- };
-
- typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
+ // TODO these functions were sandwiched in the old optimization interface, is there a better place for them?
  typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);

- // optimization parameters
- //
- // see ggml.c (ggml_opt_default_params) for default values
- //
- struct ggml_opt_params {
- enum ggml_opt_type type;
-
- size_t graph_size;
-
- int n_threads;
-
- // delta-based convergence test
- //
- // if past == 0 - disabled
- // if past > 0:
- // stop if |f(x) - f(x_past)| < delta * max(1, |f(x)|)
- //
- int past;
- float delta;
-
- // maximum number of iterations without improvement
- //
- // if 0 - disabled
- // if > 0:
- // assume convergence if no cost improvement in this number of iterations
- //
- int max_no_improvement;
-
- bool print_forward_graph;
- bool print_backward_graph;
-
- int n_gradient_accumulation;
-
- // ADAM parameters
- struct {
- int n_iter;
-
- float sched; // schedule multiplier (fixed, decay or warmup)
- float decay; // weight decay for AdamW, use 0.0f to disable
- int decay_min_ndim; // minimum number of tensor dimension to apply weight decay
- float alpha; // learning rate
- float beta1;
- float beta2;
- float eps; // epsilon for numerical stability
- float eps_f; // epsilon for convergence test
- float eps_g; // epsilon for convergence test
- float gclip; // gradient clipping
- } adam;
-
- // LBFGS parameters
- struct {
- int m; // number of corrections to approximate the inv. Hessian
- int n_iter;
- int max_linesearch;
-
- float eps; // convergence tolerance
- float ftol; // line search tolerance
- float wolfe;
- float min_step;
- float max_step;
-
- enum ggml_linesearch linesearch;
- } lbfgs;
- };
-
- struct ggml_opt_context {
- struct ggml_context * ctx;
- struct ggml_opt_params params;
-
- int iter;
- int64_t nx; // number of parameter elements
-
- bool just_initialized;
-
- float loss_before;
- float loss_after;
-
- struct {
- struct ggml_tensor * g; // current gradient
- struct ggml_tensor * m; // first moment
- struct ggml_tensor * v; // second moment
- struct ggml_tensor * pf; // past function values
- float fx_best;
- float fx_prev;
- int n_no_improvement;
- } adam;
-
- struct {
- struct ggml_tensor * x; // current parameters
- struct ggml_tensor * xp; // previous parameters
- struct ggml_tensor * g; // current gradient
- struct ggml_tensor * gp; // previous gradient
- struct ggml_tensor * d; // search direction
- struct ggml_tensor * pf; // past function values
- struct ggml_tensor * lmal; // the L-BFGS memory alpha
- struct ggml_tensor * lmys; // the L-BFGS memory ys
- struct ggml_tensor * lms; // the L-BFGS memory s
- struct ggml_tensor * lmy; // the L-BFGS memory y
- float fx_best;
- float step;
- int j;
- int k;
- int end;
- int n_no_improvement;
- } lbfgs;
- };
-
- GGML_API struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type);
-
- // optimize the function defined by the tensor f
- GGML_API enum ggml_opt_result ggml_opt(
- struct ggml_context * ctx,
- struct ggml_opt_params params,
- struct ggml_tensor * f);
-
- // initialize optimizer context
- GGML_API void ggml_opt_init(
- struct ggml_context * ctx,
- struct ggml_opt_context * opt,
- struct ggml_opt_params params,
- int64_t nx);
-
- // continue optimizing the function defined by the tensor f
- GGML_API enum ggml_opt_result ggml_opt_resume(
- struct ggml_context * ctx,
- struct ggml_opt_context * opt,
- struct ggml_tensor * f);
-
- // continue optimizing the function defined by the tensor f
- GGML_API enum ggml_opt_result ggml_opt_resume_g(
- struct ggml_context * ctx,
- struct ggml_opt_context * opt,
- struct ggml_tensor * f,
- struct ggml_cgraph * gf,
- struct ggml_cgraph * gb,
- ggml_opt_callback callback,
- void * callback_data);
+ // Set callback for all future logging events.
+ // If this is not called, or NULL is supplied, everything is output on stderr.
+ GGML_API void ggml_log_set(ggml_log_callback log_callback, void * user_data);
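ggml_log_set installs a single global hook for ggml's logging; the callback type is the ggml_log_callback typedef kept above. A minimal sketch of routing log output through a custom sink; my_log_cb is a hypothetical name:

    #include <stdio.h>
    #include "ggml.h"

    // Forward ggml log lines to stdout with a numeric level prefix.
    static void my_log_cb(enum ggml_log_level level, const char * text, void * user_data) {
        (void) user_data;                      // no per-callback state in this sketch
        fprintf(stdout, "[ggml %d] %s", (int) level, text);
    }

    // During initialization:
    //     ggml_log_set(my_log_cb, NULL);      // per the header comment, NULL restores the default stderr output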
 
- //
- // tensor flags
- //
- GGML_API void ggml_set_input(struct ggml_tensor * tensor);
- GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+ GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);

  //
  // quantization
@@ -2374,43 +2194,6 @@ extern "C" {
  GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
  GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);

- //
- // system info
- //
-
- GGML_API int ggml_cpu_has_avx (void);
- GGML_API int ggml_cpu_has_avx_vnni (void);
- GGML_API int ggml_cpu_has_avx2 (void);
- GGML_API int ggml_cpu_has_avx512 (void);
- GGML_API int ggml_cpu_has_avx512_vbmi(void);
- GGML_API int ggml_cpu_has_avx512_vnni(void);
- GGML_API int ggml_cpu_has_avx512_bf16(void);
- GGML_API int ggml_cpu_has_fma (void);
- GGML_API int ggml_cpu_has_neon (void);
- GGML_API int ggml_cpu_has_sve (void);
- GGML_API int ggml_cpu_has_arm_fma (void);
- GGML_API int ggml_cpu_has_metal (void);
- GGML_API int ggml_cpu_has_f16c (void);
- GGML_API int ggml_cpu_has_fp16_va (void);
- GGML_API int ggml_cpu_has_wasm_simd (void);
- GGML_API int ggml_cpu_has_blas (void);
- GGML_API int ggml_cpu_has_cuda (void);
- GGML_API int ggml_cpu_has_vulkan (void);
- GGML_API int ggml_cpu_has_kompute (void);
- GGML_API int ggml_cpu_has_gpublas (void);
- GGML_API int ggml_cpu_has_sse3 (void);
- GGML_API int ggml_cpu_has_ssse3 (void);
- GGML_API int ggml_cpu_has_sycl (void);
- GGML_API int ggml_cpu_has_rpc (void);
- GGML_API int ggml_cpu_has_vsx (void);
- GGML_API int ggml_cpu_has_matmul_int8(void);
- GGML_API int ggml_cpu_has_cann (void);
- GGML_API int ggml_cpu_has_llamafile (void);
-
- //
- // Internal types and functions exposed for tests and benchmarks
- //
-

  #ifdef __cplusplus
  // restrict not standard in C++
  #define GGML_RESTRICT
@@ -2419,34 +2202,18 @@ extern "C" {
  #endif
  typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
  typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
- typedef void (*ggml_from_float_to_mat_t)
- (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
- typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
- const void * GGML_RESTRICT y, size_t by, int nrc);
- typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
- const void * GGML_RESTRICT y, int nr, int nc);
- typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
- const void * GGML_RESTRICT y, int nr, int nc);
-
- typedef struct {
+
+ struct ggml_type_traits {
  const char * type_name;
  int64_t blck_size;
  int64_t blck_size_interleave; // interleave elements in blocks
  size_t type_size;
  bool is_quantized;
  ggml_to_float_t to_float;
- ggml_from_float_t from_float;
  ggml_from_float_t from_float_ref;
- ggml_from_float_to_mat_t from_float_to_mat;
- ggml_vec_dot_t vec_dot;
- enum ggml_type vec_dot_type;
- int64_t nrows; // number of rows to process simultaneously
- int64_t ncols; // number of columns to process simultaneously
- ggml_gemv_t gemv;
- ggml_gemm_t gemm;
- } ggml_type_traits_t;
-
- GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
+ };
+
+ GGML_API const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type);
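Where ggml_internal_get_type_traits returned the traits struct by value, the renamed ggml_get_type_traits returns a const pointer, and the vec_dot/gemv/gemm function pointers are no longer part of the public struct. A minimal sketch of querying the block geometry of a quantized type; the choice of GGML_TYPE_Q4_0 is only an example:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        // Print basic storage properties of a quantized tensor type.
        const struct ggml_type_traits * traits = ggml_get_type_traits(GGML_TYPE_Q4_0);
        printf("%s: %lld elements per block, %zu bytes per block, quantized = %d\n",
               traits->type_name, (long long) traits->blck_size,
               traits->type_size, traits->is_quantized);
        return 0;
    }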
 
  #ifdef __cplusplus
  }