@fugood/llama.node 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (286)
  1. package/CMakeLists.txt +7 -0
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +18 -1
  17. package/package.json +1 -1
  18. package/src/DetokenizeWorker.cpp +1 -1
  19. package/src/EmbeddingWorker.cpp +17 -7
  20. package/src/EmbeddingWorker.h +2 -1
  21. package/src/LlamaCompletionWorker.cpp +8 -8
  22. package/src/LlamaCompletionWorker.h +2 -2
  23. package/src/LlamaContext.cpp +89 -27
  24. package/src/LlamaContext.h +2 -0
  25. package/src/TokenizeWorker.cpp +1 -1
  26. package/src/common.hpp +4 -4
  27. package/src/llama.cpp/.github/workflows/build.yml +240 -168
  28. package/src/llama.cpp/.github/workflows/docker.yml +8 -8
  29. package/src/llama.cpp/.github/workflows/python-lint.yml +8 -1
  30. package/src/llama.cpp/.github/workflows/server.yml +21 -14
  31. package/src/llama.cpp/CMakeLists.txt +14 -6
  32. package/src/llama.cpp/Sources/llama/llama.h +4 -0
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/cmake/common.cmake +33 -0
  35. package/src/llama.cpp/cmake/x64-windows-llvm.cmake +11 -0
  36. package/src/llama.cpp/common/CMakeLists.txt +6 -4
  37. package/src/llama.cpp/common/arg.cpp +986 -770
  38. package/src/llama.cpp/common/arg.h +22 -22
  39. package/src/llama.cpp/common/common.cpp +212 -351
  40. package/src/llama.cpp/common/common.h +204 -117
  41. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  42. package/src/llama.cpp/common/log.cpp +50 -50
  43. package/src/llama.cpp/common/log.h +18 -18
  44. package/src/llama.cpp/common/ngram-cache.cpp +36 -36
  45. package/src/llama.cpp/common/ngram-cache.h +19 -19
  46. package/src/llama.cpp/common/sampling.cpp +163 -121
  47. package/src/llama.cpp/common/sampling.h +41 -20
  48. package/src/llama.cpp/common/speculative.cpp +274 -0
  49. package/src/llama.cpp/common/speculative.h +28 -0
  50. package/src/llama.cpp/docs/build.md +134 -161
  51. package/src/llama.cpp/examples/CMakeLists.txt +33 -14
  52. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  53. package/src/llama.cpp/examples/batched/batched.cpp +19 -18
  54. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  55. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +10 -11
  56. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  57. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +1 -1
  58. package/src/llama.cpp/examples/cvector-generator/CMakeLists.txt +1 -1
  59. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +9 -9
  60. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +1 -1
  61. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/embedding/embedding.cpp +12 -12
  63. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +3 -2
  64. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +8 -8
  65. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  66. package/src/llama.cpp/examples/export-lora/export-lora.cpp +5 -5
  67. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +1 -1
  68. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +4 -7
  69. package/src/llama.cpp/examples/gen-docs/CMakeLists.txt +1 -1
  70. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +7 -7
  71. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  72. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +8 -1
  73. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  74. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +2 -2
  75. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  76. package/src/llama.cpp/examples/gritlm/gritlm.cpp +18 -18
  77. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  78. package/src/llama.cpp/examples/imatrix/imatrix.cpp +31 -13
  79. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  80. package/src/llama.cpp/examples/infill/infill.cpp +41 -87
  81. package/src/llama.cpp/examples/llama-bench/CMakeLists.txt +1 -1
  82. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +439 -459
  83. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +2 -0
  84. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +11 -14
  85. package/src/llama.cpp/examples/llava/CMakeLists.txt +10 -3
  86. package/src/llama.cpp/examples/llava/clip.cpp +263 -66
  87. package/src/llama.cpp/examples/llava/clip.h +8 -2
  88. package/src/llama.cpp/examples/llava/llava-cli.cpp +23 -23
  89. package/src/llama.cpp/examples/llava/llava.cpp +83 -22
  90. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +21 -21
  91. package/src/llama.cpp/examples/llava/qwen2vl-cli.cpp +581 -0
  92. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  93. package/src/llama.cpp/examples/lookahead/lookahead.cpp +26 -26
  94. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  95. package/src/llama.cpp/examples/lookup/lookup-create.cpp +7 -7
  96. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  97. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +16 -15
  98. package/src/llama.cpp/examples/lookup/lookup.cpp +30 -30
  99. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  100. package/src/llama.cpp/examples/main/main.cpp +73 -114
  101. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +1 -1
  102. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  103. package/src/llama.cpp/examples/parallel/parallel.cpp +18 -19
  104. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  105. package/src/llama.cpp/examples/passkey/passkey.cpp +14 -14
  106. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  107. package/src/llama.cpp/examples/perplexity/perplexity.cpp +99 -120
  108. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  109. package/src/llama.cpp/examples/quantize/quantize.cpp +0 -3
  110. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  111. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +10 -9
  112. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  113. package/src/llama.cpp/examples/retrieval/retrieval.cpp +16 -16
  114. package/src/llama.cpp/examples/rpc/rpc-server.cpp +3 -1
  115. package/src/llama.cpp/examples/run/CMakeLists.txt +5 -0
  116. package/src/llama.cpp/examples/run/run.cpp +911 -0
  117. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  118. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +38 -21
  119. package/src/llama.cpp/examples/server/CMakeLists.txt +3 -16
  120. package/src/llama.cpp/examples/server/server.cpp +2073 -1339
  121. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  122. package/src/llama.cpp/examples/server/utils.hpp +354 -277
  123. package/src/llama.cpp/examples/simple/CMakeLists.txt +2 -2
  124. package/src/llama.cpp/examples/simple/simple.cpp +130 -94
  125. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  126. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +200 -0
  127. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/speculative/speculative.cpp +68 -64
  129. package/src/llama.cpp/examples/speculative-simple/CMakeLists.txt +5 -0
  130. package/src/llama.cpp/examples/speculative-simple/speculative-simple.cpp +265 -0
  131. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/tokenize/tokenize.cpp +3 -3
  133. package/src/llama.cpp/examples/tts/CMakeLists.txt +5 -0
  134. package/src/llama.cpp/examples/tts/tts.cpp +932 -0
  135. package/src/llama.cpp/ggml/CMakeLists.txt +54 -36
  136. package/src/llama.cpp/ggml/include/ggml-backend.h +63 -34
  137. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  138. package/src/llama.cpp/ggml/include/ggml-cann.h +9 -7
  139. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  140. package/src/llama.cpp/ggml/include/ggml-cpu.h +135 -0
  141. package/src/llama.cpp/ggml/include/ggml-cuda.h +12 -12
  142. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  143. package/src/llama.cpp/ggml/include/ggml-metal.h +11 -7
  144. package/src/llama.cpp/ggml/include/ggml-opencl.h +26 -0
  145. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  146. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  147. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  148. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  149. package/src/llama.cpp/ggml/include/ggml.h +159 -417
  150. package/src/llama.cpp/ggml/src/CMakeLists.txt +121 -1155
  151. package/src/llama.cpp/ggml/src/ggml-alloc.c +23 -28
  152. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +57 -36
  153. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +552 -0
  154. package/src/llama.cpp/ggml/src/ggml-backend.cpp +306 -867
  155. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +87 -0
  156. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +216 -65
  157. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +76 -0
  158. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +456 -111
  159. package/src/llama.cpp/ggml/src/ggml-cann/common.h +6 -3
  160. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +343 -177
  161. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -5
  162. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +22 -9
  163. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +24 -13
  164. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +23 -13
  165. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +11 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +10 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +10 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +17 -0
  169. package/src/llama.cpp/ggml/src/ggml-common.h +42 -42
  170. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +336 -0
  171. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp +220 -0
  172. package/src/llama.cpp/ggml/src/ggml-cpu/amx/amx.h +8 -0
  173. package/src/llama.cpp/ggml/src/ggml-cpu/amx/common.h +91 -0
  174. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp +2511 -0
  175. package/src/llama.cpp/ggml/src/ggml-cpu/amx/mmq.h +10 -0
  176. package/src/llama.cpp/ggml/src/ggml-cpu/cpu-feats-x86.cpp +323 -0
  177. package/src/llama.cpp/ggml/src/{ggml-aarch64.c → ggml-cpu/ggml-cpu-aarch64.cpp} +1299 -246
  178. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +8 -0
  179. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.cpp +55 -0
  180. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-hbm.h +8 -0
  181. package/src/llama.cpp/ggml/src/{ggml-cpu-impl.h → ggml-cpu/ggml-cpu-impl.h} +14 -242
  182. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10835 -0
  183. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  184. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.cpp +36 -0
  185. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-traits.h +38 -0
  186. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +14123 -0
  187. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +628 -0
  188. package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.cpp +666 -0
  189. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +152 -0
  190. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +104 -0
  192. package/src/llama.cpp/ggml/src/ggml-impl.h +393 -22
  193. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +166 -0
  194. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +360 -127
  195. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +105 -0
  196. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +288 -0
  197. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +107 -0
  198. package/src/llama.cpp/ggml/src/ggml-opencl/CMakeLists.txt +147 -0
  199. package/src/llama.cpp/ggml/src/ggml-opencl/ggml-opencl.cpp +4004 -0
  200. package/src/llama.cpp/ggml/src/ggml-opt.cpp +854 -0
  201. package/src/llama.cpp/ggml/src/ggml-quants.c +188 -10702
  202. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -125
  203. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +9 -0
  204. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +478 -300
  205. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +84 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +3 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +36 -5
  208. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +259 -0
  209. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +3 -2
  210. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +1 -1
  211. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +5 -5
  212. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +34 -35
  213. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1030 -0
  214. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  215. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +4 -4
  216. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3638 -4151
  217. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +3 -2
  218. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +6 -6
  219. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +75 -87
  220. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +7 -6
  221. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  222. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  223. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +6 -0
  224. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +4 -3
  225. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +7 -7
  226. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +1 -0
  227. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  228. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +141 -0
  229. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  230. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  231. package/src/llama.cpp/ggml/src/ggml-threading.h +14 -0
  232. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +92 -0
  233. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2138 -887
  234. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +3 -1
  235. package/src/llama.cpp/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +593 -0
  236. package/src/llama.cpp/ggml/src/ggml.c +4427 -20125
  237. package/src/llama.cpp/include/llama-cpp.h +25 -0
  238. package/src/llama.cpp/include/llama.h +93 -52
  239. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-roberta-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/pocs/CMakeLists.txt +3 -1
  242. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  243. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  244. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  245. package/src/llama.cpp/src/CMakeLists.txt +4 -8
  246. package/src/llama.cpp/src/llama-grammar.cpp +15 -15
  247. package/src/llama.cpp/src/llama-grammar.h +2 -5
  248. package/src/llama.cpp/src/llama-sampling.cpp +779 -194
  249. package/src/llama.cpp/src/llama-sampling.h +21 -2
  250. package/src/llama.cpp/src/llama-vocab.cpp +55 -10
  251. package/src/llama.cpp/src/llama-vocab.h +35 -11
  252. package/src/llama.cpp/src/llama.cpp +4317 -2979
  253. package/src/llama.cpp/src/unicode-data.cpp +2 -2
  254. package/src/llama.cpp/src/unicode.cpp +62 -51
  255. package/src/llama.cpp/src/unicode.h +9 -10
  256. package/src/llama.cpp/tests/CMakeLists.txt +48 -38
  257. package/src/llama.cpp/tests/test-arg-parser.cpp +15 -15
  258. package/src/llama.cpp/tests/test-backend-ops.cpp +324 -80
  259. package/src/llama.cpp/tests/test-barrier.cpp +1 -0
  260. package/src/llama.cpp/tests/test-chat-template.cpp +59 -9
  261. package/src/llama.cpp/tests/test-gguf.cpp +1303 -0
  262. package/src/llama.cpp/tests/test-grammar-integration.cpp +3 -6
  263. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +17 -4
  264. package/src/llama.cpp/tests/test-llama-grammar.cpp +2 -4
  265. package/src/llama.cpp/tests/test-log.cpp +2 -2
  266. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  267. package/src/llama.cpp/tests/test-quantize-fns.cpp +24 -21
  268. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  269. package/src/llama.cpp/tests/test-rope.cpp +62 -20
  270. package/src/llama.cpp/tests/test-sampling.cpp +163 -138
  271. package/src/llama.cpp/tests/test-tokenizer-0.cpp +7 -7
  272. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  273. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  274. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +0 -72
  275. package/src/llama.cpp/.github/workflows/nix-ci.yml +0 -79
  276. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +0 -22
  277. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +0 -36
  278. package/src/llama.cpp/common/train.cpp +0 -1515
  279. package/src/llama.cpp/common/train.h +0 -233
  280. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +0 -5
  281. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1639
  282. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -39
  283. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +0 -600
  284. package/src/llama.cpp/tests/test-grad0.cpp +0 -1683
  285. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  286. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
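
package/src/llama.cpp/tests/test-grad0.cpp (deleted; entry 284 above)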
@@ -1,1683 +0,0 @@
1
- #define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
2
- #include "ggml.h"
3
-
4
- #include <cfloat>
5
- #include <cmath>
6
- #include <cstdint>
7
- #include <cstdio>
8
- #include <cstdlib>
9
- #include <cassert>
10
- #include <initializer_list>
11
- #include <vector>
12
-
13
- #if defined(_MSC_VER)
14
- #pragma warning(disable: 4244 4267) // possible loss of data
15
- #endif
16
-
17
- #if defined(__GNUC__)
18
- #pragma GCC diagnostic ignored "-Wdouble-promotion"
19
- #endif
20
-
21
- #define MAX_NARGS 3
22
-
23
- #undef MIN
24
- #undef MAX
25
- #define MIN(a, b) ((a) < (b) ? (a) : (b))
26
- #define MAX(a, b) ((a) > (b) ? (a) : (b))
27
-
28
- #define GGML_SILU_FP16
29
-
30
- //
31
- // logging
32
- //
33
-
34
- #if (GGML_DEBUG >= 1)
35
- #define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
36
- #else
37
- #define GGML_PRINT_DEBUG(...)
38
- #endif
39
-
40
- #if (GGML_DEBUG >= 5)
41
- #define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
42
- #else
43
- #define GGML_PRINT_DEBUG_5(...)
44
- #endif
45
-
46
- #if (GGML_DEBUG >= 10)
47
- #define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
48
- #else
49
- #define GGML_PRINT_DEBUG_10(...)
50
- #endif
51
-
52
- #define GGML_PRINT(...) printf(__VA_ARGS__)
53
-
54
- static float frand(void) {
55
- return (float)rand()/(float)RAND_MAX;
56
- }
57
-
58
- static int irand(int n) {
59
- if (n == 0) return 0;
60
- return rand()%n;
61
- }
62
-
63
- static void get_random_dims(int64_t * dims, int ndims) {
64
- dims[0] = dims[1] = dims[2] = dims[3] = 1;
65
-
66
- for (int i = 0; i < ndims; i++) {
67
- dims[i] = 1 + irand(4);
68
- }
69
- }
70
-
71
- static struct ggml_tensor * get_random_tensor_f32(
72
- struct ggml_context * ctx0,
73
- int ndims,
74
- int64_t ne[],
75
- float fmin,
76
- float fmax) {
77
- struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
78
-
79
- switch (ndims) {
80
- case 1:
81
- for (int i0 = 0; i0 < ne[0]; i0++) {
82
- ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
83
- }
84
- break;
85
- case 2:
86
- for (int i1 = 0; i1 < ne[1]; i1++) {
87
- for (int i0 = 0; i0 < ne[0]; i0++) {
88
- ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
89
- }
90
- }
91
- break;
92
- case 3:
93
- for (int i2 = 0; i2 < ne[2]; i2++) {
94
- for (int i1 = 0; i1 < ne[1]; i1++) {
95
- for (int i0 = 0; i0 < ne[0]; i0++) {
96
- ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
97
- }
98
- }
99
- }
100
- break;
101
- case 4:
102
- for (int i3 = 0; i3 < ne[3]; i3++) {
103
- for (int i2 = 0; i2 < ne[2]; i2++) {
104
- for (int i1 = 0; i1 < ne[1]; i1++) {
105
- for (int i0 = 0; i0 < ne[0]; i0++) {
106
- ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
107
- }
108
- }
109
- }
110
- }
111
- break;
112
- default:
113
- assert(false);
114
- }
115
-
116
- return result;
117
- }
118
-
119
- static struct ggml_tensor * get_random_tensor_f16(
120
- struct ggml_context * ctx0,
121
- int ndims,
122
- int64_t ne[],
123
- float fmin,
124
- float fmax) {
125
- struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);
126
-
127
- switch (ndims) {
128
- case 1:
129
- for (int i0 = 0; i0 < ne[0]; i0++) {
130
- ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
131
- }
132
- break;
133
- case 2:
134
- for (int i1 = 0; i1 < ne[1]; i1++) {
135
- for (int i0 = 0; i0 < ne[0]; i0++) {
136
- ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
137
- }
138
- }
139
- break;
140
- case 3:
141
- for (int i2 = 0; i2 < ne[2]; i2++) {
142
- for (int i1 = 0; i1 < ne[1]; i1++) {
143
- for (int i0 = 0; i0 < ne[0]; i0++) {
144
- ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
145
- }
146
- }
147
- }
148
- break;
149
- case 4:
150
- for (int i3 = 0; i3 < ne[3]; i3++) {
151
- for (int i2 = 0; i2 < ne[2]; i2++) {
152
- for (int i1 = 0; i1 < ne[1]; i1++) {
153
- for (int i0 = 0; i0 < ne[0]; i0++) {
154
- ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
155
- }
156
- }
157
- }
158
- }
159
- break;
160
- default:
161
- assert(false);
162
- }
163
-
164
- return result;
165
- }
166
-
167
- static struct ggml_tensor * get_random_tensor_i32(
168
- struct ggml_context * ctx0,
169
- int ndims,
170
- int64_t ne[],
171
- int32_t imin,
172
- int32_t imax) {
173
- struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);
174
-
175
- switch (ndims) {
176
- case 1:
177
- for (int i0 = 0; i0 < ne[0]; i0++) {
178
- ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
179
- }
180
- break;
181
- case 2:
182
- for (int i1 = 0; i1 < ne[1]; i1++) {
183
- for (int i0 = 0; i0 < ne[0]; i0++) {
184
- ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
185
- }
186
- }
187
- break;
188
- case 3:
189
- for (int i2 = 0; i2 < ne[2]; i2++) {
190
- for (int i1 = 0; i1 < ne[1]; i1++) {
191
- for (int i0 = 0; i0 < ne[0]; i0++) {
192
- ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
193
- }
194
- }
195
- }
196
- break;
197
- case 4:
198
- for (int i3 = 0; i3 < ne[3]; i3++) {
199
- for (int i2 = 0; i2 < ne[2]; i2++) {
200
- for (int i1 = 0; i1 < ne[1]; i1++) {
201
- for (int i0 = 0; i0 < ne[0]; i0++) {
202
- ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
203
- }
204
- }
205
- }
206
- }
207
- break;
208
- default:
209
- assert(false);
210
- }
211
-
212
- return result;
213
- }
214
-
215
- static bool check_gradient(
216
- const char * op_name,
217
- struct ggml_context * ctx0,
218
- struct ggml_tensor * x[],
219
- struct ggml_tensor * f,
220
- int ndims,
221
- int nargs,
222
- float eps,
223
- float max_error_abs,
224
- float max_error_rel,
225
- std::vector<double> expected_vals) {
226
-
227
- static int n_threads = -1;
228
- if (n_threads < 0) {
229
- n_threads = GGML_DEFAULT_N_THREADS;
230
-
231
- const char *env = getenv("GGML_N_THREADS");
232
- if (env) {
233
- n_threads = atoi(env);
234
- }
235
-
236
- printf("GGML_N_THREADS = %d\n", n_threads);
237
- }
238
-
239
- struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
240
- struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
241
- ggml_build_forward_expand(gf, f);
242
- ggml_graph_cpy(gf, gb);
243
- ggml_build_backward_expand(ctx0, gf, gb, false);
244
-
245
- ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
246
-
247
- ggml_graph_reset(gb);
248
- if (f->grad) {
249
- ggml_set_f32(f->grad, 1.0f);
250
- }
251
-
252
- ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
253
-
254
- // ggml_graph_dump_dot(gf, NULL, "test-grad0-forward.dot");
255
- // ggml_graph_dump_dot(gb, gf, "test-grad0-backward.dot");
256
-
257
- for (int i = 0; i < nargs; ++i) {
258
- bool all_g0_bad = true;
259
- const int nelements = ggml_nelements(x[i]);
260
- for (int k = 0; k < nelements; ++k) {
261
- // Calculate gradient numerically:
262
- const float x0 = ggml_get_f32_1d(x[i], k);
263
- const float xm = x0 - eps;
264
- const float xp = x0 + eps;
265
- ggml_set_f32_1d(x[i], k, xp);
266
-
267
- ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
268
-
269
- const double f0 = ggml_get_f32_1d(f, 0);
270
-
271
- ggml_set_f32_1d(x[i], k, xm);
272
-
273
- ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
274
-
275
- const double f1 = ggml_get_f32_1d(f, 0);
276
- const double g0 = (f0 - f1)/(2.0*(double) eps);
277
-
278
- // The numerical calculation of the gradient fails around noncontinuities (e.g. 0 for ReLU).
279
- // In such cases, provide a vector of expected values and skip the comparison for failed calculations.
280
- if (!expected_vals.empty()) {
281
- bool matches_any = false;
282
- for (const double & ev : expected_vals) {
283
- const double error_abs = std::fabs(g0 - ev);
284
- if (error_abs > max_error_abs) {
285
- continue;
286
- }
287
- const double error_rel = g0 != 0.0 ? fabs(g0 - ev)/fabs(g0) : 0.0;
288
- if (error_rel > max_error_rel) {
289
- continue;
290
- }
291
- matches_any = true;
292
- break;
293
- }
294
- if (!matches_any) {
295
- continue;
296
- }
297
- }
298
- all_g0_bad = false;
299
-
300
- ggml_set_f32_1d(x[i], k, x0);
301
-
302
- // compute gradient using backward graph
303
- ggml_graph_reset(gb);
304
- if (f->grad) {
305
- ggml_set_f32(f->grad, 1.0f);
306
- }
307
-
308
- ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
309
-
310
- const double g1 = ggml_get_f32_1d(x[i]->grad, k);
311
-
312
- const double error_abs = fabs(g0 - g1);
313
- const double error_rel = g0 != 0.0 ? fabs(g0 - g1)/fabs(g0) : 0.0;
314
-
315
- if (error_abs > max_error_abs || error_rel > max_error_rel) {
316
- printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
317
- op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
318
- //assert(false);
319
- return false;
320
- }
321
- }
322
- if (all_g0_bad) {
323
- printf("%s: numerical calculation of the gradient failed for all values\n", op_name);
324
- return false;
325
- }
326
- }
327
-
328
- return true;
329
- }
330
-
331
- // TODO: clean-up this ..
332
- static bool check_mat_mul(
333
- const struct ggml_tensor * y,
334
- const struct ggml_tensor * x0,
335
- const struct ggml_tensor * x1) {
336
- float * dst = (float *) y->data;
337
- float * src0 = (float *) x0->data;
338
- float * src1 = (float *) x1->data;
339
-
340
- const int nc = x0->ne[1];
341
- const int nr = x1->ne[1];
342
- const int nk = x0->ne[0];
343
-
344
- GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);
345
-
346
- GGML_PRINT_DEBUG("x0:\n");
347
- for (int j = 0; j < x0->ne[1]; ++j) {
348
- for (int i = 0; i < x0->ne[0]; ++i) {
349
- GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
350
- }
351
- GGML_PRINT_DEBUG("\n");
352
- }
353
- GGML_PRINT_DEBUG("\n");
354
-
355
- GGML_PRINT_DEBUG("x1:\n");
356
- for (int j = 0; j < x1->ne[1]; ++j) {
357
- for (int i = 0; i < x1->ne[0]; ++i) {
358
- GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
359
- }
360
- GGML_PRINT_DEBUG("\n");
361
- }
362
- GGML_PRINT_DEBUG("\n");
363
-
364
- GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
365
- for (int j = 0; j < y->ne[1]; ++j) {
366
- for (int i = 0; i < y->ne[0]; ++i) {
367
- GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]);
368
- }
369
- GGML_PRINT_DEBUG("\n");
370
- }
371
-
372
- for (int i = 0; i < nr; ++i) {
373
- for (int j = 0; j < nc; ++j) {
374
- float sum = 0.0f;
375
-
376
- for (int k = 0; k < nk; ++k) {
377
- sum += src0[j*nk + k]*src1[i*nk + k];
378
- }
379
-
380
- if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
381
- fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
382
- assert(false);
383
- return false;
384
- }
385
- }
386
- }
387
-
388
- return true;
389
- }
390
-
391
- #define NUM_PERMUTATIONS (4*3*2*1)
392
-
393
- int main(int argc, const char ** argv) {
394
- struct ggml_init_params params = {
395
- /* .mem_size = */ 256*1024*1024,
396
- /* .mem_buffer = */ NULL,
397
- /* .no_alloc = */ false,
398
- };
399
-
400
- int64_t ne[4];
401
-
402
- int all_permutations[4 * NUM_PERMUTATIONS];
403
- {
404
- int count = 0;
405
- for (int ax0=0; ax0<4; ++ax0) {
406
- for (int ax1=0; ax1<4; ++ax1) {
407
- if (ax1 == ax0) continue;
408
- for (int ax2=0; ax2<4; ++ax2) {
409
- if (ax2 == ax0) continue;
410
- if (ax2 == ax1) continue;
411
- for (int ax3=0; ax3<4; ++ax3) {
412
- if (ax3 == ax0) continue;
413
- if (ax3 == ax1) continue;
414
- if (ax3 == ax2) continue;
415
- assert(count < NUM_PERMUTATIONS);
416
- all_permutations[count*4+0] = ax0;
417
- all_permutations[count*4+1] = ax1;
418
- all_permutations[count*4+2] = ax2;
419
- all_permutations[count*4+3] = ax3;
420
- ++count;
421
- }
422
- }
423
- }
424
- }
425
- }
426
-
427
- unsigned seed_iter = 1;
428
-
429
- // original loop: 1000
430
- int niter = 4;
431
- const char *env = getenv("GGML_NLOOP");
432
- if (env != NULL) {
433
- niter = atoi(env);
434
- }
435
- if (argc > 1) {
436
- niter = atoi(argv[1]);
437
- }
438
- for (int iter = 0; iter < niter; ++iter) {
439
- srand(seed_iter);
440
- seed_iter = rand();
441
- unsigned seed = rand();
442
-
443
- printf("test-grad0: iter:%d/%d\n", (iter+1), niter);
444
- struct ggml_context * ctx0 = ggml_init(params);
445
-
446
- get_random_dims(ne, 4);
447
-
448
- struct ggml_tensor * x[MAX_NARGS];
449
-
450
- // add f32
451
- {
452
- srand(seed);
453
- const int nargs = 2;
454
-
455
- for (int ndims = 1; ndims <= 4; ++ndims) {
456
- for (int i = 0; i < nargs; ++i) {
457
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
458
- ggml_set_param(ctx0, x[i]);
459
- }
460
-
461
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
462
-
463
- check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f, {});
464
- }
465
- }
466
-
467
- // add f16
468
- {
469
- srand(seed);
470
- const int nargs = 2;
471
-
472
- for (int ndims = 1; ndims <= 4; ++ndims) {
473
- for (int i = 0; i < nargs; ++i) {
474
- x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
475
- ggml_set_param(ctx0, x[i]);
476
- }
477
-
478
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
479
-
480
- check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f, {});
481
- }
482
- }
483
-
484
- // sub
485
- {
486
- srand(seed);
487
- const int nargs = 2;
488
-
489
- for (int ndims = 1; ndims <= 4; ++ndims) {
490
- for (int i = 0; i < nargs; ++i) {
491
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
492
- ggml_set_param(ctx0, x[i]);
493
- }
494
-
495
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));
496
-
497
- check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
498
- }
499
- }
500
-
501
- // mul
502
- {
503
- srand(seed);
504
- const int nargs = 2;
505
-
506
- for (int ndims = 1; ndims <= 4; ++ndims) {
507
- for (int i = 0; i < nargs; ++i) {
508
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
509
- ggml_set_param(ctx0, x[i]);
510
- }
511
-
512
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));
513
-
514
- check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
515
- }
516
- }
517
-
518
- // div
519
- {
520
- srand(seed);
521
- const int nargs = 2;
522
-
523
- for (int ndims = 1; ndims <= 4; ++ndims) {
524
- for (int i = 0; i < nargs; ++i) {
525
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
526
- ggml_set_param(ctx0, x[i]);
527
- }
528
-
529
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));
530
-
531
- check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f, {});
532
- }
533
- }
534
-
535
- // sqr
536
- {
537
- srand(seed);
538
- const int nargs = 1;
539
-
540
- for (int ndims = 1; ndims <= 2; ++ndims) {
541
- for (int i = 0; i < nargs; ++i) {
542
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
543
- ggml_set_param(ctx0, x[i]);
544
- }
545
-
546
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));
547
-
548
- check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
549
- }
550
- }
551
-
552
- // sqrt
553
- {
554
- srand(seed);
555
- const int nargs = 1;
556
-
557
- for (int ndims = 1; ndims <= 2; ++ndims) {
558
- for (int i = 0; i < nargs; ++i) {
559
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
560
- ggml_set_param(ctx0, x[i]);
561
- }
562
-
563
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));
564
-
565
- check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f, {});
566
- }
567
- }
568
-
569
- // log
570
- {
571
- srand(seed);
572
- const int nargs = 1;
573
-
574
- for (int ndims = 1; ndims <= 2; ++ndims) {
575
- for (int i = 0; i < nargs; ++i) {
576
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
577
- ggml_set_param(ctx0, x[i]);
578
- }
579
-
580
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));
581
-
582
- check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f, {});
583
- }
584
- }
585
-
586
- // sum
587
- {
588
- srand(seed);
589
- const int nargs = 1;
590
-
591
- for (int ndims = 1; ndims <= 2; ++ndims) {
592
- for (int i = 0; i < nargs; ++i) {
593
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
594
- ggml_set_param(ctx0, x[i]);
595
- }
596
-
597
- struct ggml_tensor * f = ggml_sum(ctx0, x[0]);
598
-
599
- check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
600
- }
601
- }
602
-
603
-
604
- // sum_rows
605
- {
606
- srand(seed);
607
- const int nargs = 1;
608
-
609
- for (int ndims = 1; ndims <= 4; ++ndims) {
610
- for (int i = 0; i < nargs; ++i) {
611
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
612
- ggml_set_param(ctx0, x[i]);
613
- }
614
-
615
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));
616
-
617
- check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
618
- }
619
- }
620
-
621
- // mean, not yet fully implemented
622
- if(0)
623
- {
624
- srand(seed);
625
- const int nargs = 1;
626
-
627
- for (int ndims = 1; ndims <= 4; ++ndims) {
628
- for (int i = 0; i < nargs; ++i) {
629
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
630
- ggml_set_param(ctx0, x[i]);
631
- }
632
-
633
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
634
-
635
- check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
636
- }
637
- }
638
-
639
- // argmax
640
- if (0)
641
- {
642
- srand(seed);
643
- const int nargs = 1;
644
-
645
- for (int ndims = 1; ndims <= 4; ++ndims) {
646
- for (int i = 0; i < nargs; ++i) {
647
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
648
- ggml_set_param(ctx0, x[i]);
649
- }
650
-
651
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
652
-
653
- check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
654
- }
655
- }
656
-
657
- // repeat
658
- {
659
- srand(seed);
660
- int64_t ne2[4];
661
- get_random_dims(ne2, 4);
662
-
663
- ne2[0] = ne[0] * ne2[0];
664
- ne2[1] = ne[1] * ne2[1];
665
- ne2[2] = 1;
666
- ne2[3] = 1;
667
-
668
- const int nargs = 1;
669
- for (int ndims = 1; ndims <= 2; ++ndims) {
670
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
671
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
672
- ggml_set_param(ctx0, x[0]);
673
-
674
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
675
-
676
- check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
677
- }
678
- }
679
-
680
- // repeat back
681
- {
682
- srand(seed);
683
- int64_t ne2[4];
684
- get_random_dims(ne2, 4);
685
-
686
- ne2[0] = ne[0] * ne2[0];
687
- ne2[1] = ne[1] * ne2[1];
688
- ne2[2] = 1;
689
- ne2[3] = 1;
690
-
691
- const int nargs = 1;
692
- for (int ndims = 1; ndims <= 2; ++ndims) {
693
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
694
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
695
- ggml_set_param(ctx0, x[0]);
696
-
697
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
698
-
699
- check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY, {});
700
- }
701
- }
702
-
703
- // abs
704
- {
705
- const int nargs = 1;
706
-
707
- for (int ndims = 1; ndims <= 4; ++ndims) {
708
- for (int i = 0; i < nargs; ++i) {
709
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
710
- ggml_set_param(ctx0, x[i]);
711
- }
712
-
713
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
714
-
715
- check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f, {-1.0, 1.0});
716
- }
717
- }
718
-
719
- // sgn
720
- {
721
- srand(seed);
722
- const int nargs = 1;
723
-
724
- for (int ndims = 1; ndims <= 4; ++ndims) {
725
- for (int i = 0; i < nargs; ++i) {
726
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
727
- ggml_set_param(ctx0, x[i]);
728
- }
729
-
730
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
731
-
732
- check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
733
- }
734
- }
735
-
736
- // neg
737
- {
738
- srand(seed);
739
- const int nargs = 1;
740
-
741
- for (int ndims = 1; ndims <= 4; ++ndims) {
742
- for (int i = 0; i < nargs; ++i) {
743
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
744
- ggml_set_param(ctx0, x[i]);
745
- }
746
-
747
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
748
-
749
- check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
750
- }
751
- }
752
-
753
- // step
754
- {
755
- srand(seed);
756
- const int nargs = 1;
757
-
758
- for (int ndims = 1; ndims <= 4; ++ndims) {
759
- for (int i = 0; i < nargs; ++i) {
760
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
761
- ggml_set_param(ctx0, x[i]);
762
- }
763
-
764
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
765
-
766
- check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {0.0});
767
- }
768
- }
769
-
770
- // tanh, not yet fully implemented
771
- if(0)
772
- {
773
- srand(seed);
774
- const int nargs = 1;
775
-
776
- for (int ndims = 1; ndims <= 4; ++ndims) {
777
- for (int i = 0; i < nargs; ++i) {
778
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
779
- ggml_set_param(ctx0, x[i]);
780
- }
781
-
782
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
783
-
784
- check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
785
- }
786
- }
787
-
788
- // mul_mat
789
- {
790
- srand(seed);
791
- const int nargs = 2;
792
-
793
- for (int ndims = 2; ndims <= 4; ++ndims) {
794
- int max_nrep = (ndims >= 3) ? 2 : 1;
795
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
796
- for (int nrep2 = 1; nrep2 < max_nrep; ++nrep2) {
797
- for (int nrep3 = 1; nrep3 < max_nrep; ++nrep3) {
798
- {
799
- int64_t ne2[4];
800
- get_random_dims(ne2, 4);
801
- ne2[0] = ne[0];
802
- ne2[2] = nrep2 * ne[2];
803
- ne2[3] = nrep3 * ne[3];
804
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
805
- }
806
-
807
- ggml_set_param(ctx0, x[0]);
808
- ggml_set_param(ctx0, x[1]);
809
-
810
- struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
811
- struct ggml_tensor * f = ggml_sum(ctx0, m);
812
-
813
- GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
814
-
815
- check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
816
- if (ndims == 2) {
817
- // check_mat_mul does not support ndims > 2
818
- check_mat_mul(m, x[1], x[0]);
819
- }
820
- }
821
- }
822
- }
823
- }
824
-
825
- // elu, not yet fully implemented
826
- if(0)
827
- {
828
- srand(seed);
829
- const int nargs = 1;
830
-
831
- for (int ndims = 1; ndims <= 4; ++ndims) {
832
- for (int i = 0; i < nargs; ++i) {
833
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
834
- ggml_set_param(ctx0, x[i]);
835
- }
836
-
837
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
838
-
839
- check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
840
- }
841
- }
842
-
843
- // relu
844
- {
845
- srand(seed);
846
- const int nargs = 1;
847
-
848
- for (int ndims = 1; ndims <= 4; ++ndims) {
849
- for (int i = 0; i < nargs; ++i) {
850
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
851
- ggml_set_param(ctx0, x[i]);
852
- }
853
-
854
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
855
-
856
- check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {0.0, 1.0});
857
- }
858
- }
859
-
860
- // gelu, not yet fully implemented
861
- if(0)
862
- {
863
- srand(seed);
864
- const int nargs = 1;
865
-
866
- for (int ndims = 1; ndims <= 4; ++ndims) {
867
- for (int i = 0; i < nargs; ++i) {
868
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
869
- ggml_set_param(ctx0, x[i]);
870
- }
871
-
872
- struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
873
-
874
- check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f, {});
875
- }
876
- }
877
-
878
- // silu
879
- {
880
- srand(seed);
881
- const int nargs = 1;
882
-
883
- for (int ndims = 1; ndims <= 2; ++ndims) {
884
- for (int i = 0; i < nargs; ++i) {
885
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
886
- ggml_set_param(ctx0, x[i]);
887
- }
888
-
889
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));
890
-
891
- #ifdef GGML_SILU_FP16
892
- // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
893
- check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY, {});
894
- #else
895
- check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
896
- #endif
897
- }
898
- }
899
-
900
- // rms_norm
901
- {
902
- srand(seed);
903
- const int nargs = 1;
904
-
905
- for (int ndims = 1; ndims <= 2; ++ndims) {
906
- for (int i = 0; i < nargs; ++i) {
907
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
908
- ggml_set_param(ctx0, x[i]);
909
- }
910
-
911
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f));
912
-
913
- check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY, {});
914
- }
915
- }
916
-
917
- // scale
918
- {
919
- srand(seed);
920
- const int nargs = 1;
921
-
922
- for (int ndims = 1; ndims <= 2; ++ndims) {
923
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
924
-
925
- const float s = -1.0f + 2.0f*frand();
926
-
927
- ggml_set_param(ctx0, x[0]);
928
-
929
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], s));
930
-
931
- check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
932
- }
933
- }
934
-
935
- // cpy f32
936
- {
937
- srand(seed);
938
- const int nargs = 2;
939
-
940
- for (int ndims = 1; ndims <= 2; ++ndims) {
941
- for (int i = 0; i < nargs; ++i) {
942
- x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
943
- ggml_set_param(ctx0, x[i]);
944
- }
945
- // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
946
-
947
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
948
-
949
- check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
950
- }
951
- }
952
-
953
- // cpy f16
954
- {
955
- srand(seed);
956
- const int nargs = 2;
957
-
958
- for (int ndims = 1; ndims <= 2; ++ndims) {
959
- for (int i = 0; i < nargs; ++i) {
960
- x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
961
- ggml_set_param(ctx0, x[i]);
962
- }
963
- // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
964
-
965
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
966
-
967
- check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
968
- }
969
- }
970
-
971
- // reshape (1d->nd)
972
- {
973
- srand(seed);
974
- const int nargs = 1;
975
-
976
- for (int ndims = 1; ndims <= 2; ++ndims) {
977
- int64_t ne2[4];
978
- ne2[0] = 1;
979
- ne2[1] = 1;
980
- ne2[2] = 1;
981
- ne2[3] = 1;
982
- for (int i = 0; i < ndims; ++i) {
983
- ne2[0] *= ne[i];
984
- }
985
- x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
986
- x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
987
- ggml_set_param(ctx0, x[0]);
988
-
989
-
990
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
991
- check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
992
- }
993
- }
994
-
995
- // reshape (nd->1d)
996
- {
997
- srand(seed);
998
- const int nargs = 1;
999
-
1000
- for (int ndims = 1; ndims <= 2; ++ndims) {
1001
- int64_t ne2[4];
1002
- ne2[0] = 1;
1003
- ne2[1] = 1;
1004
- ne2[2] = 1;
1005
- ne2[3] = 1;
1006
- for (int i = 0; i < ndims; ++i) {
1007
- ne2[0] *= ne[i];
1008
- }
1009
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1010
- x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
1011
- ggml_set_param(ctx0, x[0]);
1012
-
1013
-
1014
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
1015
- check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1016
- }
1017
- }
1018
-
1019
- // acc 1d
1020
- {
1021
- srand(seed);
1022
- int64_t ne2[4] = { 1, 1, 1, 1 };
1023
-
1024
- const int nargs = 2;
1025
- for (int ndims = 1; ndims <= 4; ++ndims) {
1026
-
1027
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1028
- ggml_set_param(ctx0, x[0]);
1029
-
1030
- get_random_dims(ne2, 1);
1031
- while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
1032
- get_random_dims(ne2, 1);
1033
- }
1034
-
1035
- x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
1036
- ggml_set_param(ctx0, x[1]);
1037
-
1038
- const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
1039
- const int offset = irand(max_offset) * ggml_element_size(x[0]);
1040
-
1041
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
1042
-
1043
- check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1044
- }
1045
- }
1046
-
1047
- // acc 2d
1048
- {
1049
- srand(seed);
1050
- int64_t ne2[4] = { 1, 1, 1, 1 };
1051
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
1052
- int64_t offsets[4] = { 0, 0, 0, 0 };
1053
-
1054
- const int nargs = 2;
1055
- for (int ndims = 2; ndims <= 4; ++ndims) {
1056
-
1057
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1058
- ggml_set_param(ctx0, x[0]);
1059
-
1060
- get_random_dims(ne2, 2);
1061
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
1062
- get_random_dims(ne2, 2);
1063
- }
1064
-
1065
- x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
1066
- ggml_set_param(ctx0, x[1]);
1067
-
1068
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
1069
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
1070
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
1071
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
1072
- const int offset = offsets[0] + offsets[1];
1073
-
1074
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
1075
-
1076
- check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1077
- }
1078
- }
1079
-
1080
- // acc 3d
1081
- {
1082
- srand(seed);
1083
- int64_t ne2[4] = { 1, 1, 1, 1 };
1084
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
1085
- int64_t offsets[4] = { 0, 0, 0, 0 };
1086
-
1087
- const int nargs = 2;
1088
- for (int ndims = 3; ndims <= 4; ++ndims) {
1089
-
1090
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1091
- ggml_set_param(ctx0, x[0]);
1092
-
1093
- get_random_dims(ne2, 3);
1094
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
1095
- get_random_dims(ne2, 3);
1096
- }
1097
-
1098
- x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
1099
- ggml_set_param(ctx0, x[1]);
1100
-
1101
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
1102
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
1103
- max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
1104
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
1105
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
1106
- offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
1107
- const int offset = offsets[0] + offsets[1] + offsets[2];
1108
-
1109
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
1110
-
1111
- check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1112
- }
1113
- }
1114
-
1115
- // acc 4d
1116
- {
1117
- srand(seed);
1118
- int64_t ne2[4] = { 1, 1, 1, 1 };
1119
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
1120
- int64_t offsets[4] = { 0, 0, 0, 0 };
1121
-
1122
- const int nargs = 2;
1123
- for (int ndims = 4; ndims <= 4; ++ndims) {
1124
-
1125
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1126
- ggml_set_param(ctx0, x[0]);
1127
-
1128
- get_random_dims(ne2, 4);
1129
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
1130
- get_random_dims(ne2, 4);
1131
- }
1132
-
1133
- x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
1134
- ggml_set_param(ctx0, x[1]);
1135
-
1136
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
1137
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
1138
- max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
1139
- max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
1140
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
1141
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
1142
- offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
1143
- offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
1144
- const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];
1145
-
1146
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
1147
-
1148
- check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1149
- }
1150
- }
1151
-
1152
- // set_1d
1153
- {
1154
- srand(seed);
1155
- int64_t ne2[4];
1156
-
1157
- const int nargs = 2;
1158
- for (int ndims = 1; ndims <= 4; ++ndims) {
1159
-
1160
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1161
- ggml_set_param(ctx0, x[0]);
1162
-
1163
- get_random_dims(ne2, 1);
1164
- while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
1165
- get_random_dims(ne2, 1);
1166
- }
1167
-
1168
- x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
1169
- ggml_set_param(ctx0, x[1]);
1170
-
1171
- const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
1172
- const int offset = irand(max_offset) * ggml_element_size(x[0]);
1173
-
1174
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));
1175
-
1176
- check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1177
- }
1178
- }
1179
-
1180
- // set_2d
1181
- {
1182
- srand(seed);
1183
- int64_t ne2[4];
1184
- int64_t max_offsets[4] = { 0, 0, 0, 0 };
1185
- int64_t offsets[4] = { 0, 0, 0, 0 };
1186
-
1187
- const int nargs = 1;
1188
- for (int ndims = 2; ndims <= 4; ++ndims) {
1189
-
1190
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1191
- ggml_set_param(ctx0, x[0]);
1192
-
1193
- get_random_dims(ne2, 2);
1194
- while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
1195
- get_random_dims(ne2, 2);
1196
- }
1197
-
1198
- x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
1199
- ggml_set_param(ctx0, x[1]);
1200
-
1201
- max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
1202
- max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
1203
- offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
1204
- offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
1205
- const int offset = offsets[0] + offsets[1];
1206
-
1207
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));
1208
-
1209
- check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1210
- }
1211
- }
1212
-
1213
- // view_1d
1214
- {
1215
- srand(seed);
1216
- const int nargs = 1;
1217
- for (int ndims = 1; ndims <= 4; ++ndims) {
1218
-
1219
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1220
-
1221
- ggml_set_param(ctx0, x[0]);
1222
-
1223
- const int k0 = irand(ggml_nelements(x[0]));
1224
- const int k1 = irand(ggml_nelements(x[0]));
1225
- const int i0 = MIN(k0, k1);
1226
- const int i1 = MAX(k0, k1);
1227
-
1228
- const int offset = i0 * sizeof(float);
1229
- const int nelem = i1 - i0;
1230
-
1231
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));
1232
-
1233
- check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1234
- }
1235
- }
1236
-
1237
- // view_2d
1238
- {
1239
- srand(seed);
1240
- int64_t ne2[4];
1241
- int64_t nb2[4];
1242
-
1243
- const int nargs = 1;
1244
- for (int ndims = 1; ndims <= 4; ++ndims) {
1245
-
1246
- x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
1247
-
1248
- get_random_dims(ne2, 2);
1249
- while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
1250
- get_random_dims(ne2, 2);
1251
- }
1252
- const int count = ne2[0]*ne2[1];
1253
-
1254
- nb2[0] = sizeof(float);
1255
- nb2[1] = nb2[0]*ne2[0];
1256
-
1257
- ggml_set_param(ctx0, x[0]);
1258
-
1259
- const int max_offset = ggml_nelements(x[0]) - count;
1260
- const int offset = irand(max_offset+1) * sizeof(float);
1261
-
1262
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));
1263
-
1264
- check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
1265
- }
1266
- }
1267
-
-        // view_3d
-        {
-            srand(seed);
-            int64_t ne2[4] = {1,1,1,1};
-            int64_t nb2[4] = {0,0,0,0};
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-
-                get_random_dims(ne2, 3);
-                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
-                    get_random_dims(ne2, 3);
-                }
-                const int count = ne2[0]*ne2[1]*ne2[2];
-
-                nb2[0] = sizeof(float);
-                nb2[1] = nb2[0]*ne2[0];
-                nb2[2] = nb2[1]*ne2[1];
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int max_offset = ggml_nelements(x[0]) - count;
-                const int offset = irand(max_offset+1) * sizeof(float);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
-
-                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
-            }
-        }
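A note on the stride setup in the view tests above: ggml addresses tensors through byte strides, so nb2[0] is the element size and each higher stride is the previous stride times the previous extent. A minimal standalone sketch of that arithmetic (the extents {3, 4, 5} are illustrative values, not taken from this test):

    #include <cstdio>

    int main() {
        // hypothetical extents for a 3-D float tensor
        const long ne[3] = {3, 4, 5};
        long nb[3];
        nb[0] = sizeof(float); // 4 bytes between consecutive elements
        nb[1] = nb[0] * ne[0]; // 4 * 3 = 12 bytes between rows
        nb[2] = nb[1] * ne[1]; // 12 * 4 = 48 bytes between planes
        std::printf("nb = {%ld, %ld, %ld}\n", nb[0], nb[1], nb[2]); // nb = {4, 12, 48}
        return 0;
    }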
-
-        // permute
-        {
-            srand(seed);
-            int64_t ne2[4];
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims)
-            {
-                // ggml_permute will set axes of dimensions below n_dims to 1.
-                // to make ggml_permute work correctly on all axes,
-                // the input tensor needs to have the maximal n_dims of 4.
-                for (int i = 0; i < ndims; ++i) {
-                    ne2[i] = ne[i];
-                }
-                for (int i = ndims; i < 4; ++i) {
-                    ne2[i] = 1;
-                }
-                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int p = irand(NUM_PERMUTATIONS);
-                const int ax0 = all_permutations[p*4+0];
-                const int ax1 = all_permutations[p*4+1];
-                const int ax2 = all_permutations[p*4+2];
-                const int ax3 = all_permutations[p*4+3];
-
-                // sum requires contiguous tensor rows
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));
-
-                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
-            }
-        }
-
-        // transpose
-        {
-            srand(seed);
-            int64_t ne2[4];
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims)
-            {
-                // ggml_transpose will set axes of dimensions below n_dims to 1.
-                // to make ggml_transpose work correctly on all axes,
-                // the input tensor needs to have the maximal n_dims of 4.
-                for (int i = 0; i < ndims; ++i) {
-                    ne2[i] = ne[i];
-                }
-                for (int i = ndims; i < 4; ++i) {
-                    ne2[i] = 1;
-                }
-                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
-
-                ggml_set_param(ctx0, x[0]);
-
-                // sum requires contiguous tensor rows
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));
-
-                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
-            }
-        }
-
-        // get_rows
-        {
-            srand(seed);
-            int64_t ne2[4] = {ne[0], ne[1], 1, 1};
-            int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
-            const int nargs = 1;
-            const int ndims = 2;
-            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-            x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);
-
-            ggml_set_param(ctx0, x[0]);
-
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));
-
-            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
-        }
-
-        // diag_mask_inf
-        {
-            srand(seed);
-            const int nargs = 1;
-            const int ndims = 2;
-
-            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-            ggml_set_param(ctx0, x[0]);
-
-            int n_past = irand(ne[0]);
-
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));
-
-            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
-        }
-
-        // diag_mask_zero
-        {
-            srand(seed);
-            const int nargs = 1;
-            const int ndims = 2;
-
-            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-            ggml_set_param(ctx0, x[0]);
-
-            int n_past = irand(ne[0]);
-
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));
-
-            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
-        }
-
-        // softmax
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-
-            for (int ndims = 1; ndims <= 3; ++ndims) {
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                float eps = 1e-6f;
-                // don't use plain sum as the aggregation, because the sum of softmax is always 1 -> finite differences would not work
-                // instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
-                struct ggml_tensor * f = ggml_sum(ctx0,
-                    ggml_log(ctx0,
-                        ggml_add1(ctx0,
-                            ggml_scale(ctx0,
-                                ggml_soft_max(ctx0, x[0]),
-                                1.0f - eps),
-                            ggml_new_f32(ctx0, eps))));
-
-                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY, {});
-                // NOTE: softmax forward is computed using an f16 table lookup instead of the actual expf, but backward assumes the actual expf.
-                // this may result in gradients that differ from finite differences.
-                // when this test reports errors, first try to replace the table lookup with the actual expf and test again to see if just that was the cause.
-                // if only the table lookup causes gradients to differ, this is acceptable.
-            }
-        }
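Why the log-based aggregation above is needed: softmax outputs sum to 1 for any input, so aggregating with a plain sum yields a constant whose true gradient is identically zero, and finite differences would only measure rounding noise. Writing p_i = softmax(x)_i, the test instead checks

    f = \sum_i \log\bigl(p_i (1 - \epsilon) + \epsilon\bigr),
    \qquad
    \frac{\partial f}{\partial p_i} = \frac{1 - \epsilon}{p_i (1 - \epsilon) + \epsilon},

which is non-constant in the p_i, while the \epsilon term keeps the logarithm finite even if some p_i underflows to 0.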
-
-        // cross_entropy_loss
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
-                // the second argument to cross_entropy_loss must sum up to 1 for each row
-                int nr = ggml_nrows(x[1]);
-                int nc = ggml_nelements(x[1]) / nr;
-                for (int ir = 0; ir < nr; ++ir) {
-                    float sum = 0;
-                    for (int ic = 0; ic < nc; ++ic) {
-                        sum += ((float *) x[1]->data)[ic + ir*nc];
-                    }
-                    for (int ic = 0; ic < nc; ++ic) {
-                        ((float *) x[1]->data)[ic + ir*nc] /= sum;
-                    }
-                }
-                ggml_set_param(ctx0, x[0]);
-
-                struct ggml_tensor * f = ggml_cross_entropy_loss(ctx0, x[0], x[1]);
-
-                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, {});
-            }
-        }
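The row normalization in this block makes the second argument a valid probability distribution, which cross entropy requires of its target. With target rows y (each summing to 1) and the predicted distribution p derived from the logits in x[0], the per-row loss is

    L = -\sum_i y_i \log p_i,

so an unnormalized target would scale the loss and its gradient by an arbitrary factor; dividing each row by its sum, as the loop above does (y_i \leftarrow y_i / \sum_j y_j), removes that degree of freedom.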
-
-        // rope f32
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-            ne2[0] += ne2[0] % 2;
-            int n_rot = ne2[0];
-
-            for (int ndims = 3; ndims <= 4; ++ndims) {
-                for (int mode = 0; mode < 4; ++mode) {
-                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
-                        x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-
-                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
-                        for (int i = 0; i < ne2[2]; ++i) {
-                            ((int32_t *) p->data)[i] = n_past + i;
-                        }
-
-                        ggml_set_param(ctx0, x[0]);
-
-                        const bool skip_past = (mode & 1);
-                        if (skip_past) {
-                            // we have no past, so this would have to work on uninitialized memory.
-                            // we only test the gradients here;
-                            // skip_past should have no influence on gradient computation.
-                            // so when other modes work, we assume that this does as well.
-                            continue;
-                        }
-
-                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
-
-                        GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
-                        check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
-                    }
-                }
-            }
-        }
-
-        // rope f16
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-            ne2[0] += ne2[0] % 2;
-            int n_rot = ne2[0];
-
-            for (int ndims = 3; ndims <= 4; ++ndims) {
-                for (int mode = 0; mode < 4; ++mode) {
-                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
-                        x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
-
-                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
-                        for (int i = 0; i < ne2[2]; ++i) {
-                            ((int32_t *) p->data)[i] = n_past + i;
-                        }
-
-                        ggml_set_param(ctx0, x[0]);
-
-                        const bool skip_past = (mode & 1);
-                        if (skip_past) {
-                            // we have no past, so this would have to work on uninitialized memory.
-                            // we only test the gradients here;
-                            // skip_past should have no influence on gradient computation.
-                            // so when other modes work, we assume that this does as well.
-                            continue;
-                        }
-
-                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
-
-                        GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
-                        check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY, {});
-                    }
-                }
-            }
-        }
-
-        // im2col f32
-        {
-            srand(seed);
-            const int nargs = 1;
-            const int ndims = 4;
-
-            for (const bool is_2D : {false, true}) {
-                int64_t ne0[ndims];
-                int64_t ne1[ndims];
-                get_random_dims(ne0, ndims);
-                get_random_dims(ne1, ndims);
-
-                // Ensure that the output is not zero-sized:
-                ne1[0] += 8;
-                ne1[1] += 8;
-
-                if (is_2D) {
-                    ne1[2] = ne0[2];
-                } else {
-                    ne1[1] = ne0[1];
-                    ne0[3] = 1;
-                    ne1[3] = 1;
-                }
-
-                // The order of arguments is swapped because the first tensor is only used for its shape.
-                x[1] = get_random_tensor_f16(ctx0, ndims, ne0, -1.0f, 1.0f);
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne1, -1.0f, 1.0f);
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int s0 = 1 + irand(2);
-                const int s1 = is_2D ? 1 + irand(2) : 0;
-                const int p0 = 0 + irand(2);
-                const int p1 = is_2D ? 0 + irand(2) : 0;
-                const int d0 = 1 + irand(2);
-                const int d1 = is_2D ? 1 + irand(2) : 0;
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_im2col(ctx0, x[1], x[0], s0, s1, p0, p1, d0, d1, is_2D, GGML_TYPE_F32));
-
-                GGML_PRINT_DEBUG("im2col f32: is_2D=%s, s0=%d, s1=%d, p0=%d, p1=%d, d0=%d, d1=%d\n", is_2D ? "yes" : "no", s0, s1, p0, p1, d0, d1);
-                check_gradient("im2col f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY, {});
-            }
-        }
-
-        // pool_2d f32
-        {
-            srand(seed);
-            const int nargs = 1;
-            const int ndims = 4;
-
-            for (const enum ggml_op_pool op : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) {
-                int64_t ne0[ndims];
-                get_random_dims(ne0, ndims);
-
-                ne0[0] += 8;
-                ne0[1] += 8;
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne0, -1.0f, 1.0f);
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int k0 = 2 + irand(2);
-                const int k1 = 2 + irand(2);
-                const int s0 = 2 + irand(2);
-                const int s1 = 2 + irand(2);
-                const int p0 = 0 + irand(2);
-                const int p1 = 0 + irand(2);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_pool_2d(ctx0, x[0], op, k0, k1, s0, s1, p0, p1));
-
-                GGML_PRINT_DEBUG("ggml_pool_2d f32: op=%s k0=%d, k1=%d, s0=%d, s1=%d, p0=%d, p1=%d\n",
-                                 op == GGML_OP_POOL_MAX ? "max" : "avg", k0, k1, s0, s1, p0, p1);
-                std::vector<double> expected_vals;
-                if (op == GGML_OP_POOL_MAX) {
-                    expected_vals.push_back(0.0);
-                    expected_vals.push_back(1.0);
-                }
-                check_gradient("ggml_pool_2d f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY, expected_vals);
-            }
-        }
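The expected_vals passed for max pooling reflect a property of its backward pass: the gradient of a max-pool output with respect to an input element is an indicator,

    \frac{\partial y_w}{\partial x_i} = [\, i = \arg\max_{j \in w} x_j \,],

so under the sum aggregation each input's analytic gradient should land in {0, 1} when pooling windows do not overlap. Presumably (reading the check_gradient call, not its definition, which is outside this diff) the extra argument restricts the accepted gradient values for this case, since pure finite-difference thresholds are fragile at the non-differentiable points where two window entries tie.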
-
-        // flash_attn f32
-        // TODO: adapt to ggml_flash_attn_ext() changes
-        //{
-        //    srand(seed);
-        //    const int nargs = 3;
-
-        //    int64_t ne2[4];
-
-        //    get_random_dims(ne2, 4);
-        //    int64_t D = ne2[0];
-        //    int64_t N = ne2[1];
-        //    int64_t M = ne2[2] + N;
-        //    int64_t B = ne2[3];
-
-        //    for (int masked = 0; masked <= 1; ++masked) {
-        //        for (int ndims = 2; ndims <= 4; ++ndims) {
-        //            int max_nrep = (ndims >= 3) ? 2 : 1;
-        //            for (int nrep = 1; nrep < max_nrep; ++nrep) {
-        //                int64_t neq[4] = { D, N, B*nrep, ne[3] };
-        //                int64_t nek[4] = { D, M, B, ne[3] };
-        //                int64_t nev[4] = { M, D, B, ne[3] };
-        //                if (ndims == 2) {
-        //                    neq[2] = 1; neq[3] = 1;
-        //                    nek[2] = 1; nek[3] = 1;
-        //                    nev[2] = 1; nev[3] = 1;
-        //                } else if (ndims == 3) {
-        //                    neq[3] = 1;
-        //                    nek[3] = 1;
-        //                    nev[3] = 1;
-        //                }
-        //                x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
-        //                x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
-        //                x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
-        //                ggml_set_param(ctx0, x[0]);
-        //                ggml_set_param(ctx0, x[1]);
-        //                ggml_set_param(ctx0, x[2]);
-
-        //                struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
-
-        //                check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY, {});
-        //            }
-        //        }
-        //    }
-        //}
-
-        ggml_free(ctx0);
-    }
-
-    return 0;
-}
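For context on what every check_gradient call in this deleted file is doing: a gradient checker of this kind computes the analytic gradient by backpropagation and compares it, element by element, against a central finite difference. A self-contained sketch of the idea follows (plain C++, no ggml; the test function, step size h, and threshold are illustrative assumptions, not the deleted test's actual internals):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    int main() {
        // f(x) = sum_i log(x_i^2 + 1), with analytic gradient df/dx_i = 2*x_i / (x_i^2 + 1)
        std::vector<double> x = {0.3, -1.2, 0.7, 2.0};
        auto f = [](const std::vector<double> & v) {
            double s = 0.0;
            for (double e : v) s += std::log(e*e + 1.0);
            return s;
        };

        const double h       = 1e-5; // finite-difference step
        const double max_err = 1e-6; // absolute threshold, analogous to max_error_abs above

        for (size_t i = 0; i < x.size(); ++i) {
            const double analytic = 2.0*x[i] / (x[i]*x[i] + 1.0);

            // central difference: (f(x+h) - f(x-h)) / (2h)
            std::vector<double> xp = x, xm = x;
            xp[i] += h;
            xm[i] -= h;
            const double numeric = (f(xp) - f(xm)) / (2.0*h);

            const double err = std::fabs(analytic - numeric);
            std::printf("i=%zu analytic=%+.8f numeric=%+.8f err=%.2e %s\n",
                        i, analytic, numeric, err, err < max_err ? "OK" : "FAIL");
        }
        return 0;
    }

The central difference has O(h^2) truncation error, which is why a small step combined with absolute/relative tolerances like the max_error arguments in the deleted code is usually enough to flag a broken backward implementation.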