@fugood/llama.node 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. package/CMakeLists.txt +1 -8
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/package.json +4 -2
  17. package/src/DetokenizeWorker.cpp +1 -1
  18. package/src/EmbeddingWorker.cpp +2 -2
  19. package/src/LlamaCompletionWorker.cpp +10 -10
  20. package/src/LlamaCompletionWorker.h +2 -2
  21. package/src/LlamaContext.cpp +14 -17
  22. package/src/TokenizeWorker.cpp +1 -1
  23. package/src/common.hpp +5 -4
  24. package/src/llama.cpp/.github/workflows/build.yml +137 -29
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +5 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +46 -34
  27. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +7 -0
  28. package/src/llama.cpp/.github/workflows/nix-ci.yml +7 -0
  29. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +2 -4
  30. package/src/llama.cpp/.github/workflows/python-type-check.yml +3 -1
  31. package/src/llama.cpp/.github/workflows/server.yml +7 -0
  32. package/src/llama.cpp/CMakeLists.txt +26 -11
  33. package/src/llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
  34. package/src/llama.cpp/common/CMakeLists.txt +10 -10
  35. package/src/llama.cpp/common/arg.cpp +2041 -0
  36. package/src/llama.cpp/common/arg.h +77 -0
  37. package/src/llama.cpp/common/common.cpp +523 -1861
  38. package/src/llama.cpp/common/common.h +234 -106
  39. package/src/llama.cpp/common/console.cpp +3 -0
  40. package/src/llama.cpp/common/json-schema-to-grammar.cpp +1 -1
  41. package/src/llama.cpp/common/log.cpp +401 -0
  42. package/src/llama.cpp/common/log.h +66 -698
  43. package/src/llama.cpp/common/ngram-cache.cpp +39 -36
  44. package/src/llama.cpp/common/ngram-cache.h +19 -19
  45. package/src/llama.cpp/common/sampling.cpp +356 -350
  46. package/src/llama.cpp/common/sampling.h +62 -139
  47. package/src/llama.cpp/common/stb_image.h +5990 -6398
  48. package/src/llama.cpp/docs/build.md +72 -17
  49. package/src/llama.cpp/examples/CMakeLists.txt +1 -2
  50. package/src/llama.cpp/examples/batched/batched.cpp +49 -65
  51. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +42 -53
  52. package/src/llama.cpp/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp +55 -52
  53. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +22 -22
  54. package/src/llama.cpp/examples/cvector-generator/pca.hpp +3 -13
  55. package/src/llama.cpp/examples/embedding/embedding.cpp +147 -91
  56. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +37 -37
  57. package/src/llama.cpp/examples/export-lora/export-lora.cpp +39 -38
  58. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +14 -39
  59. package/src/llama.cpp/examples/{baby-llama → gen-docs}/CMakeLists.txt +2 -2
  60. package/src/llama.cpp/examples/gen-docs/gen-docs.cpp +83 -0
  61. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +58 -39
  62. package/src/llama.cpp/examples/gritlm/gritlm.cpp +46 -39
  63. package/src/llama.cpp/examples/imatrix/imatrix.cpp +75 -69
  64. package/src/llama.cpp/examples/infill/infill.cpp +131 -192
  65. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +276 -178
  66. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +1 -0
  67. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +40 -36
  68. package/src/llama.cpp/examples/llava/CMakeLists.txt +7 -0
  69. package/src/llama.cpp/examples/llava/clip.cpp +686 -150
  70. package/src/llama.cpp/examples/llava/clip.h +11 -2
  71. package/src/llama.cpp/examples/llava/llava-cli.cpp +60 -71
  72. package/src/llama.cpp/examples/llava/llava.cpp +146 -26
  73. package/src/llama.cpp/examples/llava/llava.h +2 -3
  74. package/src/llama.cpp/examples/llava/minicpmv-cli.cpp +323 -0
  75. package/src/llama.cpp/examples/llava/requirements.txt +1 -0
  76. package/src/llama.cpp/examples/lookahead/lookahead.cpp +55 -56
  77. package/src/llama.cpp/examples/lookup/lookup-create.cpp +15 -13
  78. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  79. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +34 -33
  80. package/src/llama.cpp/examples/lookup/lookup.cpp +60 -63
  81. package/src/llama.cpp/examples/main/main.cpp +216 -313
  82. package/src/llama.cpp/examples/parallel/parallel.cpp +58 -59
  83. package/src/llama.cpp/examples/passkey/passkey.cpp +53 -61
  84. package/src/llama.cpp/examples/perplexity/perplexity.cpp +277 -311
  85. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  86. package/src/llama.cpp/examples/quantize/quantize.cpp +27 -9
  87. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +12 -12
  88. package/src/llama.cpp/examples/retrieval/retrieval.cpp +57 -52
  89. package/src/llama.cpp/examples/rpc/rpc-server.cpp +27 -2
  90. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +60 -46
  91. package/src/llama.cpp/examples/server/CMakeLists.txt +7 -18
  92. package/src/llama.cpp/examples/server/server.cpp +1347 -1531
  93. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -1
  94. package/src/llama.cpp/examples/server/utils.hpp +396 -107
  95. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/simple/simple.cpp +132 -106
  97. package/src/llama.cpp/examples/simple-chat/CMakeLists.txt +5 -0
  98. package/src/llama.cpp/examples/simple-chat/simple-chat.cpp +197 -0
  99. package/src/llama.cpp/examples/speculative/speculative.cpp +153 -124
  100. package/src/llama.cpp/examples/sycl/run-llama2.sh +10 -19
  101. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +1 -1
  102. package/src/llama.cpp/examples/tokenize/tokenize.cpp +27 -29
  103. package/src/llama.cpp/ggml/CMakeLists.txt +29 -12
  104. package/src/llama.cpp/ggml/include/ggml-alloc.h +3 -3
  105. package/src/llama.cpp/ggml/include/ggml-amx.h +25 -0
  106. package/src/llama.cpp/ggml/include/ggml-backend.h +166 -68
  107. package/src/llama.cpp/ggml/include/ggml-blas.h +5 -3
  108. package/src/llama.cpp/ggml/include/ggml-cann.h +17 -19
  109. package/src/llama.cpp/ggml/include/ggml-cpp.h +38 -0
  110. package/src/llama.cpp/ggml/include/ggml-cpu.h +177 -0
  111. package/src/llama.cpp/ggml/include/ggml-cuda.h +17 -17
  112. package/src/llama.cpp/ggml/include/ggml-kompute.h +7 -3
  113. package/src/llama.cpp/ggml/include/ggml-metal.h +13 -12
  114. package/src/llama.cpp/ggml/include/ggml-opt.h +216 -0
  115. package/src/llama.cpp/ggml/include/ggml-rpc.h +9 -5
  116. package/src/llama.cpp/ggml/include/ggml-sycl.h +18 -11
  117. package/src/llama.cpp/ggml/include/ggml-vulkan.h +10 -8
  118. package/src/llama.cpp/ggml/include/ggml.h +272 -505
  119. package/src/llama.cpp/ggml/src/CMakeLists.txt +69 -1110
  120. package/src/llama.cpp/ggml/src/ggml-aarch64.c +52 -2116
  121. package/src/llama.cpp/ggml/src/ggml-aarch64.h +0 -20
  122. package/src/llama.cpp/ggml/src/ggml-alloc.c +29 -27
  123. package/src/llama.cpp/ggml/src/ggml-amx/CMakeLists.txt +107 -0
  124. package/src/llama.cpp/ggml/src/ggml-amx/common.h +94 -0
  125. package/src/llama.cpp/ggml/src/ggml-amx/ggml-amx.cpp +446 -0
  126. package/src/llama.cpp/ggml/src/ggml-amx/mmq.cpp +2510 -0
  127. package/src/llama.cpp/ggml/src/ggml-amx/mmq.h +17 -0
  128. package/src/llama.cpp/ggml/src/ggml-backend-impl.h +144 -81
  129. package/src/llama.cpp/ggml/src/ggml-backend-reg.cpp +195 -0
  130. package/src/llama.cpp/ggml/src/{ggml-backend.c → ggml-backend.cpp} +394 -635
  131. package/src/llama.cpp/ggml/src/ggml-blas/CMakeLists.txt +91 -0
  132. package/src/llama.cpp/ggml/src/{ggml-blas.cpp → ggml-blas/ggml-blas.cpp} +217 -70
  133. package/src/llama.cpp/ggml/src/ggml-cann/CMakeLists.txt +46 -0
  134. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +4 -27
  135. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +32 -4
  136. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +179 -41
  137. package/src/llama.cpp/ggml/src/ggml-cann/common.h +1 -0
  138. package/src/llama.cpp/ggml/src/{ggml-cann.cpp → ggml-cann/ggml-cann.cpp} +458 -353
  139. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +2 -1
  140. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +2 -0
  141. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp +278 -0
  142. package/src/llama.cpp/ggml/src/ggml-common.h +20 -0
  143. package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +261 -0
  144. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +3560 -0
  145. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-aarch64.h +30 -0
  146. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h +371 -0
  147. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.c +10822 -0
  148. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-quants.h +63 -0
  149. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +13970 -0
  150. package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp +663 -0
  151. package/src/llama.cpp/ggml/src/ggml-cpu/llamafile/sgemm.cpp +1885 -0
  152. package/src/llama.cpp/ggml/src/ggml-cuda/CMakeLists.txt +155 -0
  153. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h +14 -0
  154. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/hip.h +178 -0
  155. package/src/llama.cpp/ggml/src/ggml-cuda/vendors/musa.h +134 -0
  156. package/src/llama.cpp/ggml/src/ggml-hip/CMakeLists.txt +106 -0
  157. package/src/llama.cpp/ggml/src/ggml-impl.h +380 -584
  158. package/src/llama.cpp/ggml/src/ggml-kompute/CMakeLists.txt +162 -0
  159. package/src/llama.cpp/ggml/src/{ggml-kompute.cpp → ggml-kompute/ggml-kompute.cpp} +233 -87
  160. package/src/llama.cpp/ggml/src/ggml-metal/CMakeLists.txt +108 -0
  161. package/src/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h +249 -0
  162. package/src/llama.cpp/ggml/src/ggml-musa/CMakeLists.txt +100 -0
  163. package/src/llama.cpp/ggml/src/ggml-opt.cpp +867 -0
  164. package/src/llama.cpp/ggml/src/ggml-quants.c +369 -9994
  165. package/src/llama.cpp/ggml/src/ggml-quants.h +78 -110
  166. package/src/llama.cpp/ggml/src/ggml-rpc/CMakeLists.txt +11 -0
  167. package/src/llama.cpp/ggml/src/{ggml-rpc.cpp → ggml-rpc/ggml-rpc.cpp} +560 -335
  168. package/src/llama.cpp/ggml/src/ggml-sycl/CMakeLists.txt +81 -0
  169. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +6 -0
  170. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +51 -0
  171. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +310 -0
  172. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +1 -0
  173. package/src/llama.cpp/ggml/src/ggml-sycl/conv.cpp +99 -0
  174. package/src/llama.cpp/ggml/src/ggml-sycl/conv.hpp +21 -0
  175. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +57 -57
  176. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +1 -1
  177. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +106 -106
  178. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +4 -4
  179. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +18 -25
  180. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.cpp +1011 -0
  181. package/src/llama.cpp/ggml/src/ggml-sycl/element_wise.hpp +76 -0
  182. package/src/llama.cpp/ggml/src/ggml-sycl/gemm.hpp +101 -0
  183. package/src/llama.cpp/ggml/src/{ggml-sycl.cpp → ggml-sycl/ggml-sycl.cpp} +3350 -3980
  184. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.cpp +125 -0
  185. package/src/llama.cpp/ggml/src/ggml-sycl/im2col.hpp +23 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +70 -68
  187. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +9 -6
  188. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.cpp +56 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/outprod.hpp +11 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +8 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +1 -1
  192. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.cpp +71 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/tsembd.hpp +21 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +4 -4
  195. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.cpp +138 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/wkv6.hpp +10 -0
  197. package/src/llama.cpp/ggml/src/ggml-threading.cpp +12 -0
  198. package/src/llama.cpp/ggml/src/ggml-threading.h +12 -0
  199. package/src/llama.cpp/ggml/src/ggml-vulkan/CMakeLists.txt +78 -0
  200. package/src/llama.cpp/ggml/src/{ggml-vulkan.cpp → ggml-vulkan/ggml-vulkan.cpp} +2034 -1718
  201. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/CMakeLists.txt +2 -0
  202. package/src/llama.cpp/ggml/src/{vulkan-shaders → ggml-vulkan/vulkan-shaders}/vulkan-shaders-gen.cpp +152 -185
  203. package/src/llama.cpp/ggml/src/ggml.c +2075 -16579
  204. package/src/llama.cpp/include/llama.h +296 -285
  205. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.inp +112 -0
  206. package/src/llama.cpp/models/ggml-vocab-chameleon.gguf.out +46 -0
  207. package/src/llama.cpp/pocs/vdot/q8dot.cpp +4 -3
  208. package/src/llama.cpp/pocs/vdot/vdot.cpp +8 -7
  209. package/src/llama.cpp/requirements/requirements-convert_legacy_llama.txt +1 -1
  210. package/src/llama.cpp/src/CMakeLists.txt +2 -1
  211. package/src/llama.cpp/src/llama-grammar.cpp +721 -122
  212. package/src/llama.cpp/src/llama-grammar.h +120 -15
  213. package/src/llama.cpp/src/llama-impl.h +156 -1
  214. package/src/llama.cpp/src/llama-sampling.cpp +2058 -346
  215. package/src/llama.cpp/src/llama-sampling.h +39 -47
  216. package/src/llama.cpp/src/llama-vocab.cpp +390 -127
  217. package/src/llama.cpp/src/llama-vocab.h +60 -20
  218. package/src/llama.cpp/src/llama.cpp +6215 -3263
  219. package/src/llama.cpp/src/unicode-data.cpp +6 -4
  220. package/src/llama.cpp/src/unicode-data.h +4 -4
  221. package/src/llama.cpp/src/unicode.cpp +15 -7
  222. package/src/llama.cpp/tests/CMakeLists.txt +4 -2
  223. package/src/llama.cpp/tests/test-arg-parser.cpp +131 -0
  224. package/src/llama.cpp/tests/test-backend-ops.cpp +1725 -297
  225. package/src/llama.cpp/tests/test-barrier.cpp +94 -0
  226. package/src/llama.cpp/tests/test-chat-template.cpp +9 -5
  227. package/src/llama.cpp/tests/test-grammar-integration.cpp +23 -38
  228. package/src/llama.cpp/tests/test-grammar-parser.cpp +6 -4
  229. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +23 -8
  230. package/src/llama.cpp/tests/test-llama-grammar.cpp +9 -8
  231. package/src/llama.cpp/tests/test-log.cpp +39 -0
  232. package/src/llama.cpp/tests/test-opt.cpp +853 -142
  233. package/src/llama.cpp/tests/test-quantize-fns.cpp +28 -19
  234. package/src/llama.cpp/tests/test-quantize-perf.cpp +16 -14
  235. package/src/llama.cpp/tests/test-rope.cpp +2 -1
  236. package/src/llama.cpp/tests/test-sampling.cpp +226 -142
  237. package/src/llama.cpp/tests/test-tokenizer-0.cpp +56 -36
  238. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +5 -5
  239. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +5 -5
  240. package/patches/llama.patch +0 -22
  241. package/src/llama.cpp/.github/workflows/bench.yml +0 -310
  242. package/src/llama.cpp/common/grammar-parser.cpp +0 -536
  243. package/src/llama.cpp/common/grammar-parser.h +0 -29
  244. package/src/llama.cpp/common/train.cpp +0 -1513
  245. package/src/llama.cpp/common/train.h +0 -233
  246. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +0 -1640
  247. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +0 -6
  248. package/src/llama.cpp/examples/benchmark/benchmark-matmult.cpp +0 -275
  249. package/src/llama.cpp/ggml/src/llamafile/sgemm.cpp +0 -1027
  250. package/src/llama.cpp/tests/test-grad0.cpp +0 -1566
  251. /package/src/llama.cpp/ggml/{cmake → src/ggml-cpu/cmake}/FindSIMD.cmake +0 -0
  252. /package/src/llama.cpp/ggml/src/{llamafile → ggml-cpu/llamafile}/sgemm.h +0 -0
--- a/package/src/llama.cpp/tests/test-grad0.cpp
+++ /dev/null
@@ -1,1566 +0,0 @@
-#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnings on Windows
-#include "ggml.h"
-
-#include <cmath>
-#include <cstdio>
-#include <cstdlib>
-#include <cassert>
-
-#if defined(_MSC_VER)
-#pragma warning(disable: 4244 4267) // possible loss of data
-#endif
-
-#if defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Wdouble-promotion"
-#endif
-
-#define MAX_NARGS 3
-
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
-#define GGML_SILU_FP16
-
-//
-// logging
-//
-
-#if (GGML_DEBUG >= 1)
-#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG(...)
-#endif
-
-#if (GGML_DEBUG >= 5)
-#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_5(...)
-#endif
-
-#if (GGML_DEBUG >= 10)
-#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_10(...)
-#endif
-
-#define GGML_PRINT(...) printf(__VA_ARGS__)
-
-static float frand(void) {
-    return (float)rand()/(float)RAND_MAX;
-}
-
-static int irand(int n) {
-    if (n == 0) return 0;
-    return rand()%n;
-}
-
-static void get_random_dims(int64_t * dims, int ndims) {
-    dims[0] = dims[1] = dims[2] = dims[3] = 1;
-
-    for (int i = 0; i < ndims; i++) {
-        dims[i] = 1 + irand(4);
-    }
-}
-
-static struct ggml_tensor * get_random_tensor_f32(
-        struct ggml_context * ctx0,
-        int ndims,
-        int64_t ne[],
-        float fmin,
-        float fmax) {
-    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
-
-    switch (ndims) {
-        case 1:
-            for (int i0 = 0; i0 < ne[0]; i0++) {
-                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
-            }
-            break;
-        case 2:
-            for (int i1 = 0; i1 < ne[1]; i1++) {
-                for (int i0 = 0; i0 < ne[0]; i0++) {
-                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
-                }
-            }
-            break;
-        case 3:
-            for (int i2 = 0; i2 < ne[2]; i2++) {
-                for (int i1 = 0; i1 < ne[1]; i1++) {
-                    for (int i0 = 0; i0 < ne[0]; i0++) {
-                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
-                    }
-                }
-            }
-            break;
-        case 4:
-            for (int i3 = 0; i3 < ne[3]; i3++) {
-                for (int i2 = 0; i2 < ne[2]; i2++) {
-                    for (int i1 = 0; i1 < ne[1]; i1++) {
-                        for (int i0 = 0; i0 < ne[0]; i0++) {
-                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
-                        }
-                    }
-                }
-            }
-            break;
-        default:
-            assert(false);
-    }
-
-    return result;
-}
-
-static struct ggml_tensor * get_random_tensor_f16(
-        struct ggml_context * ctx0,
-        int ndims,
-        int64_t ne[],
-        float fmin,
-        float fmax) {
-    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);
-
-    switch (ndims) {
-        case 1:
-            for (int i0 = 0; i0 < ne[0]; i0++) {
-                ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
-            }
-            break;
-        case 2:
-            for (int i1 = 0; i1 < ne[1]; i1++) {
-                for (int i0 = 0; i0 < ne[0]; i0++) {
-                    ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
-                }
-            }
-            break;
-        case 3:
-            for (int i2 = 0; i2 < ne[2]; i2++) {
-                for (int i1 = 0; i1 < ne[1]; i1++) {
-                    for (int i0 = 0; i0 < ne[0]; i0++) {
-                        ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
-                    }
-                }
-            }
-            break;
-        case 4:
-            for (int i3 = 0; i3 < ne[3]; i3++) {
-                for (int i2 = 0; i2 < ne[2]; i2++) {
-                    for (int i1 = 0; i1 < ne[1]; i1++) {
-                        for (int i0 = 0; i0 < ne[0]; i0++) {
-                            ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
-                        }
-                    }
-                }
-            }
-            break;
-        default:
-            assert(false);
-    }
-
-    return result;
-}
-
-static struct ggml_tensor * get_random_tensor_i32(
-        struct ggml_context * ctx0,
-        int ndims,
-        int64_t ne[],
-        int32_t imin,
-        int32_t imax) {
-    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne);
-
-    switch (ndims) {
-        case 1:
-            for (int i0 = 0; i0 < ne[0]; i0++) {
-                ((int32_t *)result->data)[i0] = irand(imax - imin) + imin;
-            }
-            break;
-        case 2:
-            for (int i1 = 0; i1 < ne[1]; i1++) {
-                for (int i0 = 0; i0 < ne[0]; i0++) {
-                    ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin;
-                }
-            }
-            break;
-        case 3:
-            for (int i2 = 0; i2 < ne[2]; i2++) {
-                for (int i1 = 0; i1 < ne[1]; i1++) {
-                    for (int i0 = 0; i0 < ne[0]; i0++) {
-                        ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
-                    }
-                }
-            }
-            break;
-        case 4:
-            for (int i3 = 0; i3 < ne[3]; i3++) {
-                for (int i2 = 0; i2 < ne[2]; i2++) {
-                    for (int i1 = 0; i1 < ne[1]; i1++) {
-                        for (int i0 = 0; i0 < ne[0]; i0++) {
-                            ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin;
-                        }
-                    }
-                }
-            }
-            break;
-        default:
-            assert(false);
-    }
-
-    return result;
-}
-
-static bool check_gradient(
-        const char * op_name,
-        struct ggml_context * ctx0,
-        struct ggml_tensor * x[],
-        struct ggml_tensor * f,
-        int ndims,
-        int nargs,
-        float eps,
-        float max_error_abs,
-        float max_error_rel) {
-
-    static int n_threads = -1;
-    if (n_threads < 0) {
-        n_threads = GGML_DEFAULT_N_THREADS;
-
-        const char *env = getenv("GGML_N_THREADS");
-        if (env) {
-            n_threads = atoi(env);
-        }
-
-        printf("GGML_N_THREADS = %d\n", n_threads);
-    }
-
-    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
-    struct ggml_cgraph * gb = ggml_new_graph_custom(ctx0, GGML_DEFAULT_GRAPH_SIZE, true);
-    ggml_build_forward_expand(gf, f);
-    ggml_graph_cpy(gf, gb);
-    ggml_build_backward_expand(ctx0, gf, gb, false);
-
-    ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
-
-    ggml_graph_reset (gf);
-    ggml_set_f32 (f->grad, 1.0f);
-
-    ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
-
-    // ggml_graph_dump_dot(gf, NULL, "test-grad0-forward.dot");
-    // ggml_graph_dump_dot(gb, gf, "test-grad0-backward.dot");
-
-    for (int i = 0; i < nargs; ++i) {
-        const int nelements = ggml_nelements(x[i]);
-        for (int k = 0; k < nelements; ++k) {
-            // compute gradient using finite differences
-            const float x0 = ggml_get_f32_1d(x[i], k);
-            const float xm = x0 - eps;
-            const float xp = x0 + eps;
-            ggml_set_f32_1d(x[i], k, xp);
-
-            ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
-
-            const double f0 = ggml_get_f32_1d(f, 0);
-
-            ggml_set_f32_1d(x[i], k, xm);
-
-            ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
-
-            const double f1 = ggml_get_f32_1d(f, 0);
-            const double g0 = (f0 - f1)/(2.0*(double) eps);
-
-            ggml_set_f32_1d(x[i], k, x0);
-
-            // compute gradient using backward graph
-            ggml_graph_reset (gf);
-            ggml_set_f32 (f->grad, 1.0f);
-
-            ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
-
-            const double g1 = ggml_get_f32_1d(x[i]->grad, k);
-
-            const double error_abs = fabs(g0 - g1);
-            const double error_rel = g0 != 0 ? fabs(g0 - g1)/fabs(g0) : 0;
-
-            if (error_abs > max_error_abs || error_rel > max_error_rel) {
-                printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n",
-                        op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel);
-                //assert(false);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-// TODO: clean-up this ..
-static bool check_mat_mul(
-        const struct ggml_tensor * y,
-        const struct ggml_tensor * x0,
-        const struct ggml_tensor * x1) {
-    float * dst = (float *) y->data;
-    float * src0 = (float *) x0->data;
-    float * src1 = (float *) x1->data;
-
-    const int nc = x0->ne[1];
-    const int nr = x1->ne[1];
-    const int nk = x0->ne[0];
-
-    GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk);
-
-    GGML_PRINT_DEBUG("x0:\n");
-    for (int j = 0; j < x0->ne[1]; ++j) {
-        for (int i = 0; i < x0->ne[0]; ++i) {
-            GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]);
-        }
-        GGML_PRINT_DEBUG("\n");
-    }
-    GGML_PRINT_DEBUG("\n");
-
-    GGML_PRINT_DEBUG("x1:\n");
-    for (int j = 0; j < x1->ne[1]; ++j) {
-        for (int i = 0; i < x1->ne[0]; ++i) {
-            GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]);
-        }
-        GGML_PRINT_DEBUG("\n");
-    }
-    GGML_PRINT_DEBUG("\n");
-
-    GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]);
-    for (int j = 0; j < y->ne[1]; ++j) {
-        for (int i = 0; i < y->ne[0]; ++i) {
-            GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]);
-        }
-        GGML_PRINT_DEBUG("\n");
-    }
-
-    for (int i = 0; i < nr; ++i) {
-        for (int j = 0; j < nc; ++j) {
-            float sum = 0.0f;
-
-            for (int k = 0; k < nk; ++k) {
-                sum += src0[j*nk + k]*src1[i*nk + k];
-            }
-
-            if (fabsf(dst[i*nc + j] - sum) > 1e-5f) {
-                fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum);
-                assert(false);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-#define NUM_PERMUTATIONS (4*3*2*1)
-
-int main(int argc, const char ** argv) {
-    struct ggml_init_params params = {
-        /* .mem_size = */ 256*1024*1024,
-        /* .mem_buffer = */ NULL,
-        /* .no_alloc = */ false,
-    };
-
-    int64_t ne[4];
-
-    int all_permutations[4 * NUM_PERMUTATIONS];
-    {
-        int count = 0;
-        for (int ax0=0; ax0<4; ++ax0) {
-            for (int ax1=0; ax1<4; ++ax1) {
-                if (ax1 == ax0) continue;
-                for (int ax2=0; ax2<4; ++ax2) {
-                    if (ax2 == ax0) continue;
-                    if (ax2 == ax1) continue;
-                    for (int ax3=0; ax3<4; ++ax3) {
-                        if (ax3 == ax0) continue;
-                        if (ax3 == ax1) continue;
-                        if (ax3 == ax2) continue;
-                        assert(count < NUM_PERMUTATIONS);
-                        all_permutations[count*4+0] = ax0;
-                        all_permutations[count*4+1] = ax1;
-                        all_permutations[count*4+2] = ax2;
-                        all_permutations[count*4+3] = ax3;
-                        ++count;
-                    }
-                }
-            }
-        }
-    }
-
-    unsigned seed_iter = 1;
-
-    // original loop: 1000
-    int niter = 4;
-    const char *env = getenv("GGML_NLOOP");
-    if (env != NULL) {
-        niter = atoi(env);
-    }
-    if (argc > 1) {
-        niter = atoi(argv[1]);
-    }
-    for (int iter = 0; iter < niter; ++iter) {
-        srand(seed_iter);
-        seed_iter = rand();
-        unsigned seed = rand();
-
-        printf("test-grad0: iter:%d/%d\n", iter, niter);
-        struct ggml_context * ctx0 = ggml_init(params);
-
-        get_random_dims(ne, 4);
-
-        struct ggml_tensor * x[MAX_NARGS];
-
-        // add f32
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
-
-                check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
-            }
-        }
-
-        // add f16
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
-
-                check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f);
-            }
-        }
-
-        // sub
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1]));
-
-                check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // mul
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1]));
-
-                check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // div
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1]));
-
-                check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f);
-            }
-        }
-
-        // sqr
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0]));
-
-                check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // sqrt
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0]));
-
-                check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, 2e-2f, 1e-1f);
-            }
-        }
-
-        // log
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0]));
-
-                check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f);
-            }
-        }
-
-        // sum
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, x[0]);
-
-                check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-
-        // sum_rows
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0])));
-
-                check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
-            }
-        }
-
-        // mean, not yet fully implemented
-        if(0)
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
-
-                check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // argmax
-        if (0)
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
-
-                check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // repeat
-        {
-            srand(seed);
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-
-            ne2[0] = ne[0] * ne2[0];
-            ne2[1] = ne[1] * ne2[1];
-            ne2[2] = 1;
-            ne2[3] = 1;
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));
-
-                check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
-            }
-        }
-
-        // repeat back
-        {
-            srand(seed);
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-
-            ne2[0] = ne[0] * ne2[0];
-            ne2[1] = ne[1] * ne2[1];
-            ne2[2] = 1;
-            ne2[3] = 1;
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
-
-                check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
-            }
-        }
-
-        // abs (finite differences do not work)
-        //{
-        //    const int nargs = 1;
-
-        //    for (int ndims = 1; ndims <= 2; ++ndims) {
-        //        for (int i = 0; i < nargs; ++i) {
-        //            x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-        //            ggml_set_param(ctx0, x[i]);
-        //        }
-
-        //        struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0]));
-
-        //        check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f);
-        //    }
-        //}
-
-        // sgn
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
-
-                check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // neg
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
-
-                check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // step
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
-
-                check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // tanh, not yet fully implemented
-        if(0)
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
-
-                check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // mul_mat
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 2; ndims <= 4; ++ndims) {
-                int max_nrep = (ndims >= 3) ? 2 : 1;
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                for (int nrep2 = 1; nrep2 < max_nrep; ++nrep2) {
-                    for (int nrep3 = 1; nrep3 < max_nrep; ++nrep3) {
-                        {
-                            int64_t ne2[4];
-                            get_random_dims(ne2, 4);
-                            ne2[0] = ne[0];
-                            ne2[2] = nrep2 * ne[2];
-                            ne2[3] = nrep3 * ne[3];
-                            x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-                        }
-
-                        ggml_set_param(ctx0, x[0]);
-                        ggml_set_param(ctx0, x[1]);
-
-                        struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
-                        struct ggml_tensor * f = ggml_sum(ctx0, m);
-
-                        GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
-
-                        check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-                        if (ndims == 2) {
-                            // check_mat_mul does not support ndims > 2
-                            check_mat_mul(m, x[1], x[0]);
-                        }
-                    }
-                }
-            }
-        }
-
-        // elu, not yet fully implemented
-        if(0)
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
-
-                check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // relu
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
-
-                check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // gelu, not yet fully implemented
-        if(0)
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
-
-                check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
-            }
-        }
-
-        // silu
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0]));
-
-#ifdef GGML_SILU_FP16
-                // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
-                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
-#else
-                check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-#endif
-            }
-        }
-
-        // rms_norm
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f));
-
-                check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
-            }
-        }
-
-        // scale
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-
-                const float s = -1.0f + 2.0f*frand();
-
-                ggml_set_param(ctx0, x[0]);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], s));
-
-                check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // cpy f32
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
-
-                check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // cpy f16
-        {
-            srand(seed);
-            const int nargs = 2;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
-                    ggml_set_param(ctx0, x[i]);
-                }
-                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
-
-                check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
-            }
-        }
-
-        // reshape (1d->nd)
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                int64_t ne2[4];
-                ne2[0] = 1;
-                ne2[1] = 1;
-                ne2[2] = 1;
-                ne2[3] = 1;
-                for (int i = 0; i < ndims; ++i) {
-                    ne2[0] *= ne[i];
-                }
-                x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
-                x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
-                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // reshape (nd->1d)
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            for (int ndims = 1; ndims <= 2; ++ndims) {
-                int64_t ne2[4];
-                ne2[0] = 1;
-                ne2[1] = 1;
-                ne2[2] = 1;
-                ne2[3] = 1;
-                for (int i = 0; i < ndims; ++i) {
-                    ne2[0] *= ne[i];
-                }
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1]));
-                check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // acc 1d
-        {
-            srand(seed);
-            int64_t ne2[4] = { 1, 1, 1, 1 };
-
-            const int nargs = 2;
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                get_random_dims(ne2, 1);
-                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
-                    get_random_dims(ne2, 1);
-                }
-
-                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[1]);
-
-                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
-                const int offset = irand(max_offset) * ggml_element_size(x[0]);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
-
-                check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // acc 2d
-        {
-            srand(seed);
-            int64_t ne2[4] = { 1, 1, 1, 1 };
-            int64_t max_offsets[4] = { 0, 0, 0, 0 };
-            int64_t offsets[4] = { 0, 0, 0, 0 };
-
-            const int nargs = 2;
-            for (int ndims = 2; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                get_random_dims(ne2, 2);
-                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
-                    get_random_dims(ne2, 2);
-                }
-
-                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[1]);
-
-                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
-                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
-                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
-                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
-                const int offset = offsets[0] + offsets[1];
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
-
-                check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // acc 3d
-        {
-            srand(seed);
-            int64_t ne2[4] = { 1, 1, 1, 1 };
-            int64_t max_offsets[4] = { 0, 0, 0, 0 };
-            int64_t offsets[4] = { 0, 0, 0, 0 };
-
-            const int nargs = 2;
-            for (int ndims = 3; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                get_random_dims(ne2, 3);
-                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
-                    get_random_dims(ne2, 3);
-                }
-
-                x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[1]);
-
-                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
-                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
-                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
-                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
-                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
-                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
-                const int offset = offsets[0] + offsets[1] + offsets[2];
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
-
-                check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // acc 4d
-        {
-            srand(seed);
-            int64_t ne2[4] = { 1, 1, 1, 1 };
-            int64_t max_offsets[4] = { 0, 0, 0, 0 };
-            int64_t offsets[4] = { 0, 0, 0, 0 };
-
-            const int nargs = 2;
-            for (int ndims = 4; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                get_random_dims(ne2, 4);
-                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
-                    get_random_dims(ne2, 4);
-                }
-
-                x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[1]);
-
-                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
-                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
-                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
-                max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
-                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
-                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
-                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
-                offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
-                const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
-
-                check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // set_1d
-        {
-            srand(seed);
-            int64_t ne2[4];
-
-            const int nargs = 2;
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                get_random_dims(ne2, 1);
-                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
-                    get_random_dims(ne2, 1);
-                }
-
-                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[1]);
-
-                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
-                const int offset = irand(max_offset) * ggml_element_size(x[0]);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));
-
-                check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // set_2d
-        {
-            srand(seed);
-            int64_t ne2[4];
-            int64_t max_offsets[4] = { 0, 0, 0, 0 };
-            int64_t offsets[4] = { 0, 0, 0, 0 };
-
-            const int nargs = 1;
-            for (int ndims = 2; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                get_random_dims(ne2, 2);
-                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
-                    get_random_dims(ne2, 2);
-                }
-
-                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[1]);
-
-                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
-                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
-                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
-                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
-                const int offset = offsets[0] + offsets[1];
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));
-
-                check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // view_1d
-        {
-            srand(seed);
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int k0 = irand(ggml_nelements(x[0]));
-                const int k1 = irand(ggml_nelements(x[0]));
-                const int i0 = MIN(k0, k1);
-                const int i1 = MAX(k0, k1);
-
-                const int offset = i0 * sizeof(float);
-                const int nelem = i1 - i0;
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));
-
-                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // view_2d
-        {
-            srand(seed);
-            int64_t ne2[4];
-            int64_t nb2[4];
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-
-                get_random_dims(ne2, 2);
-                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
-                    get_random_dims(ne2, 2);
-                }
-                const int count = ne2[0]*ne2[1];
-
-                nb2[0] = sizeof(float);
-                nb2[1] = nb2[0]*ne2[0];
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int max_offset = ggml_nelements(x[0]) - count;
-                const int offset = irand(max_offset+1) * sizeof(float);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));
-
-                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // view_3d
-        {
-            srand(seed);
-            int64_t ne2[4] = {1,1,1,1};
-            int64_t nb2[4] = {0,0,0,0};
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-
-                get_random_dims(ne2, 3);
-                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
-                    get_random_dims(ne2, 3);
-                }
-                const int count = ne2[0]*ne2[1]*ne2[2];
-
-                nb2[0] = sizeof(float);
-                nb2[1] = nb2[0]*ne2[0];
-                nb2[2] = nb2[1]*ne2[1];
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int max_offset = ggml_nelements(x[0]) - count;
-                const int offset = irand(max_offset+1) * sizeof(float);
-
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
-
-                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // permute
-        {
-            srand(seed);
-            int64_t ne2[4];
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims)
-            {
-                // ggml_permute will set axes of dimensions below n_dims to 1.
-                // to make ggml_permute work correctly on all axes,
-                // the input tensor needs maximal n_dim of 4.
-                for (int i=0; i<ndims; ++i) {
-                    ne2[i] = ne[i];
-                }
-                for (int i=ndims; i<4; ++i) {
-                    ne2[i] = 1;
-                }
-                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
-
-                ggml_set_param(ctx0, x[0]);
-
-                const int p = irand(NUM_PERMUTATIONS);
-                const int ax0 = all_permutations[p*4+0];
-                const int ax1 = all_permutations[p*4+1];
-                const int ax2 = all_permutations[p*4+2];
-                const int ax3 = all_permutations[p*4+3];
-
-                // sum requires contiguous tensor rows
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_permute(ctx0, x[0], ax0, ax1, ax2, ax3)));
-
-                check_gradient("permute", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // transpose
-        {
-            srand(seed);
-            int64_t ne2[4];
-
-            const int nargs = 1;
-            for (int ndims = 1; ndims <= 4; ++ndims)
-            {
-                // ggml_transpose will set axes of dimensions below n_dims to 1.
-                // to make ggml_transpose work correctly on all axes,
-                // the input tensor needs maximal n_dim of 4.
-                for (int i=0; i<ndims; ++i) {
-                    ne2[i] = ne[i];
-                }
-                for (int i=ndims; i<4; ++i) {
-                    ne2[i] = 1;
-                }
-                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
-
-                ggml_set_param(ctx0, x[0]);
-
-                // sum requires contiguous tensor rows
-                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, x[0])));
-
-                check_gradient("transpose", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-            }
-        }
-
-        // get_rows
-        {
-            srand(seed);
-            int64_t ne2[4] = {ne[0], ne[1], 1, 1};
-            int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
-            const int nargs = 1;
-            const int ndims = 2;
-            x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-            x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]);
-
-            ggml_set_param(ctx0, x[0]);
-
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_get_rows(ctx0, x[0], x[1]));
-
-            check_gradient("get_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-        }
-
-        // diag_mask_inf
-        {
-            srand(seed);
-            const int nargs = 1;
-            const int ndims = 2;
-
-            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-            ggml_set_param(ctx0, x[0]);
-
-            int n_past = irand(ne[0]);
-
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_inf(ctx0, x[0], n_past));
-
-            check_gradient("diag_mask_inf", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-        }
-
-        // diag_mask_zero
-        {
-            srand(seed);
-            const int nargs = 1;
-            const int ndims = 2;
-
-            x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
-            ggml_set_param(ctx0, x[0]);
-
-            int n_past = irand(ne[0]);
-
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_diag_mask_zero(ctx0, x[0], n_past));
-
-            check_gradient("diag_mask_zero", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-        }
-
-        // softmax
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-
-            for (int ndims = 1; ndims <= 3; ++ndims) {
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-                ggml_set_param(ctx0, x[0]);
-
-                float eps = 1e-6f;
-                // dont use only sum as aggregation, because sum of softmax is always 1 -> finite differences should not work
-                // instead use sum(log(soft_max()*(1-eps)+eps)); use eps to avoid log(0)
-                struct ggml_tensor * f = ggml_sum(ctx0,
-                    ggml_log(ctx0,
-                        ggml_add1(ctx0,
-                            ggml_scale(ctx0,
-                                ggml_soft_max(ctx0, x[0]),
-                                1.0f - eps),
-                            ggml_new_f32(ctx0, eps))));
-
-                check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY);
-                // NOTE: softmax forward is computed using f16 table lookup instead of using actual expf, but backward assumes actual expf.
-                // this may result in different gradients too finite differences.
-                // when this test reports errors, first try to replace the table lookup with actual expf and test again to see if just that was the cause.
-                // if only the table lookup causes gradients to differ this is acceptable.
-            }
-        }
-
-        // cross_entropy_loss
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-
-            for (int ndims = 1; ndims <= 4; ++ndims) {
-                x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -0.1f, 0.1f);
-                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f);
-                // the second argument to cross_entropy_loss must sum up to 1 for each row
-                int nr = ggml_nrows(x[1]);
-                int nc = ggml_nelements(x[1]) / nr;
-                for (int ir = 0; ir < nr; ++ir) {
-                    float sum = 0;
-                    for (int ic = 0; ic < nc; ++ic) {
-                        sum += ((float *) x[1]->data)[ic + ir*nc];
-                    }
-                    for (int ic = 0; ic < nc; ++ic) {
-                        ((float *) x[1]->data)[ic + ir*nc] /= sum;
-                    }
-                }
-                ggml_set_param(ctx0, x[0]);
-
-                struct ggml_tensor * f = ggml_cross_entropy_loss(ctx0, x[0], x[1]);
-
-                check_gradient("cross_entropy_loss", ctx0, x, f, ndims, nargs, 1e-4f, 1e-3f, INFINITY);
-            }
-        }
-
-        // rope f32
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-            ne2[0] += ne2[0] % 2;
-            int n_rot = ne2[0];
-
-            for (int ndims = 3; ndims <= 4; ++ndims) {
-                for (int mode = 0; mode < 4; ++mode) {
-                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
-                        x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
-
-                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
-                        for (int i = 0; i < ne2[2]; ++i) {
-                            ((int32_t *) p->data)[i] = n_past + i;
-                        }
-
-                        ggml_set_param(ctx0, x[0]);
-
-                        const bool skip_past = (mode & 1);
-                        if (skip_past) {
-                            // we have no past, so this would have to work on uninitialized memory.
-                            // we only test the gradients here;
-                            // skip_past should have no influence on gradient computation.
-                            // so when other modes work, we assume that this does as well.
-                            continue;
-                        }
-
-                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
-
-                        GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
-                        check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY);
-                    }
-                }
-            }
-        }
-
-        // rope f16
-        {
-            srand(seed);
-            const int nargs = 1;
-
-            int64_t ne2[4];
-            get_random_dims(ne2, 4);
-            ne2[0] += ne2[0] % 2;
-            int n_rot = ne2[0];
-
-            for (int ndims = 3; ndims <= 4; ++ndims) {
-                for (int mode = 0; mode < 4; ++mode) {
-                    for (int n_past = 1; n_past < ne2[2]; ++n_past) {
-                        x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f);
-
-                        struct ggml_tensor * p = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ne2[2]);
-                        for (int i = 0; i < ne2[2]; ++i) {
-                            ((int32_t *) p->data)[i] = n_past + i;
-                        }
-
-                        ggml_set_param(ctx0, x[0]);
-
-                        const bool skip_past = (mode & 1);
-                        if (skip_past) {
-                            // we have no past, so this would have to work on uninitialized memory.
-                            // we only test the gradients here;
-                            // skip_past should have no influence on gradient computation.
-                            // so when other modes work, we assume that this does as well.
-                            continue;
-                        }
-
-                        struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], p, n_rot, mode));
-
-                        GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode);
-                        check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
-                    }
-                }
-            }
-        }
-
-        // flash_attn f32
-        // TODO: adapt to ggml_flash_attn_ext() changes
-        //{
-        //    srand(seed);
-        //    const int nargs = 3;
-
-        //    int64_t ne2[4];
-
-        //    get_random_dims(ne2, 4);
-        //    int64_t D = ne2[0];
-        //    int64_t N = ne2[1];
-        //    int64_t M = ne2[2] + N;
-        //    int64_t B = ne2[3];
-
-        //    for (int masked = 0; masked <= 1; ++masked) {
-        //        for (int ndims = 2; ndims <= 4; ++ndims) {
-        //            int max_nrep = (ndims >= 3) ? 2 : 1;
-        //            for (int nrep = 1; nrep < max_nrep; ++nrep) {
-        //                int64_t neq[4] = { D, N, B*nrep, ne[3] };
-        //                int64_t nek[4] = { D, M, B, ne[3] };
-        //                int64_t nev[4] = { M, D, B, ne[3] };
-        //                if (ndims == 2) {
-        //                    neq[2] = 1; neq[3] = 1;
-        //                    nek[2] = 1; nek[3] = 1;
-        //                    nev[2] = 1; nev[3] = 1;
-        //                } else if (ndims == 3) {
-        //                    neq[3] = 1;
-        //                    nek[3] = 1;
-        //                    nev[3] = 1;
-        //                }
-        //                x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
-        //                x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
-        //                x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
-        //                ggml_set_param(ctx0, x[0]);
-        //                ggml_set_param(ctx0, x[1]);
-        //                ggml_set_param(ctx0, x[2]);
-
-        //                struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
-
-        //                check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY);
-        //            }
-        //        }
-        //    }
-        //}
-
-        ggml_free(ctx0);
-    }
-
-    return 0;
-}