llama_cpp 0.16.0 → 0.16.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/ext/llama_cpp/extconf.rb +3 -0
  4. data/ext/llama_cpp/llama_cpp.cpp +14 -0
  5. data/lib/llama_cpp/version.rb +2 -2
  6. data/sig/llama_cpp.rbs +4 -0
  7. data/vendor/tmp/llama.cpp/Makefile +119 -54
  8. data/vendor/tmp/llama.cpp/ggml-alloc.c +78 -22
  9. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
  10. data/vendor/tmp/llama.cpp/ggml-backend.c +190 -65
  11. data/vendor/tmp/llama.cpp/ggml-backend.h +6 -3
  12. data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
  13. data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
  14. data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
  15. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +1 -0
  16. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +21 -9
  17. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +1 -1
  18. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +15 -1491
  19. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +77 -62
  20. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +77 -10
  21. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +1 -0
  22. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +1 -1
  23. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +1 -1
  24. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +1 -1
  25. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +1 -1
  26. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +1 -1
  27. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +1 -1
  28. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +1 -1
  29. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +1 -1
  30. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +1 -1
  31. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +1 -1
  32. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +1 -1
  33. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +1 -1
  34. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +1 -1
  35. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +1 -1
  36. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +1 -1
  37. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +1 -1
  38. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +1 -1
  39. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +1 -1
  40. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +1 -1
  41. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +1 -1
  42. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +1 -1
  43. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +1 -1
  44. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +1 -1
  45. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +1 -1
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +1 -1
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +1 -1
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +1 -1
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +1 -1
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +1 -1
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +1 -1
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +1 -1
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +1 -1
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +1 -1
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +1 -1
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +1 -1
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +1 -1
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +1 -1
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +1 -1
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +1 -1
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +1 -1
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +1 -1
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +1 -1
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +1 -1
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +1 -1
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +1 -1
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +1 -1
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +1 -1
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +1 -1
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +1 -1
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +1 -1
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +1 -1
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +1 -1
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +1 -1
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +1 -1
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +1 -1
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +1 -1
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +1 -1
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +1 -1
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +1 -1
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +1 -1
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +1 -1
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +1 -1
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +1 -1
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +1 -1
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +1 -1
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +1 -1
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +1 -1
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +1 -1
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +1 -1
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +1 -1
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +1 -1
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +1 -1
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +1 -1
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +1 -1
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +1 -1
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +1 -1
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +1 -1
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +1 -1
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +1 -1
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +1 -1
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +1 -1
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +1 -1
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +1 -1
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +1 -1
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +1 -1
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +1 -1
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +1 -1
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +1 -1
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +1 -1
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +1 -1
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +1 -1
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  123. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +48 -0
  124. data/vendor/tmp/llama.cpp/ggml-cuda.cu +95 -129
  125. data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
  126. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +8 -7
  127. data/vendor/tmp/llama.cpp/ggml-metal.m +17 -9
  128. data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
  129. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +21 -15
  130. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2133 -13215
  131. data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
  132. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +28826 -25037
  133. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +438 -493
  134. data/vendor/tmp/llama.cpp/ggml.c +158 -414
  135. data/vendor/tmp/llama.cpp/ggml.h +6 -0
  136. data/vendor/tmp/llama.cpp/llama.cpp +628 -279
  137. data/vendor/tmp/llama.cpp/llama.h +9 -1
  138. data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
  139. data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
  140. data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
  141. data/vendor/tmp/llama.cpp/unicode.h +1 -1
  142. metadata +15 -3
@@ -297,12 +297,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 
 #if defined(GGML_USE_ACCELERATE)
 #include <Accelerate/Accelerate.h>
-#elif defined(GGML_USE_OPENBLAS)
-#if defined(GGML_BLAS_USE_MKL)
-#include <mkl.h>
-#else
-#include <cblas.h>
-#endif
 #endif
 
 // floating point type used to accumulate sums
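With the OpenBLAS/MKL includes gone from ggml.c, BLAS support moves out of the core into the new ggml-blas backend (ggml-blas.cpp and ggml-blas.h in the file list above), selected by a single GGML_USE_BLAS flag. A minimal host-side sketch of how a caller might pick it up, assuming the backend exposes an init entry point named ggml_backend_blas_init in the style of the other backends (an assumption, not quoted from this diff):

    // Hypothetical backend selection; ggml_backend_cpu_init() is real ggml-backend API.
    #include "ggml-backend.h"
    #ifdef GGML_USE_BLAS
    #include "ggml-blas.h"
    #endif

    static ggml_backend_t pick_backend(void) {
    #ifdef GGML_USE_BLAS
        ggml_backend_t be = ggml_backend_blas_init(); // assumed entry point
        if (be != NULL) {
            return be;
        }
    #endif
        return ggml_backend_cpu_init(); // CPU fallback
    }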
@@ -1759,9 +1753,8 @@ struct ggml_compute_state_shared {
     int n_threads;
 
     // synchronization primitives
-    atomic_int n_active; // num active threads
-    atomic_int node_n; // active graph node
-    atomic_int node_task; // active graph node task phase
+    atomic_int n_barrier;
+    atomic_int n_barrier_passed;
 
     ggml_abort_callback abort_callback; // abort ggml_graph_compute when true
     void* abort_callback_data;
@@ -3212,35 +3205,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
     return tensor->nb[0] > tensor->nb[1];
 }
 
-GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
+    size_t next_nb = ggml_type_size(tensor->type);
+    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
+        return false;
+    }
+    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        if (tensor->ne[i] != 1) {
+            if (i > n) {
+                if (tensor->nb[i] != next_nb) {
+                    return false;
+                }
+                next_nb *= tensor->ne[i];
+            } else {
+                // this dimension does not need to be contiguous
+                next_nb = tensor->ne[i]*tensor->nb[i];
+            }
+        }
+    }
+    return true;
+}
 
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
-        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
+    return ggml_is_contiguous_0(tensor);
 }
 
 GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
-    return ggml_is_contiguous(tensor);
+    return ggml_is_contiguous_n(tensor, 0);
 }
 
 GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+    return ggml_is_contiguous_n(tensor, 1);
 }
 
 GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    return
-        tensor->nb[0] == ggml_type_size(tensor->type) &&
-        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+    return ggml_is_contiguous_n(tensor, 2);
 }
 
 GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
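The new ggml_is_contiguous_n(tensor, n) unifies the three public predicates: dimensions 0..n may carry arbitrary strides, but every dimension above n must be packed tight against the ones below it, with quantized types handled through ggml_blck_size. A standalone restatement of the check for a plain (block size 1) element type, with an invented padded-row example to show the difference between the variants:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    // Simplified ggml_is_contiguous_n for a non-quantized type (block size 1).
    static bool is_contiguous_n(const int64_t ne[4], const size_t nb[4],
                                size_t type_size, int n) {
        size_t next_nb = type_size;
        if (nb[0] != next_nb) return false;
        next_nb *= ne[0];
        for (int i = 1; i < 4; i++) {
            if (ne[i] == 1) continue;
            if (i > n) {
                if (nb[i] != next_nb) return false; // must be packed
                next_nb *= ne[i];
            } else {
                next_nb = ne[i]*nb[i]; // dims <= n may be padded/strided
            }
        }
        return true;
    }

    int main(void) {
        // f32 tensor of shape [4 x 3] whose rows are padded to 32 bytes
        // instead of the packed 16: still walkable one row at a time.
        const int64_t ne[4] = {4, 3, 1, 1};
        const size_t  nb[4] = {4, 32, 96, 96};
        printf("contiguous:   %d\n", is_contiguous_n(ne, nb, 4, 0)); // prints 0
        printf("contiguous_1: %d\n", is_contiguous_n(ne, nb, 4, 1)); // prints 1
        return 0;
    }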
@@ -3272,20 +3272,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
-        (t0->ne[0] == t1->ne[0] ) &&
-        (t0->ne[1] == t1->ne[1] ) &&
-        (t0->ne[2] == t1->ne[2] ) &&
-        (t0->ne[3] == t1->ne[3] );
+        (t0->ne[0] == t1->ne[0]) &&
+        (t0->ne[1] == t1->ne[1]) &&
+        (t0->ne[2] == t1->ne[2]) &&
+        (t0->ne[3] == t1->ne[3]);
 }
 
 bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
-        (t0->nb[0] == t1->nb[0] ) &&
-        (t0->nb[1] == t1->nb[1] ) &&
-        (t0->nb[2] == t1->nb[2] ) &&
-        (t0->nb[3] == t1->nb[3] );
+        (t0->nb[0] == t1->nb[0]) &&
+        (t0->nb[1] == t1->nb[1]) &&
+        (t0->nb[2] == t1->nb[2]) &&
+        (t0->nb[3] == t1->nb[3]);
 }
 
 // check if t1 can be represented as a repeatition of t0
@@ -4078,32 +4078,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
     switch (tensor->type) {
         case GGML_TYPE_I8:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                 return ((int8_t *)(tensor->data))[i];
             }
         case GGML_TYPE_I16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                 return ((int16_t *)(tensor->data))[i];
             }
        case GGML_TYPE_I32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                 return ((int32_t *)(tensor->data))[i];
             }
        case GGML_TYPE_F16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                 return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
             }
        case GGML_TYPE_BF16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
                 return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]);
             }
        case GGML_TYPE_F32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(float));
                 return ((float *)(tensor->data))[i];
             }
        default:
@@ -4125,32 +4119,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
     switch (tensor->type) {
        case GGML_TYPE_I8:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
                 ((int8_t *)(tensor->data))[i] = value;
             } break;
        case GGML_TYPE_I16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
                 ((int16_t *)(tensor->data))[i] = value;
             } break;
        case GGML_TYPE_I32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
                 ((int32_t *)(tensor->data))[i] = value;
             } break;
        case GGML_TYPE_F16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
                 ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
             } break;
        case GGML_TYPE_BF16:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
                 ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value);
             } break;
        case GGML_TYPE_F32:
             {
-                GGML_ASSERT(tensor->nb[0] == sizeof(float));
                 ((float *)(tensor->data))[i] = value;
             } break;
        default:
@@ -7343,13 +7331,15 @@ struct ggml_tensor * ggml_add_rel_pos_inplace(
     return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
 }
 
-// gmml_unary
+// ggml_unary
 
 static struct ggml_tensor * ggml_unary_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         enum ggml_unary_op op,
         bool inplace) {
+    GGML_ASSERT(ggml_is_contiguous_1(a));
+
     bool is_node = false;
 
     if (!inplace && (a->grad)) {
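The new GGML_ASSERT(ggml_is_contiguous_1(a)) turns a previously implicit precondition into a graph-build-time failure: every unary op built through ggml_unary_impl now requires its input rows to be packed. A sketch of the user-facing consequence (the setup is invented; ggml_transpose, ggml_cont and ggml_relu are real ggml API):

    #include "ggml.h"

    // ggml_transpose returns a strided view (nb[0] is no longer the element
    // size), so it must be made contiguous before a unary op is applied.
    static struct ggml_tensor * relu_of_transpose(struct ggml_context * ctx,
                                                  struct ggml_tensor * x) {
        struct ggml_tensor * xt = ggml_transpose(ctx, x);
        // ggml_relu(ctx, xt) would now trip GGML_ASSERT(ggml_is_contiguous_1(a))
        return ggml_relu(ctx, ggml_cont(ctx, xt));
    }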
@@ -11014,6 +11004,8 @@ static void ggml_compute_forward_abs_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11023,9 +11015,6 @@ static void ggml_compute_forward_abs_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_abs_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11060,6 +11049,8 @@ static void ggml_compute_forward_sgn_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11069,9 +11060,6 @@ static void ggml_compute_forward_sgn_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_sgn_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11106,6 +11094,8 @@ static void ggml_compute_forward_neg_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11115,9 +11105,6 @@ static void ggml_compute_forward_neg_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_neg_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11152,6 +11139,8 @@ static void ggml_compute_forward_step_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11161,9 +11150,6 @@ static void ggml_compute_forward_step_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_step_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11198,6 +11184,8 @@ static void ggml_compute_forward_tanh_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11207,9 +11195,6 @@ static void ggml_compute_forward_tanh_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_tanh_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11244,6 +11229,8 @@ static void ggml_compute_forward_elu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11253,9 +11240,6 @@ static void ggml_compute_forward_elu_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_elu_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11290,6 +11274,8 @@ static void ggml_compute_forward_relu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11299,9 +11285,6 @@ static void ggml_compute_forward_relu_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_relu_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11336,6 +11319,8 @@ static void ggml_compute_forward_sigmoid_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11345,9 +11330,6 @@ static void ggml_compute_forward_sigmoid_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_sigmoid_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11381,9 +11363,9 @@ static void ggml_compute_forward_gelu_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11444,9 +11426,9 @@ static void ggml_compute_forward_gelu_quick_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11507,9 +11489,9 @@ static void ggml_compute_forward_silu_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11570,6 +11552,8 @@ static void ggml_compute_forward_leaky_relu_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11619,11 +11603,11 @@ static void ggml_compute_forward_silu_back_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * grad = dst->src[1];
 
-    GGML_ASSERT(ggml_is_contiguous_1(grad));
-    GGML_ASSERT(ggml_is_contiguous_1(src0));
-    GGML_ASSERT(ggml_is_contiguous_1(dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
-    GGML_ASSERT(ggml_are_same_shape(src0, grad));
+    assert(ggml_is_contiguous_1(grad));
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
+    assert(ggml_are_same_shape(src0, grad));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -11685,6 +11669,8 @@ static void ggml_compute_forward_hardswish_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11694,9 +11680,6 @@ static void ggml_compute_forward_hardswish_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_hardswish_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11728,6 +11711,8 @@ static void ggml_compute_forward_hardsigmoid_f32(
     const struct ggml_tensor * src0 = dst->src[0];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11737,9 +11722,6 @@ static void ggml_compute_forward_hardsigmoid_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert(dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         ggml_vec_hardsigmoid_f32(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -12190,39 +12172,6 @@ static void ggml_compute_forward_group_norm(
 
 // ggml_compute_forward_mul_mat
 
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-// helper function to determine if it is better to use BLAS or not
-// for large matrices, BLAS is faster
-static bool ggml_compute_forward_mul_mat_use_blas(struct ggml_tensor * dst) {
-    const struct ggml_tensor * src0 = dst->src[0];
-    const struct ggml_tensor * src1 = dst->src[1];
-
-    //const int64_t ne00 = src0->ne[0];
-    //const int64_t ne01 = src0->ne[1];
-
-    const int64_t ne10 = src1->ne[0];
-
-    const int64_t ne0 = dst->ne[0];
-    const int64_t ne1 = dst->ne[1];
-
-    // NOTE: with GGML_OP_MUL_MAT_ID we don't want to go through the BLAS branch because it will dequantize (to_float)
-    // all the experts for each batch element and the processing would become incredibly slow
-    // TODO: find the optimal values for these
-    if (dst->op != GGML_OP_MUL_MAT_ID &&
-        ggml_is_contiguous(src0) &&
-        ggml_is_contiguous(src1) &&
-        //src0->type == GGML_TYPE_F32 &&
-        src1->type == GGML_TYPE_F32 &&
-        (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
-
-        /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
-        return true;
-    }
-
-    return false;
-}
-#endif
-
 static void ggml_compute_forward_mul_mat_one_chunk(
     const struct ggml_compute_params * params,
     struct ggml_tensor * dst,
@@ -12360,73 +12309,6 @@ static void ggml_compute_forward_mul_mat(
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
 
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-    if (ggml_compute_forward_mul_mat_use_blas(dst)) {
-        const int64_t ne_plane = ne01*ne00;
-        const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
-        UNUSED(desired_wsize);
-
-        if (params->type == GGML_TASK_TYPE_INIT) {
-            if (type != GGML_TYPE_F32) {
-                assert(params->wsize >= desired_wsize);
-                // parallelize by src0 rows
-                for (int64_t i13 = 0; i13 < ne13; i13++) {
-                    for (int64_t i12 = 0; i12 < ne12; i12++) {
-                        // broadcast src0 into src1 across 2nd,3rd dimension
-                        const int64_t i03 = i13/r3;
-                        const int64_t i02 = i12/r2;
-
-                        const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
-                        float * const wdata = (float *) params->wdata + i13*ne12*ne_plane + i12*ne_plane;
-                        ggml_to_float_t const to_float = type_traits[type].to_float;
-
-                        for (int64_t i01 = ith; i01 < ne01; i01 += nth) {
-                            to_float((const char *) x + i01*nb01, wdata + i01*ne00, ne00);
-                        }
-                    }
-                }
-            }
-            return;
-        }
-
-        if (params->type == GGML_TASK_TYPE_FINALIZE) {
-            return;
-        }
-
-        // perform sgemm, parallelization controlled by blas lib
-        if (ith != 0) {
-            return;
-        }
-
-        //const int64_t tgemm0 = ggml_perf_time_us();
-        for (int64_t i13 = 0; i13 < ne13; i13++) {
-            for (int64_t i12 = 0; i12 < ne12; i12++) {
-                const int64_t i03 = i13/r3;
-                const int64_t i02 = i12/r2;
-
-                const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
-                const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
-                float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
-
-                if (type != GGML_TYPE_F32) {
-                    x = (float *) params->wdata + i13*ne12*ne_plane + i12*ne_plane;
-                }
-
-                cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
-                            ne1, ne01, ne10,
-                            1.0f, y, ne10,
-                            x, ne00,
-                            0.0f, d, ne01);
-            }
-        }
-        //printf("cblas_sgemm = %.3f ms, %lld flops\n", (ggml_perf_time_us() - tgemm0)/1000.0, ne13*ne12*ne1*ne01*ne10*2);
-
-        //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-
-        return;
-    }
-#endif
-
 #if GGML_USE_LLAMAFILE
     const bool src1_cont = ggml_is_contiguous(src1);
 
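This removes the last inline BLAS special case from the CPU matmul; per the file list above, BLAS now enters through the separate ggml-blas backend rather than an #ifdef branch in ggml.c. For reference, the deleted path mapped ggml's per-plane convention (dst = src1 * src0^T) onto one row-major sgemm; a condensed standalone restatement under the assumption ne00 == ne10, which must hold for the product to be defined (matrix names are illustrative):

    #include <cblas.h>

    // d[ne1 x ne01] = y[ne1 x ne10] * x[ne01 x ne10]^T, exactly the
    // argument mapping used by the removed cblas_sgemm call.
    static void mul_mat_plane_blas(int ne1, int ne01, int ne10,
                                   const float * y, const float * x, float * d) {
        cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
                    ne1, ne01, ne10,
                    1.0f, y, ne10,
                          x, ne10,
                    0.0f, d, ne01);
    }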
@@ -12807,19 +12689,7 @@ static void ggml_compute_forward_out_prod_f32(
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
 
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-    bool use_blas = ggml_is_matrix(src0) &&
-        ggml_is_matrix(src1) &&
-        ggml_is_contiguous(src0) &&
-        (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
-#endif
-
     if (params->type == GGML_TASK_TYPE_INIT) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
-        if (use_blas) {
-            return;
-        }
-#endif
         if (ith != 0) {
             return;
         }
@@ -12831,50 +12701,6 @@ static void ggml_compute_forward_out_prod_f32(
         return;
     }
 
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-    if (use_blas) {
-        if (params->ith != 0) { // All threads other than the first do no work.
-            return;
-        }
-        // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
-        // src0: (k,n)
-        // src1: (k,m)
-        // dst: (m,n)
-        //
-        // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
-        // Also expressed as (major,minor)
-        // a: (m,k): so src1 transposed
-        // b: (k,n): so src0
-        // c: (m,n)
-        //
-        // However, if ggml_is_transposed(src1) is true, then
-        // src1->data already contains a transposed version, so sgemm mustn't
-        // transpose it further.
-
-        int n = src0->ne[0];
-        int k = src0->ne[1];
-        int m = src1->ne[0];
-
-        int transposeA, lda;
-
-        if (!ggml_is_transposed(src1)) {
-            transposeA = CblasTrans;
-            lda = m;
-        } else {
-            transposeA = CblasNoTrans;
-            lda = k;
-        }
-
-        float * a = (float *) ((char *) src1->data);
-        float * b = (float *) ((char *) src0->data);
-        float * c = (float *) ((char *) dst->data);
-
-        cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
-
-        return;
-    }
-#endif
-
     // dst[:,:,:,:] = 0
     // for i2,i3:
     //   for i1:
@@ -13004,8 +12830,6 @@ static void ggml_compute_forward_out_prod_q_f32(
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
 
-    // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-
     if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
             return;
@@ -13402,6 +13226,8 @@ static void ggml_compute_forward_get_rows_q(
         const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
         const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
+        assert(i01 >= 0 && i01 < ne01);
+
         dequantize_row_q(
                 (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
                 (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
@@ -13445,6 +13271,8 @@ static void ggml_compute_forward_get_rows_f16(
         const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
         const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
+        assert(i01 >= 0 && i01 < ne01);
+
         ggml_fp16_to_fp32_row(
                 (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
                 (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
@@ -13488,7 +13316,9 @@ static void ggml_compute_forward_get_rows_bf16(
         const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
         const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
-        ggml_bf16_to_fp32_row(
+        assert(i01 >= 0 && i01 < ne01);
+
+        ggml_bf16_to_fp32_row(
                 (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
                 (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
     }
@@ -13531,6 +13361,8 @@ static void ggml_compute_forward_get_rows_f32(
         const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
         const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
+        assert(i01 >= 0 && i01 < ne01);
+
         ggml_vec_cpy_f32(nc,
                 (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3),
                 (float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03));
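All four get_rows variants now bounds-check the fetched row index, so a bad id (say, a token id at or above the table's row count) fails loudly in builds with assertions enabled instead of reading out-of-bounds memory; since these are plain assert rather than GGML_ASSERT, the check compiles out under NDEBUG. A sketch of the call this protects (shapes are invented; ggml_get_rows is real ggml API):

    #include "ggml.h"

    // table: rows of an [n_embd x n_vocab] embedding matrix,
    // ids: 1-D I32 tensor of row indices. With assertions on, any ids
    // entry outside [0, n_vocab) now trips assert(i01 >= 0 && i01 < ne01)
    // inside the get_rows kernels at compute time.
    static struct ggml_tensor * lookup_rows(struct ggml_context * ctx,
                                            struct ggml_tensor * table,
                                            struct ggml_tensor * ids) {
        return ggml_get_rows(ctx, table, ids);
    }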
@@ -16686,7 +16518,10 @@ static void ggml_compute_forward_map_unary_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    GGML_ASSERT(ggml_are_same_shape(src0, dst));
+    assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(dst));
+    assert(ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
@@ -16695,9 +16530,6 @@ static void ggml_compute_forward_map_unary_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert( dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         fun(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -16735,6 +16567,9 @@ static void ggml_compute_forward_map_binary_f32(
     const struct ggml_tensor * src1 = dst->src[1];
 
     assert(params->ith == 0);
+    assert(ggml_is_contiguous_1(src0));
+    assert(ggml_is_contiguous_1(src1));
+    assert(ggml_is_contiguous_1(dst));
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
     if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -16744,10 +16579,6 @@ static void ggml_compute_forward_map_binary_f32(
     const int n = ggml_nrows(src0);
     const int nc = src0->ne[0];
 
-    assert( dst->nb[0] == sizeof(float));
-    assert(src0->nb[0] == sizeof(float));
-    assert(src1->nb[0] == sizeof(float));
-
     for (int i = 0; i < n; i++) {
         fun(nc,
                 (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -18905,6 +18736,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
     switch (node->op) {
         case GGML_OP_CPY:
         case GGML_OP_DUP:
+        case GGML_OP_CONT:
         case GGML_OP_ADD:
         case GGML_OP_ADD1:
         case GGML_OP_ACC:
@@ -18989,7 +18821,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
             } break;
         case GGML_OP_SCALE:
         case GGML_OP_SET:
-        case GGML_OP_CONT:
         case GGML_OP_RESHAPE:
         case GGML_OP_VIEW:
         case GGML_OP_PERMUTE:
@@ -19140,41 +18971,49 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
     return n_tasks;
 }
 
-static void ggml_graph_compute_thread_sync_node(int * node_n, struct ggml_compute_state * state, const bool do_yield) {
-    // wait for other threads to finish
-    const int last_node_n = * node_n;
-
-    while (true) {
-        if (do_yield) {
-            sched_yield();
-        }
-
-        * node_n = atomic_load(&state->shared->node_n);
-        if (* node_n != last_node_n) break;
-#if defined(__SSE3__)
-        // Tell the processor we're spinning. It's a processor hint for spinlocks.
-        _mm_pause();
-#endif
+#ifdef GGML_USE_OPENMP
+static void ggml_barrier(struct ggml_compute_state * state) {
+    if (state->shared->n_threads == 1) {
+        return;
     }
+
+    #pragma omp barrier
 }
+#else
+static void ggml_barrier(struct ggml_compute_state * state) {
+    if (state->shared->n_threads == 1) {
+        return;
+    }
 
-static void ggml_graph_compute_thread_sync_task(int * task_phase, struct ggml_compute_state * state, const bool do_yield) {
-    // wait for other threads to finish
-    const int last_task_phase = * task_phase;
+    atomic_int * n_barrier = &state->shared->n_barrier;
+    atomic_int * n_barrier_passed = &state->shared->n_barrier_passed;
 
-    while (true) {
-        if (do_yield) {
+    int n_threads = state->shared->n_threads;
+    int passed_old = atomic_load(n_barrier_passed);
+
+    if (atomic_fetch_add(n_barrier, 1) == n_threads - 1) {
+        // last thread
+        atomic_store(n_barrier, 0);
+        atomic_fetch_add(n_barrier_passed, 1);
+    } else {
+        // wait for other threads
+        //while (atomic_load(n_barrier_passed) == passed_old) {
+        //}
+        const int n_spin_before_sleep = 100000;
+        while (true) {
+            for (int i = 0; i < n_spin_before_sleep; i++) {
+                if (atomic_load(n_barrier_passed) != passed_old) {
+                    return;
+                }
+#if defined(__SSE3__)
+                _mm_pause();
+#endif
+            }
             sched_yield();
         }
-
-        * task_phase = atomic_load(&state->shared->node_task);
-        if (* task_phase != last_task_phase) break;
-#if defined(__SSE3__)
-        // Tell the processor we're spinning. It's a processor hint for spinlocks.
-        _mm_pause();
-#endif
     }
 }
+#endif
 
 static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
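The two hand-rolled sync loops are replaced by a single counter barrier: each arriving thread increments n_barrier; the last arrival resets it and bumps the n_barrier_passed generation counter, which the waiting threads watch (spinning with _mm_pause, then yielding). A self-contained C11 restatement of the same idea, with the spin-before-sleep tuning elided for brevity (field names mirror the diff; the wrapper type is mine):

    #include <stdatomic.h>
    #include <sched.h>

    // Counter barrier in the style of the new ggml_barrier (non-OpenMP path).
    typedef struct {
        atomic_int n_barrier;        // threads currently parked at the barrier
        atomic_int n_barrier_passed; // generation counter, bumped once per barrier
        int        n_threads;
    } barrier_t;

    static void barrier_wait(barrier_t * b) {
        if (b->n_threads == 1) {
            return;
        }
        int passed_old = atomic_load(&b->n_barrier_passed);
        if (atomic_fetch_add(&b->n_barrier, 1) == b->n_threads - 1) {
            // last thread to arrive: reset the counter and release everyone
            atomic_store(&b->n_barrier, 0);
            atomic_fetch_add(&b->n_barrier_passed, 1);
        } else {
            // wait for the generation counter to advance
            while (atomic_load(&b->n_barrier_passed) == passed_old) {
                sched_yield();
            }
        }
    }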
@@ -19182,136 +19021,54 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     const struct ggml_cgraph * cgraph = state->shared->cgraph;
     const struct ggml_cplan * cplan = state->shared->cplan;
 
-    const int n_threads = state->shared->n_threads;
+    const int ith = state->ith;
+    const int n_threads = state->shared->n_threads;
 
-    set_numa_thread_affinity(state->ith);
+    set_numa_thread_affinity(ith);
 
-    int node_n = -1;
-    int task_phase = GGML_TASK_TYPE_FINALIZE;
+    struct ggml_compute_params params = {
+        /*.type  =*/ GGML_TASK_TYPE_INIT,
+        /*.ith   =*/ ith,
+        /*.nth   =*/ state->shared->n_threads,
+        /*.wsize =*/ cplan->work_size,
+        /*.wdata =*/ cplan->work_data,
+    };
 
-    while (true) {
+    for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
-            state->shared->node_n += 1;
             state->ec = GGML_STATUS_ABORTED;
             return 0;
         }
 
-        if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
-            // all other threads are finished and spinning
-            // do finalize and init here so we don't have synchronize again
-            struct ggml_compute_params params = {
-                /*.type  =*/ GGML_TASK_TYPE_FINALIZE,
-                /*.ith   =*/ 0,
-                /*.nth   =*/ 0,
-                /*.wsize =*/ cplan->work_size,
-                /*.wdata =*/ cplan->work_data,
-            };
-
-            if (node_n != -1) {
-                /* FINALIZE */
-                struct ggml_tensor * node = cgraph->nodes[node_n];
-                if (GGML_OP_HAS_FINALIZE[node->op]) {
-                    params.nth = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
-                    ggml_compute_forward(&params, node, state);
-                }
-                ggml_graph_compute_perf_stats_node(node, state->shared);
-            }
-
-            // distribute new work or execute it direct if 1T
-            while (++node_n < cgraph->n_nodes) {
-                GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
-                struct ggml_tensor * node = cgraph->nodes[node_n];
-                const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
-
-                state->shared->perf_node_start_cycles = ggml_perf_cycles();
-                state->shared->perf_node_start_time_us = ggml_perf_time_us();
-
-                params.nth = n_tasks;
-
-                if (n_tasks == 1) {
-                    /* INIT */
-                    if (GGML_OP_HAS_INIT[node->op]) {
-                        params.type = GGML_TASK_TYPE_INIT;
-                        ggml_compute_forward(&params, node, state);
-                    }
-
-                    // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
-                    // they do something more efficient than spinning (?)
-                    params.type = GGML_TASK_TYPE_COMPUTE;
-                    ggml_compute_forward(&params, node, state);
-
-                    if (GGML_OP_HAS_FINALIZE[node->op]) {
-                        params.type = GGML_TASK_TYPE_FINALIZE;
-                        ggml_compute_forward(&params, node, state);
-                    }
-
-                    ggml_graph_compute_perf_stats_node(node, state->shared);
-                } else {
-                    break;
-                }
-
-                if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
-                    break;
-                }
-            }
-
-            task_phase = GGML_TASK_TYPE_INIT;
-            atomic_store(&state->shared->n_active, n_threads);
-            atomic_store(&state->shared->node_n, node_n);
-            atomic_store(&state->shared->node_task, task_phase);
-        } else {
-            ggml_graph_compute_thread_sync_node(&node_n, state, false);
-            ggml_graph_compute_thread_sync_task(&task_phase, state, false);
-        }
-
-        // check if we should stop
-        if (node_n >= cgraph->n_nodes) break;
-
-        /* INIT & COMPUTE */
         struct ggml_tensor * node = cgraph->nodes[node_n];
         const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
 
-        struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_TYPE_INIT,
-            /*.ith   =*/ state->ith,
-            /*.nth   =*/ n_tasks,
-            /*.wsize =*/ cplan->work_size,
-            /*.wdata =*/ cplan->work_data,
-        };
+        params.nth = n_tasks;
 
-        if (state->ith < n_tasks) {
-            if (GGML_OP_HAS_INIT[node->op]) {
+        /* INIT */
+        if (GGML_OP_HAS_INIT[node->op]) {
+            if (ith < n_tasks) {
+                params.type = GGML_TASK_TYPE_INIT;
                 ggml_compute_forward(&params, node, state);
             }
+            ggml_barrier(state);
         }
 
-        if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
-            task_phase = GGML_TASK_TYPE_COMPUTE;
-            atomic_store(&state->shared->n_active, n_threads);
-            atomic_store(&state->shared->node_task, task_phase);
-        }
-        else {
-            // TODO: this sched_yield can have significant impact on the performance - either positive or negative
-            // depending on the workload and the operating system.
-            // since it is not clear what is the best approach, it should potentially become user-configurable
-            // ref: https://github.com/ggerganov/ggml/issues/291
-            // UPD: adding the do_yield flag seems to resolve the issue universally
-            const bool do_yield = node_n < 0 || cgraph->nodes[node_n]->op == GGML_OP_MUL_MAT;
-            ggml_graph_compute_thread_sync_task(&task_phase, state, do_yield);
-        }
-
-        if (state->ith < n_tasks) {
+        /* COMPUTE */
+        if (ith < n_tasks) {
             params.type = GGML_TASK_TYPE_COMPUTE;
             ggml_compute_forward(&params, node, state);
         }
 
-        if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
-            task_phase = GGML_TASK_TYPE_FINALIZE;
-            atomic_store(&state->shared->n_active, n_threads);
-            atomic_store(&state->shared->node_task, task_phase);
-        }
-        else {
-            ggml_graph_compute_thread_sync_task(&task_phase, state, false);
+        ggml_barrier(state);
+
+        /* FINALIZE */
+        if (GGML_OP_HAS_FINALIZE[node->op]) {
+            if (params.ith == 0) {
+                params.type = GGML_TASK_TYPE_FINALIZE;
+                ggml_compute_forward(&params, node, state);
+            }
+            ggml_barrier(state);
        }
    }
 
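Seen end to end, the scheduler change replaces the coordinator/worker handshake (one thread dispatching via node_n/node_task while the rest spin) with a flat loop that every thread executes identically, separated by ggml_barrier; FINALIZE now always runs on thread 0 only. A condensed restatement of the per-node schedule (has_init/has_finalize stand in for the GGML_OP_HAS_* tables; this summarizes the loop above, it is not new behavior):

    for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
        struct ggml_tensor * node = cgraph->nodes[node_n];
        const int n_tasks = ggml_get_n_tasks(node, n_threads, n_threads);

        if (has_init(node)) {
            if (ith < n_tasks) { init(node); }     // GGML_TASK_TYPE_INIT
            ggml_barrier(state);
        }
        if (ith < n_tasks) { compute(node); }      // GGML_TASK_TYPE_COMPUTE
        ggml_barrier(state);
        if (has_finalize(node)) {
            if (ith == 0) { finalize(node); }      // GGML_TASK_TYPE_FINALIZE
            ggml_barrier(state);
        }
    }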
@@ -19368,17 +19125,6 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
             {
                 const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
 
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-                if (ggml_compute_forward_mul_mat_use_blas(node)) {
-                    if (node->src[0]->type != GGML_TYPE_F32) {
-                        // here we need memory for fully dequantized matrix from src0
-                        // take into account that src0 can be broadcasted into src1[2,3]
-                        cur = ggml_type_size(GGML_TYPE_F32)
-                            * node->src[0]->ne[0]*node->src[0]->ne[1]
-                            * node->src[1]->ne[2]*node->src[1]->ne[3];
-                    }
-                } else
-#endif
                 if (node->src[1]->type != vec_dot_type) {
                     cur = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
                 }
@@ -19509,7 +19255,6 @@ static enum ggml_status ggml_graph_compute_parallel(struct ggml_compute_state *
             // update the number of threads from the actual number of threads that we got from OpenMP
             n_threads = omp_get_num_threads();
             workers[0].shared->n_threads = n_threads;
-            workers[0].shared->n_active = n_threads;
         }
         ggml_graph_compute_thread(&workers[omp_get_thread_num()]);
     }
@@ -19572,9 +19317,8 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
         /*.perf_node_start_cycles =*/ 0,
        /*.perf_node_start_time_us =*/ 0,
        /*.n_threads =*/ n_threads,
-        /*.n_active =*/ n_threads,
-        /*.node_n =*/ -1,
-        /*.node_task =*/ GGML_TASK_TYPE_FINALIZE,
+        /*.n_barrier =*/ 0,
+        /*.n_barrier_passed =*/ 0,
        /*.abort_callback =*/ NULL,
        /*.abort_callback_data =*/ NULL,
        /*.current_chunk; =*/ 0,
@@ -22676,7 +22420,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }
 
 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
+#if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
     return 1;
 #else
     return 0;