llama_cpp 0.16.0 → 0.16.2

Files changed (142)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/ext/llama_cpp/extconf.rb +3 -0
  4. data/ext/llama_cpp/llama_cpp.cpp +14 -0
  5. data/lib/llama_cpp/version.rb +2 -2
  6. data/sig/llama_cpp.rbs +4 -0
  7. data/vendor/tmp/llama.cpp/Makefile +119 -54
  8. data/vendor/tmp/llama.cpp/ggml-alloc.c +78 -22
  9. data/vendor/tmp/llama.cpp/ggml-backend-impl.h +20 -8
  10. data/vendor/tmp/llama.cpp/ggml-backend.c +190 -65
  11. data/vendor/tmp/llama.cpp/ggml-backend.h +6 -3
  12. data/vendor/tmp/llama.cpp/ggml-blas.cpp +363 -0
  13. data/vendor/tmp/llama.cpp/ggml-blas.h +23 -0
  14. data/vendor/tmp/llama.cpp/ggml-common.h +6 -0
  15. data/vendor/tmp/llama.cpp/ggml-cuda/argsort.cu +1 -0
  16. data/vendor/tmp/llama.cpp/ggml-cuda/dmmv.cu +21 -9
  17. data/vendor/tmp/llama.cpp/ggml-cuda/fattn-tile-f16.cu +1 -1
  18. data/vendor/tmp/llama.cpp/ggml-cuda/mmq.cu +15 -1491
  19. data/vendor/tmp/llama.cpp/ggml-cuda/mmvq.cu +77 -62
  20. data/vendor/tmp/llama.cpp/ggml-cuda/quantize.cu +77 -10
  21. data/vendor/tmp/llama.cpp/ggml-cuda/softmax.cu +1 -0
  22. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu +1 -1
  23. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu +1 -1
  24. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu +1 -1
  25. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu +1 -1
  26. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu +1 -1
  27. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu +1 -1
  28. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu +1 -1
  29. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu +1 -1
  30. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu +1 -1
  31. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu +1 -1
  32. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu +1 -1
  33. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu +1 -1
  34. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu +1 -1
  35. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu +1 -1
  36. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu +1 -1
  37. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu +1 -1
  38. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu +1 -1
  39. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu +1 -1
  40. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu +1 -1
  41. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu +1 -1
  42. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu +1 -1
  43. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu +1 -1
  44. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu +1 -1
  45. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu +1 -1
  46. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu +1 -1
  47. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu +1 -1
  48. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu +1 -1
  49. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu +1 -1
  50. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu +1 -1
  51. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu +1 -1
  52. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu +1 -1
  53. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu +1 -1
  54. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu +1 -1
  55. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu +1 -1
  56. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu +1 -1
  57. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu +1 -1
  58. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu +1 -1
  59. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu +1 -1
  60. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu +1 -1
  61. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu +1 -1
  62. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu +1 -1
  63. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu +1 -1
  64. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu +1 -1
  65. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu +1 -1
  66. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu +1 -1
  67. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu +1 -1
  68. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu +1 -1
  69. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu +1 -1
  70. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu +1 -1
  71. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu +1 -1
  72. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu +1 -1
  73. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu +1 -1
  74. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu +1 -1
  75. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu +1 -1
  76. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu +1 -1
  77. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu +1 -1
  78. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu +1 -1
  79. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu +1 -1
  80. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu +1 -1
  81. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu +1 -1
  82. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu +1 -1
  83. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu +1 -1
  84. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu +1 -1
  85. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu +1 -1
  86. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu +1 -1
  87. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu +1 -1
  88. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu +1 -1
  89. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu +1 -1
  90. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu +1 -1
  91. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu +1 -1
  92. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu +1 -1
  93. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu +1 -1
  94. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu +1 -1
  95. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu +1 -1
  96. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu +1 -1
  97. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu +1 -1
  98. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu +1 -1
  99. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu +1 -1
  100. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu +1 -1
  101. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu +1 -1
  102. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu +1 -1
  103. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu +1 -1
  104. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu +1 -1
  105. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu +1 -1
  106. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu +1 -1
  107. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu +1 -1
  108. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu +1 -1
  109. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu +1 -1
  110. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu +1 -1
  111. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu +1 -1
  112. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu +1 -1
  113. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q2_k.cu +5 -0
  114. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q3_k.cu +5 -0
  115. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_0.cu +5 -0
  116. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_1.cu +5 -0
  117. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q4_k.cu +5 -0
  118. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_0.cu +5 -0
  119. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_1.cu +5 -0
  120. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q5_k.cu +5 -0
  121. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q6_k.cu +5 -0
  122. data/vendor/tmp/llama.cpp/ggml-cuda/template-instances/mmq-instance-q8_0.cu +5 -0
  123. data/vendor/tmp/llama.cpp/ggml-cuda/unary.cu +48 -0
  124. data/vendor/tmp/llama.cpp/ggml-cuda.cu +95 -129
  125. data/vendor/tmp/llama.cpp/ggml-impl.h +1 -1
  126. data/vendor/tmp/llama.cpp/ggml-kompute.cpp +8 -7
  127. data/vendor/tmp/llama.cpp/ggml-metal.m +17 -9
  128. data/vendor/tmp/llama.cpp/ggml-quants.c +982 -368
  129. data/vendor/tmp/llama.cpp/ggml-rpc.cpp +21 -15
  130. data/vendor/tmp/llama.cpp/ggml-sycl.cpp +2133 -13215
  131. data/vendor/tmp/llama.cpp/ggml-sycl.h +1 -10
  132. data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +28826 -25037
  133. data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +438 -493
  134. data/vendor/tmp/llama.cpp/ggml.c +158 -414
  135. data/vendor/tmp/llama.cpp/ggml.h +6 -0
  136. data/vendor/tmp/llama.cpp/llama.cpp +628 -279
  137. data/vendor/tmp/llama.cpp/llama.h +9 -1
  138. data/vendor/tmp/llama.cpp/sgemm.cpp +2 -0
  139. data/vendor/tmp/llama.cpp/unicode-data.cpp +851 -801
  140. data/vendor/tmp/llama.cpp/unicode.cpp +33 -19
  141. data/vendor/tmp/llama.cpp/unicode.h +1 -1
  142. metadata +15 -3
@@ -297,12 +297,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 
  #if defined(GGML_USE_ACCELERATE)
  #include <Accelerate/Accelerate.h>
- #elif defined(GGML_USE_OPENBLAS)
- #if defined(GGML_BLAS_USE_MKL)
- #include <mkl.h>
- #else
- #include <cblas.h>
- #endif
  #endif
 
  // floating point type used to accumulate sums
@@ -1759,9 +1753,8 @@ struct ggml_compute_state_shared {
  int n_threads;
 
  // synchronization primitives
- atomic_int n_active; // num active threads
- atomic_int node_n; // active graph node
- atomic_int node_task; // active graph node task phase
+ atomic_int n_barrier;
+ atomic_int n_barrier_passed;
 
  ggml_abort_callback abort_callback; // abort ggml_graph_compute when true
  void* abort_callback_data;
@@ -3212,35 +3205,42 @@ GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
  return tensor->nb[0] > tensor->nb[1];
  }
 
- GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
- static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+ static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
+ size_t next_nb = ggml_type_size(tensor->type);
+ if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
+ return false;
+ }
+ next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
+ for (int i = 1; i < GGML_MAX_DIMS; i++) {
+ if (tensor->ne[i] != 1) {
+ if (i > n) {
+ if (tensor->nb[i] != next_nb) {
+ return false;
+ }
+ next_nb *= tensor->ne[i];
+ } else {
+ // this dimension does not need to be contiguous
+ next_nb = tensor->ne[i]*tensor->nb[i];
+ }
+ }
+ }
+ return true;
+ }
 
- return
- tensor->nb[0] == ggml_type_size(tensor->type) &&
- tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) &&
- tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
- tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+ GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
+ return ggml_is_contiguous_0(tensor);
  }
 
  GGML_CALL bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
- return ggml_is_contiguous(tensor);
+ return ggml_is_contiguous_n(tensor, 0);
  }
 
  GGML_CALL bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
- static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
- return
- tensor->nb[0] == ggml_type_size(tensor->type) &&
- tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
- tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+ return ggml_is_contiguous_n(tensor, 1);
  }
 
  GGML_CALL bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
- static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
- return
- tensor->nb[0] == ggml_type_size(tensor->type) &&
- tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+ return ggml_is_contiguous_n(tensor, 2);
  }
 
  GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
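
Note: the rewritten contiguity check above generalizes the old hard-coded stride comparisons. ggml_is_contiguous_n(tensor, n) lets dimensions 1..n carry arbitrary strides while every dimension above n must stay tightly packed, so ggml_is_contiguous_1() now accepts, for example, a row-sliced view whose rows are padded. The standalone sketch below (not part of the gem; the toy_tensor struct and the 6x4 view are invented for illustration, assuming a float tensor with block size 1) mirrors that logic:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define MAX_DIMS 4

    struct toy_tensor {
        long   ne[MAX_DIMS]; /* elements per dimension          */
        size_t nb[MAX_DIMS]; /* stride per dimension, in bytes  */
    };

    /* dims 1..n may have arbitrary strides; every dim above n must be packed */
    static bool is_contiguous_n(const struct toy_tensor * t, int n) {
        size_t next_nb = sizeof(float);
        if (t->ne[0] != 1 && t->nb[0] != next_nb) {
            return false;
        }
        next_nb *= (size_t) t->ne[0];
        for (int i = 1; i < MAX_DIMS; i++) {
            if (t->ne[i] != 1) {
                if (i > n) {
                    if (t->nb[i] != next_nb) {
                        return false;
                    }
                    next_nb *= (size_t) t->ne[i];
                } else {
                    /* this dimension does not need to be contiguous */
                    next_nb = (size_t) t->ne[i] * t->nb[i];
                }
            }
        }
        return true;
    }

    int main(void) {
        /* a 6x4 view into an 8x4 float tensor: each row is packed,
           but rows are 8 floats (32 bytes) apart, not 6 */
        struct toy_tensor view = {
            .ne = { 6, 4, 1, 1 },
            .nb = { sizeof(float), 8 * sizeof(float), 32 * sizeof(float), 32 * sizeof(float) },
        };
        printf("is_contiguous_n(view, 0) = %d\n", is_contiguous_n(&view, 0)); /* 0 */
        printf("is_contiguous_n(view, 1) = %d\n", is_contiguous_n(&view, 1)); /* 1 */
        return 0;
    }

Built with any C99 compiler, it prints 0 for the fully-contiguous check and 1 for the per-row check, which is exactly the distinction the new per-op asserts below rely on.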
@@ -3272,20 +3272,20 @@ bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
  return
- (t0->ne[0] == t1->ne[0] ) &&
- (t0->ne[1] == t1->ne[1] ) &&
- (t0->ne[2] == t1->ne[2] ) &&
- (t0->ne[3] == t1->ne[3] );
+ (t0->ne[0] == t1->ne[0]) &&
+ (t0->ne[1] == t1->ne[1]) &&
+ (t0->ne[2] == t1->ne[2]) &&
+ (t0->ne[3] == t1->ne[3]);
  }
 
  bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
  static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
  return
- (t0->nb[0] == t1->nb[0] ) &&
- (t0->nb[1] == t1->nb[1] ) &&
- (t0->nb[2] == t1->nb[2] ) &&
- (t0->nb[3] == t1->nb[3] );
+ (t0->nb[0] == t1->nb[0]) &&
+ (t0->nb[1] == t1->nb[1]) &&
+ (t0->nb[2] == t1->nb[2]) &&
+ (t0->nb[3] == t1->nb[3]);
  }
 
  // check if t1 can be represented as a repeatition of t0
@@ -4078,32 +4078,26 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
  switch (tensor->type) {
  case GGML_TYPE_I8:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
  return ((int8_t *)(tensor->data))[i];
  }
  case GGML_TYPE_I16:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
  return ((int16_t *)(tensor->data))[i];
  }
  case GGML_TYPE_I32:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
  return ((int32_t *)(tensor->data))[i];
  }
  case GGML_TYPE_F16:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
  return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
  }
  case GGML_TYPE_BF16:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
  return GGML_BF16_TO_FP32(((ggml_bf16_t *)(tensor->data))[i]);
  }
  case GGML_TYPE_F32:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(float));
  return ((float *)(tensor->data))[i];
  }
  default:
@@ -4125,32 +4119,26 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
  switch (tensor->type) {
  case GGML_TYPE_I8:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
  ((int8_t *)(tensor->data))[i] = value;
  } break;
  case GGML_TYPE_I16:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
  ((int16_t *)(tensor->data))[i] = value;
  } break;
  case GGML_TYPE_I32:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
  ((int32_t *)(tensor->data))[i] = value;
  } break;
  case GGML_TYPE_F16:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
  ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value);
  } break;
  case GGML_TYPE_BF16:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(ggml_bf16_t));
  ((ggml_bf16_t *)(tensor->data))[i] = GGML_FP32_TO_BF16(value);
  } break;
  case GGML_TYPE_F32:
  {
- GGML_ASSERT(tensor->nb[0] == sizeof(float));
  ((float *)(tensor->data))[i] = value;
  } break;
  default:
@@ -7343,13 +7331,15 @@ struct ggml_tensor * ggml_add_rel_pos_inplace(
  return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
  }
 
- // gmml_unary
+ // ggml_unary
 
  static struct ggml_tensor * ggml_unary_impl(
  struct ggml_context * ctx,
  struct ggml_tensor * a,
  enum ggml_unary_op op,
  bool inplace) {
+ GGML_ASSERT(ggml_is_contiguous_1(a));
+
  bool is_node = false;
 
  if (!inplace && (a->grad)) {
@@ -11014,6 +11004,8 @@ static void ggml_compute_forward_abs_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11023,9 +11015,6 @@ static void ggml_compute_forward_abs_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_abs_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11060,6 +11049,8 @@ static void ggml_compute_forward_sgn_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11069,9 +11060,6 @@ static void ggml_compute_forward_sgn_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_sgn_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11106,6 +11094,8 @@ static void ggml_compute_forward_neg_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11115,9 +11105,6 @@ static void ggml_compute_forward_neg_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_neg_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11152,6 +11139,8 @@ static void ggml_compute_forward_step_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11161,9 +11150,6 @@ static void ggml_compute_forward_step_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_step_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11198,6 +11184,8 @@ static void ggml_compute_forward_tanh_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11207,9 +11195,6 @@ static void ggml_compute_forward_tanh_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_tanh_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11244,6 +11229,8 @@ static void ggml_compute_forward_elu_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11253,9 +11240,6 @@ static void ggml_compute_forward_elu_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_elu_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11290,6 +11274,8 @@ static void ggml_compute_forward_relu_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11299,9 +11285,6 @@ static void ggml_compute_forward_relu_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_relu_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11336,6 +11319,8 @@ static void ggml_compute_forward_sigmoid_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11345,9 +11330,6 @@ static void ggml_compute_forward_sigmoid_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_sigmoid_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11381,9 +11363,9 @@ static void ggml_compute_forward_gelu_f32(
 
  const struct ggml_tensor * src0 = dst->src[0];
 
- GGML_ASSERT(ggml_is_contiguous_1(src0));
- GGML_ASSERT(ggml_is_contiguous_1(dst));
- GGML_ASSERT(ggml_are_same_shape(src0, dst));
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
+ assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
  return;
@@ -11444,9 +11426,9 @@ static void ggml_compute_forward_gelu_quick_f32(
 
  const struct ggml_tensor * src0 = dst->src[0];
 
- GGML_ASSERT(ggml_is_contiguous_1(src0));
- GGML_ASSERT(ggml_is_contiguous_1(dst));
- GGML_ASSERT(ggml_are_same_shape(src0, dst));
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
+ assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
  return;
@@ -11507,9 +11489,9 @@ static void ggml_compute_forward_silu_f32(
 
  const struct ggml_tensor * src0 = dst->src[0];
 
- GGML_ASSERT(ggml_is_contiguous_1(src0));
- GGML_ASSERT(ggml_is_contiguous_1(dst));
- GGML_ASSERT(ggml_are_same_shape(src0, dst));
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
+ assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
  return;
@@ -11570,6 +11552,8 @@ static void ggml_compute_forward_leaky_relu_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11619,11 +11603,11 @@ static void ggml_compute_forward_silu_back_f32(
  const struct ggml_tensor * src0 = dst->src[0];
  const struct ggml_tensor * grad = dst->src[1];
 
- GGML_ASSERT(ggml_is_contiguous_1(grad));
- GGML_ASSERT(ggml_is_contiguous_1(src0));
- GGML_ASSERT(ggml_is_contiguous_1(dst));
- GGML_ASSERT(ggml_are_same_shape(src0, dst));
- GGML_ASSERT(ggml_are_same_shape(src0, grad));
+ assert(ggml_is_contiguous_1(grad));
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
+ assert(ggml_are_same_shape(src0, dst));
+ assert(ggml_are_same_shape(src0, grad));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
  return;
@@ -11685,6 +11669,8 @@ static void ggml_compute_forward_hardswish_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11694,9 +11680,6 @@ static void ggml_compute_forward_hardswish_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_hardswish_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -11728,6 +11711,8 @@ static void ggml_compute_forward_hardsigmoid_f32(
  const struct ggml_tensor * src0 = dst->src[0];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11737,9 +11722,6 @@ static void ggml_compute_forward_hardsigmoid_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert(dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  ggml_vec_hardsigmoid_f32(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -12190,39 +12172,6 @@ static void ggml_compute_forward_group_norm(
 
  // ggml_compute_forward_mul_mat
 
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
- // helper function to determine if it is better to use BLAS or not
- // for large matrices, BLAS is faster
- static bool ggml_compute_forward_mul_mat_use_blas(struct ggml_tensor * dst) {
- const struct ggml_tensor * src0 = dst->src[0];
- const struct ggml_tensor * src1 = dst->src[1];
-
- //const int64_t ne00 = src0->ne[0];
- //const int64_t ne01 = src0->ne[1];
-
- const int64_t ne10 = src1->ne[0];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
-
- // NOTE: with GGML_OP_MUL_MAT_ID we don't want to go through the BLAS branch because it will dequantize (to_float)
- // all the experts for each batch element and the processing would become incredibly slow
- // TODO: find the optimal values for these
- if (dst->op != GGML_OP_MUL_MAT_ID &&
- ggml_is_contiguous(src0) &&
- ggml_is_contiguous(src1) &&
- //src0->type == GGML_TYPE_F32 &&
- src1->type == GGML_TYPE_F32 &&
- (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
-
- /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
- return true;
- }
-
- return false;
- }
- #endif
-
  static void ggml_compute_forward_mul_mat_one_chunk(
  const struct ggml_compute_params * params,
  struct ggml_tensor * dst,
@@ -12360,73 +12309,6 @@ static void ggml_compute_forward_mul_mat(
  // nb01 >= nb00 - src0 is not transposed
  // compute by src0 rows
 
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
- if (ggml_compute_forward_mul_mat_use_blas(dst)) {
- const int64_t ne_plane = ne01*ne00;
- const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
- UNUSED(desired_wsize);
-
- if (params->type == GGML_TASK_TYPE_INIT) {
- if (type != GGML_TYPE_F32) {
- assert(params->wsize >= desired_wsize);
- // parallelize by src0 rows
- for (int64_t i13 = 0; i13 < ne13; i13++) {
- for (int64_t i12 = 0; i12 < ne12; i12++) {
- // broadcast src0 into src1 across 2nd,3rd dimension
- const int64_t i03 = i13/r3;
- const int64_t i02 = i12/r2;
-
- const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
- float * const wdata = (float *) params->wdata + i13*ne12*ne_plane + i12*ne_plane;
- ggml_to_float_t const to_float = type_traits[type].to_float;
-
- for (int64_t i01 = ith; i01 < ne01; i01 += nth) {
- to_float((const char *) x + i01*nb01, wdata + i01*ne00, ne00);
- }
- }
- }
- }
- return;
- }
-
- if (params->type == GGML_TASK_TYPE_FINALIZE) {
- return;
- }
-
- // perform sgemm, parallelization controlled by blas lib
- if (ith != 0) {
- return;
- }
-
- //const int64_t tgemm0 = ggml_perf_time_us();
- for (int64_t i13 = 0; i13 < ne13; i13++) {
- for (int64_t i12 = 0; i12 < ne12; i12++) {
- const int64_t i03 = i13/r3;
- const int64_t i02 = i12/r2;
-
- const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
- const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
- float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
-
- if (type != GGML_TYPE_F32) {
- x = (float *) params->wdata + i13*ne12*ne_plane + i12*ne_plane;
- }
-
- cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
- ne1, ne01, ne10,
- 1.0f, y, ne10,
- x, ne00,
- 0.0f, d, ne01);
- }
- }
- //printf("cblas_sgemm = %.3f ms, %lld flops\n", (ggml_perf_time_us() - tgemm0)/1000.0, ne13*ne12*ne1*ne01*ne10*2);
-
- //printf("CBLAS = %f ms, %d x %d x %d x %d\n", (ggml_perf_time_us() - t0)/1000.0, ne0, ne1, ne2, ne3);
-
- return;
- }
- #endif
-
  #if GGML_USE_LLAMAFILE
  const bool src1_cont = ggml_is_contiguous(src1);
 
@@ -12807,19 +12689,7 @@ static void ggml_compute_forward_out_prod_f32(
  // nb01 >= nb00 - src0 is not transposed
  // compute by src0 rows
 
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
- bool use_blas = ggml_is_matrix(src0) &&
- ggml_is_matrix(src1) &&
- ggml_is_contiguous(src0) &&
- (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
- #endif
-
  if (params->type == GGML_TASK_TYPE_INIT) {
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
- if (use_blas) {
- return;
- }
- #endif
  if (ith != 0) {
  return;
  }
@@ -12831,50 +12701,6 @@ static void ggml_compute_forward_out_prod_f32(
  return;
  }
 
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
- if (use_blas) {
- if (params->ith != 0) { // All threads other than the first do no work.
- return;
- }
- // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
- // src0: (k,n)
- // src1: (k,m)
- // dst: (m,n)
- //
- // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
- // Also expressed as (major,minor)
- // a: (m,k): so src1 transposed
- // b: (k,n): so src0
- // c: (m,n)
- //
- // However, if ggml_is_transposed(src1) is true, then
- // src1->data already contains a transposed version, so sgemm mustn't
- // transpose it further.
-
- int n = src0->ne[0];
- int k = src0->ne[1];
- int m = src1->ne[0];
-
- int transposeA, lda;
-
- if (!ggml_is_transposed(src1)) {
- transposeA = CblasTrans;
- lda = m;
- } else {
- transposeA = CblasNoTrans;
- lda = k;
- }
-
- float * a = (float *) ((char *) src1->data);
- float * b = (float *) ((char *) src0->data);
- float * c = (float *) ((char *) dst->data);
-
- cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
-
- return;
- }
- #endif
-
  // dst[:,:,:,:] = 0
  // for i2,i3:
  // for i1:
@@ -13004,8 +12830,6 @@ static void ggml_compute_forward_out_prod_q_f32(
  // nb01 >= nb00 - src0 is not transposed
  // compute by src0 rows
 
- // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-
  if (params->type == GGML_TASK_TYPE_INIT) {
  if (ith != 0) {
  return;
@@ -13402,6 +13226,8 @@ static void ggml_compute_forward_get_rows_q(
  const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
  const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
+ assert(i01 >= 0 && i01 < ne01);
+
  dequantize_row_q(
  (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
  (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
@@ -13445,6 +13271,8 @@ static void ggml_compute_forward_get_rows_f16(
  const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
  const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
+ assert(i01 >= 0 && i01 < ne01);
+
  ggml_fp16_to_fp32_row(
  (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
  (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
@@ -13488,7 +13316,9 @@ static void ggml_compute_forward_get_rows_bf16(
  const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
  const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
- ggml_bf16_to_fp32_row(
+ assert(i01 >= 0 && i01 < ne01);
+
+ ggml_bf16_to_fp32_row(
  (const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
  (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
  }
@@ -13531,6 +13361,8 @@ static void ggml_compute_forward_get_rows_f32(
  const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
  const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
 
+ assert(i01 >= 0 && i01 < ne01);
+
  ggml_vec_cpy_f32(nc,
  (float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3),
  (float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03));
@@ -16686,7 +16518,10 @@ static void ggml_compute_forward_map_unary_f32(
 
  const struct ggml_tensor * src0 = dst->src[0];
 
- GGML_ASSERT(ggml_are_same_shape(src0, dst));
+ assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(dst));
+ assert(ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
  return;
@@ -16695,9 +16530,6 @@ static void ggml_compute_forward_map_unary_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert( dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  fun(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -16735,6 +16567,9 @@ static void ggml_compute_forward_map_binary_f32(
  const struct ggml_tensor * src1 = dst->src[1];
 
  assert(params->ith == 0);
+ assert(ggml_is_contiguous_1(src0));
+ assert(ggml_is_contiguous_1(src1));
+ assert(ggml_is_contiguous_1(dst));
  assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
  if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -16744,10 +16579,6 @@ static void ggml_compute_forward_map_binary_f32(
  const int n = ggml_nrows(src0);
  const int nc = src0->ne[0];
 
- assert( dst->nb[0] == sizeof(float));
- assert(src0->nb[0] == sizeof(float));
- assert(src1->nb[0] == sizeof(float));
-
  for (int i = 0; i < n; i++) {
  fun(nc,
  (float *) ((char *) dst->data + i*( dst->nb[1])),
@@ -18905,6 +18736,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
  switch (node->op) {
  case GGML_OP_CPY:
  case GGML_OP_DUP:
+ case GGML_OP_CONT:
  case GGML_OP_ADD:
  case GGML_OP_ADD1:
  case GGML_OP_ACC:
@@ -18989,7 +18821,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
  } break;
  case GGML_OP_SCALE:
  case GGML_OP_SET:
- case GGML_OP_CONT:
  case GGML_OP_RESHAPE:
  case GGML_OP_VIEW:
  case GGML_OP_PERMUTE:
@@ -19140,41 +18971,49 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
  return n_tasks;
  }
 
- static void ggml_graph_compute_thread_sync_node(int * node_n, struct ggml_compute_state * state, const bool do_yield) {
- // wait for other threads to finish
- const int last_node_n = * node_n;
-
- while (true) {
- if (do_yield) {
- sched_yield();
- }
-
- * node_n = atomic_load(&state->shared->node_n);
- if (* node_n != last_node_n) break;
- #if defined(__SSE3__)
- // Tell the processor we're spinning. It's a processor hint for spinlocks.
- _mm_pause();
- #endif
+ #ifdef GGML_USE_OPENMP
+ static void ggml_barrier(struct ggml_compute_state * state) {
+ if (state->shared->n_threads == 1) {
+ return;
  }
+
+ #pragma omp barrier
  }
+ #else
+ static void ggml_barrier(struct ggml_compute_state * state) {
+ if (state->shared->n_threads == 1) {
+ return;
+ }
 
- static void ggml_graph_compute_thread_sync_task(int * task_phase, struct ggml_compute_state * state, const bool do_yield) {
- // wait for other threads to finish
- const int last_task_phase = * task_phase;
+ atomic_int * n_barrier = &state->shared->n_barrier;
+ atomic_int * n_barrier_passed = &state->shared->n_barrier_passed;
 
- while (true) {
- if (do_yield) {
+ int n_threads = state->shared->n_threads;
+ int passed_old = atomic_load(n_barrier_passed);
+
+ if (atomic_fetch_add(n_barrier, 1) == n_threads - 1) {
+ // last thread
+ atomic_store(n_barrier, 0);
+ atomic_fetch_add(n_barrier_passed, 1);
+ } else {
+ // wait for other threads
+ //while (atomic_load(n_barrier_passed) == passed_old) {
+ //}
+ const int n_spin_before_sleep = 100000;
+ while (true) {
+ for (int i = 0; i < n_spin_before_sleep; i++) {
+ if (atomic_load(n_barrier_passed) != passed_old) {
+ return;
+ }
+ #if defined(__SSE3__)
+ _mm_pause();
+ #endif
+ }
  sched_yield();
  }
-
- * task_phase = atomic_load(&state->shared->node_task);
- if (* task_phase != last_task_phase) break;
- #if defined(__SSE3__)
- // Tell the processor we're spinning. It's a processor hint for spinlocks.
- _mm_pause();
- #endif
  }
  }
+ #endif
 
  static thread_ret_t ggml_graph_compute_thread(void * data) {
  struct ggml_compute_state * state = (struct ggml_compute_state *) data;
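
Note: the hunk above replaces the per-node n_active/node_n/node_task handshake with a reusable counting barrier built from two atomics: n_barrier counts arrivals, and n_barrier_passed acts as a generation counter so consecutive barriers are not confused. A minimal standalone sketch of that pattern (pthreads, the counter names, and the demo workload are my own invention for illustration; this is not the gem's code — compile with -pthread):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define N_THREADS 4

    static atomic_int n_barrier        = 0; /* threads arrived at the current barrier  */
    static atomic_int n_barrier_passed = 0; /* how many barriers completed (generation) */

    /* Reusable barrier: the last thread to arrive resets the arrival counter and
       bumps the generation counter; everyone else spins on the generation counter. */
    static void barrier(void) {
        int passed_old = atomic_load(&n_barrier_passed);
        if (atomic_fetch_add(&n_barrier, 1) == N_THREADS - 1) {
            atomic_store(&n_barrier, 0);
            atomic_fetch_add(&n_barrier_passed, 1);
        } else {
            while (atomic_load(&n_barrier_passed) == passed_old) {
                /* spin; a real implementation would pause/yield here */
            }
        }
    }

    static void * worker(void * arg) {
        int ith = (int)(long) arg;
        for (int node = 0; node < 3; node++) {   /* three "graph nodes" */
            printf("thread %d: compute node %d\n", ith, node);
            barrier();   /* no thread starts node+1 before all finish node */
        }
        return NULL;
    }

    int main(void) {
        pthread_t tid[N_THREADS];
        for (long i = 0; i < N_THREADS; i++) pthread_create(&tid[i], NULL, worker, (void *) i);
        for (int  i = 0; i < N_THREADS; i++) pthread_join(tid[i], NULL);
        return 0;
    }

Because each waiter watches the generation counter advance rather than the arrival counter reset, a fast thread cannot slip through the same barrier twice, which is why the rewritten compute loop below can simply call ggml_barrier() between the INIT, COMPUTE, and FINALIZE phases of every node.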
@@ -19182,136 +19021,54 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
  const struct ggml_cgraph * cgraph = state->shared->cgraph;
  const struct ggml_cplan * cplan = state->shared->cplan;
 
- const int n_threads = state->shared->n_threads;
+ const int ith = state->ith;
+ const int n_threads = state->shared->n_threads;
 
- set_numa_thread_affinity(state->ith);
+ set_numa_thread_affinity(ith);
 
- int node_n = -1;
- int task_phase = GGML_TASK_TYPE_FINALIZE;
+ struct ggml_compute_params params = {
+ /*.type =*/ GGML_TASK_TYPE_INIT,
+ /*.ith =*/ ith,
+ /*.nth =*/ state->shared->n_threads,
+ /*.wsize =*/ cplan->work_size,
+ /*.wdata =*/ cplan->work_data,
+ };
 
- while (true) {
+ for (int node_n = 0; node_n < cgraph->n_nodes; node_n++) {
  if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
- state->shared->node_n += 1;
  state->ec = GGML_STATUS_ABORTED;
  return 0;
  }
 
- if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
- // all other threads are finished and spinning
- // do finalize and init here so we don't have synchronize again
- struct ggml_compute_params params = {
- /*.type =*/ GGML_TASK_TYPE_FINALIZE,
- /*.ith =*/ 0,
- /*.nth =*/ 0,
- /*.wsize =*/ cplan->work_size,
- /*.wdata =*/ cplan->work_data,
- };
-
- if (node_n != -1) {
- /* FINALIZE */
- struct ggml_tensor * node = cgraph->nodes[node_n];
- if (GGML_OP_HAS_FINALIZE[node->op]) {
- params.nth = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
- ggml_compute_forward(&params, node, state);
- }
- ggml_graph_compute_perf_stats_node(node, state->shared);
- }
-
- // distribute new work or execute it direct if 1T
- while (++node_n < cgraph->n_nodes) {
- GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
- struct ggml_tensor * node = cgraph->nodes[node_n];
- const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
-
- state->shared->perf_node_start_cycles = ggml_perf_cycles();
- state->shared->perf_node_start_time_us = ggml_perf_time_us();
-
- params.nth = n_tasks;
-
- if (n_tasks == 1) {
- /* INIT */
- if (GGML_OP_HAS_INIT[node->op]) {
- params.type = GGML_TASK_TYPE_INIT;
- ggml_compute_forward(&params, node, state);
- }
-
- // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
- // they do something more efficient than spinning (?)
- params.type = GGML_TASK_TYPE_COMPUTE;
- ggml_compute_forward(&params, node, state);
-
- if (GGML_OP_HAS_FINALIZE[node->op]) {
- params.type = GGML_TASK_TYPE_FINALIZE;
- ggml_compute_forward(&params, node, state);
- }
-
- ggml_graph_compute_perf_stats_node(node, state->shared);
- } else {
- break;
- }
-
- if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
- break;
- }
- }
-
- task_phase = GGML_TASK_TYPE_INIT;
- atomic_store(&state->shared->n_active, n_threads);
- atomic_store(&state->shared->node_n, node_n);
- atomic_store(&state->shared->node_task, task_phase);
- } else {
- ggml_graph_compute_thread_sync_node(&node_n, state, false);
- ggml_graph_compute_thread_sync_task(&task_phase, state, false);
- }
-
- // check if we should stop
- if (node_n >= cgraph->n_nodes) break;
-
- /* INIT & COMPUTE */
  struct ggml_tensor * node = cgraph->nodes[node_n];
  const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
 
- struct ggml_compute_params params = {
- /*.type =*/ GGML_TASK_TYPE_INIT,
- /*.ith =*/ state->ith,
- /*.nth =*/ n_tasks,
- /*.wsize =*/ cplan->work_size,
- /*.wdata =*/ cplan->work_data,
- };
+ params.nth = n_tasks;
 
- if (state->ith < n_tasks) {
- if (GGML_OP_HAS_INIT[node->op]) {
+ /* INIT */
+ if (GGML_OP_HAS_INIT[node->op]) {
+ if (ith < n_tasks) {
+ params.type = GGML_TASK_TYPE_INIT;
  ggml_compute_forward(&params, node, state);
  }
+ ggml_barrier(state);
  }
 
- if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
- task_phase = GGML_TASK_TYPE_COMPUTE;
- atomic_store(&state->shared->n_active, n_threads);
- atomic_store(&state->shared->node_task, task_phase);
- }
- else {
- // TODO: this sched_yield can have significant impact on the performance - either positive or negative
- // depending on the workload and the operating system.
- // since it is not clear what is the best approach, it should potentially become user-configurable
- // ref: https://github.com/ggerganov/ggml/issues/291
- // UPD: adding the do_yield flag seems to resolve the issue universally
- const bool do_yield = node_n < 0 || cgraph->nodes[node_n]->op == GGML_OP_MUL_MAT;
- ggml_graph_compute_thread_sync_task(&task_phase, state, do_yield);
- }
-
- if (state->ith < n_tasks) {
+ /* COMPUTE */
+ if (ith < n_tasks) {
  params.type = GGML_TASK_TYPE_COMPUTE;
  ggml_compute_forward(&params, node, state);
  }
 
- if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
- task_phase = GGML_TASK_TYPE_FINALIZE;
- atomic_store(&state->shared->n_active, n_threads);
- atomic_store(&state->shared->node_task, task_phase);
- }
- else {
- ggml_graph_compute_thread_sync_task(&task_phase, state, false);
+ ggml_barrier(state);
+
+ /* FINALIZE */
+ if (GGML_OP_HAS_FINALIZE[node->op]) {
+ if (params.ith == 0) {
+ params.type = GGML_TASK_TYPE_FINALIZE;
+ ggml_compute_forward(&params, node, state);
+ }
+ ggml_barrier(state);
  }
  }
 
@@ -19368,17 +19125,6 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
  {
  const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
 
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
- if (ggml_compute_forward_mul_mat_use_blas(node)) {
- if (node->src[0]->type != GGML_TYPE_F32) {
- // here we need memory for fully dequantized matrix from src0
- // take into account that src0 can be broadcasted into src1[2,3]
- cur = ggml_type_size(GGML_TYPE_F32)
- * node->src[0]->ne[0]*node->src[0]->ne[1]
- * node->src[1]->ne[2]*node->src[1]->ne[3];
- }
- } else
- #endif
  if (node->src[1]->type != vec_dot_type) {
  cur = ggml_row_size(vec_dot_type, ggml_nelements(node->src[1]));
  }
@@ -19509,7 +19255,6 @@ static enum ggml_status ggml_graph_compute_parallel(struct ggml_compute_state *
  // update the number of threads from the actual number of threads that we got from OpenMP
  n_threads = omp_get_num_threads();
  workers[0].shared->n_threads = n_threads;
- workers[0].shared->n_active = n_threads;
  }
  ggml_graph_compute_thread(&workers[omp_get_thread_num()]);
  }
@@ -19572,9 +19317,8 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
  /*.perf_node_start_cycles =*/ 0,
  /*.perf_node_start_time_us =*/ 0,
  /*.n_threads =*/ n_threads,
- /*.n_active =*/ n_threads,
- /*.node_n =*/ -1,
- /*.node_task =*/ GGML_TASK_TYPE_FINALIZE,
+ /*.n_barrier =*/ 0,
+ /*.n_barrier_passed =*/ 0,
  /*.abort_callback =*/ NULL,
  /*.abort_callback_data =*/ NULL,
  /*.current_chunk; =*/ 0,
@@ -22676,7 +22420,7 @@ int ggml_cpu_has_wasm_simd(void) {
  }
 
  int ggml_cpu_has_blas(void) {
- #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
+ #if defined(GGML_USE_BLAS) || defined(GGML_USE_CUDA) || defined(GGML_USE_VULKAN) || defined(GGML_USE_SYCL)
  return 1;
  #else
  return 0;