llama_cpp 0.12.4 → 0.12.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7693,6 +7693,13 @@ static void cpy_1_f16_f16(const char * cxi, char * cdsti) {
7693
7693
  *dsti = *xi;
7694
7694
  }
7695
7695
 
7696
+ static void cpy_1_f16_f32(const char * cxi, char * cdsti) {
7697
+ const sycl::half *xi = (const sycl::half *)cxi;
7698
+ float *dsti = (float *)cdsti;
7699
+
7700
+ *dsti = *xi;
7701
+ }
7702
+
7696
7703
  static void cpy_1_i16_i16(const char * cxi, char * cdsti) {
7697
7704
  const int16_t *xi = (const int16_t *)cxi;
7698
7705
  int16_t *dsti = (int16_t *)cdsti;
@@ -7709,9 +7716,9 @@ static void cpy_1_i32_i32(const char * cxi, char * cdsti) {
7709
7716
 
7710
7717
  template <cpy_kernel_t cpy_1>
7711
7718
  static void cpy_f32_f16(const char * cx, char * cdst, const int ne,
7712
- const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
7713
- const int ne10, const int ne11, const int nb10, const int nb11, const int nb12,
7714
- const sycl::nd_item<3> &item_ct1) {
7719
+ const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
7720
+ const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
7721
+ const int nb12, const int nb13, const sycl::nd_item<3> &item_ct1) {
7715
7722
  const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) +
7716
7723
  item_ct1.get_local_id(2);
7717
7724
 
@@ -7721,15 +7728,17 @@ static void cpy_f32_f16(const char * cx, char * cdst, const int ne,
7721
7728
 
7722
7729
  // determine indices i02/i12, i01/i11, i00/i10 as a function of index i of flattened tensor
7723
7730
  // then combine those indices with the corresponding byte offsets to get the total offsets
7724
- const int i02 = i / (ne00*ne01);
7725
- const int i01 = (i - i02*ne01*ne00) / ne00;
7726
- const int i00 = i - i02*ne01*ne00 - i01*ne00;
7727
- const int x_offset = i00*nb00 + i01*nb01 + i02*nb02;
7728
-
7729
- const int i12 = i / (ne10*ne11);
7730
- const int i11 = (i - i12*ne10*ne11) / ne10;
7731
- const int i10 = i - i12*ne10*ne11 - i11*ne10;
7732
- const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12;
7731
+ const int i03 = i/(ne00 * ne01 * ne02);
7732
+ const int i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
7733
+ const int i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
7734
+ const int i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
7735
+ const int x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;
7736
+
7737
+ const int i13 = i/(ne10 * ne11 * ne12);
7738
+ const int i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
7739
+ const int i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
7740
+ const int i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
7741
+ const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12 + i13 * nb13;
7733
7742
 
7734
7743
  cpy_1(cx + x_offset, cdst + dst_offset);
7735
7744
  }
@@ -7823,9 +7832,9 @@ static void cpy_blck_f32_q4_1(const char * cxi, char * cdsti) {
7823
7832
 
7824
7833
  template <cpy_kernel_t cpy_blck, int qk>
7825
7834
  static void cpy_f32_q(const char * cx, char * cdst, const int ne,
7826
- const int ne00, const int ne01, const int nb00, const int nb01, const int nb02,
7827
- const int ne10, const int ne11, const int nb10, const int nb11, const int nb12,
7828
- const sycl::nd_item<3> &item_ct1) {
7835
+ const int ne00, const int ne01, const int ne02, const int nb00, const int nb01, const int nb02,
7836
+ const int nb03, const int ne10, const int ne11, const int ne12, const int nb10, const int nb11,
7837
+ const int nb12, const int nb13, const sycl::nd_item<3> &item_ct1) {
7829
7838
  const int i = (item_ct1.get_local_range(2) * item_ct1.get_group(2) +
7830
7839
  item_ct1.get_local_id(2)) *
7831
7840
  qk;
@@ -7834,15 +7843,17 @@ static void cpy_f32_q(const char * cx, char * cdst, const int ne,
7834
7843
  return;
7835
7844
  }
7836
7845
 
7837
- const int i02 = i / (ne00*ne01);
7838
- const int i01 = (i - i02*ne01*ne00) / ne00;
7839
- const int i00 = (i - i02*ne01*ne00 - i01*ne00);
7840
- const int x_offset = i00*nb00 + i01*nb01 + i02*nb02;
7846
+ const int i03 = i/(ne00 * ne01 * ne02);
7847
+ const int i02 = (i - i03*ne00*ne01*ne02 )/ (ne00*ne01);
7848
+ const int i01 = (i - i03*ne00*ne01*ne02 - i02*ne01*ne00) / ne00;
7849
+ const int i00 = i - i03*ne00*ne01*ne02 - i02*ne01*ne00 - i01*ne00;
7850
+ const int x_offset = i00*nb00 + i01*nb01 + i02*nb02 + i03 * nb03;
7841
7851
 
7842
- const int i12 = i / (ne10*ne11);
7843
- const int i11 = (i - i12*ne10*ne11) / ne10;
7844
- const int i10 = (i - i12*ne10*ne11 - i11*ne10)/qk;
7845
- const int dst_offset = i10*nb10 + i11*nb11 + i12*nb12;
7852
+ const int i13 = i/(ne10 * ne11 * ne12);
7853
+ const int i12 = (i - i13*ne10*ne11*ne12) / (ne10*ne11);
7854
+ const int i11 = (i - i13*ne10*ne11*ne12 - i12*ne10*ne11) / ne10;
7855
+ const int i10 = i - i13*ne10*ne11*ne12 - i12*ne10*ne11 - i11*ne10;
7856
+ const int dst_offset = (i10/qk)*nb10 + i11*nb11 + i12*nb12 + i13*nb13;
7846
7857
 
7847
7858
  cpy_blck(cx + x_offset, cdst + dst_offset);
7848
7859
  }
@@ -8247,7 +8258,8 @@ static void clamp_f32(const float * x, float * dst, const float min, const float
8247
8258
  dst[i] = x[i] < min ? min : (x[i] > max ? max : x[i]);
8248
8259
  }
8249
8260
 
8250
- static void im2col_f32_f16(const float *x, sycl::half *dst, int offset_delta,
8261
+ template <typename T>
8262
+ static void im2col_kernel(const float *x, T *dst, int offset_delta,
8251
8263
  int IW, int IH, int OW, int KW, int KH,
8252
8264
  int pelements, int CHW, int s0, int s1, int p0,
8253
8265
  int p1, int d0, int d1,
@@ -10598,10 +10610,12 @@ static void ggml_mul_mat_vec_nc_f16_f32_sycl(
10598
10610
 
10599
10611
  static void ggml_cpy_f32_f32_sycl(const char *cx, char *cdst, const int ne,
10600
10612
  const int ne00, const int ne01,
10601
- const int nb00, const int nb01,
10602
- const int nb02, const int ne10,
10603
- const int ne11, const int nb10,
10604
- const int nb11, const int nb12,
10613
+ const int ne02, const int nb00,
10614
+ const int nb01, const int nb02,
10615
+ const int nb03, const int ne10,
10616
+ const int ne11, const int ne12,
10617
+ const int nb10, const int nb11,
10618
+ const int nb12, const int nb13,
10605
10619
  dpct::queue_ptr stream) {
10606
10620
 
10607
10621
  const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE;
@@ -10614,8 +10628,8 @@ static void ggml_cpy_f32_f32_sycl(const char *cx, char *cdst, const int ne,
10614
10628
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE),
10615
10629
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)),
10616
10630
  [=](sycl::nd_item<3> item_ct1) {
10617
- cpy_f32_f16<cpy_1_f32_f32>(cx, cdst, ne, ne00, ne01, nb00, nb01,
10618
- nb02, ne10, ne11, nb10, nb11, nb12,
10631
+ cpy_f32_f16<cpy_1_f32_f32>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10632
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10619
10633
  item_ct1);
10620
10634
  });
10621
10635
  }
@@ -10623,10 +10637,12 @@ static void ggml_cpy_f32_f32_sycl(const char *cx, char *cdst, const int ne,
10623
10637
 
10624
10638
  static void ggml_cpy_f32_f16_sycl(const char *cx, char *cdst, const int ne,
10625
10639
  const int ne00, const int ne01,
10626
- const int nb00, const int nb01,
10627
- const int nb02, const int ne10,
10628
- const int ne11, const int nb10,
10629
- const int nb11, const int nb12,
10640
+ const int ne02, const int nb00,
10641
+ const int nb01, const int nb02,
10642
+ const int nb03, const int ne10,
10643
+ const int ne11, const int ne12,
10644
+ const int nb10, const int nb11,
10645
+ const int nb12, const int nb13,
10630
10646
  dpct::queue_ptr stream) {
10631
10647
 
10632
10648
  const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE;
@@ -10639,8 +10655,8 @@ static void ggml_cpy_f32_f16_sycl(const char *cx, char *cdst, const int ne,
10639
10655
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE),
10640
10656
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)),
10641
10657
  [=](sycl::nd_item<3> item_ct1) {
10642
- cpy_f32_f16<cpy_1_f32_f16>(cx, cdst, ne, ne00, ne01, nb00, nb01,
10643
- nb02, ne10, ne11, nb10, nb11, nb12,
10658
+ cpy_f32_f16<cpy_1_f32_f16>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10659
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10644
10660
  item_ct1);
10645
10661
  });
10646
10662
  }
@@ -10648,10 +10664,12 @@ static void ggml_cpy_f32_f16_sycl(const char *cx, char *cdst, const int ne,
10648
10664
 
10649
10665
  static void ggml_cpy_f32_q8_0_sycl(const char *cx, char *cdst, const int ne,
10650
10666
  const int ne00, const int ne01,
10651
- const int nb00, const int nb01,
10652
- const int nb02, const int ne10,
10653
- const int ne11, const int nb10,
10654
- const int nb11, const int nb12,
10667
+ const int ne02, const int nb00,
10668
+ const int nb01, const int nb02,
10669
+ const int nb03, const int ne10,
10670
+ const int ne11, const int ne12,
10671
+ const int nb10, const int nb11,
10672
+ const int nb12, const int nb13,
10655
10673
  dpct::queue_ptr stream) {
10656
10674
 
10657
10675
  GGML_ASSERT(ne % QK8_0 == 0);
@@ -10660,17 +10678,20 @@ static void ggml_cpy_f32_q8_0_sycl(const char *cx, char *cdst, const int ne,
10660
10678
  sycl::range<3>(1, 1, 1)),
10661
10679
  [=](sycl::nd_item<3> item_ct1) {
10662
10680
  cpy_f32_q<cpy_blck_f32_q8_0, QK8_0>(
10663
- cx, cdst, ne, ne00, ne01, nb00, nb01, nb02,
10664
- ne10, ne11, nb10, nb11, nb12, item_ct1);
10681
+ cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10682
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10683
+ item_ct1);
10665
10684
  });
10666
10685
  }
10667
10686
 
10668
10687
  static void ggml_cpy_f32_q4_0_sycl(const char *cx, char *cdst, const int ne,
10669
10688
  const int ne00, const int ne01,
10670
- const int nb00, const int nb01,
10671
- const int nb02, const int ne10,
10672
- const int ne11, const int nb10,
10673
- const int nb11, const int nb12,
10689
+ const int ne02, const int nb00,
10690
+ const int nb01, const int nb02,
10691
+ const int nb03, const int ne10,
10692
+ const int ne11, const int ne12,
10693
+ const int nb10, const int nb11,
10694
+ const int nb12, const int nb13,
10674
10695
  dpct::queue_ptr stream) {
10675
10696
 
10676
10697
  GGML_ASSERT(ne % QK4_0 == 0);
@@ -10679,17 +10700,20 @@ static void ggml_cpy_f32_q4_0_sycl(const char *cx, char *cdst, const int ne,
10679
10700
  sycl::range<3>(1, 1, 1)),
10680
10701
  [=](sycl::nd_item<3> item_ct1) {
10681
10702
  cpy_f32_q<cpy_blck_f32_q4_0, QK4_0>(
10682
- cx, cdst, ne, ne00, ne01, nb00, nb01, nb02,
10683
- ne10, ne11, nb10, nb11, nb12, item_ct1);
10703
+ cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10704
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10705
+ item_ct1);
10684
10706
  });
10685
10707
  }
10686
10708
 
10687
10709
  static void ggml_cpy_f32_q4_1_sycl(const char *cx, char *cdst, const int ne,
10688
10710
  const int ne00, const int ne01,
10689
- const int nb00, const int nb01,
10690
- const int nb02, const int ne10,
10691
- const int ne11, const int nb10,
10692
- const int nb11, const int nb12,
10711
+ const int ne02, const int nb00,
10712
+ const int nb01, const int nb02,
10713
+ const int nb03, const int ne10,
10714
+ const int ne11, const int ne12,
10715
+ const int nb10, const int nb11,
10716
+ const int nb12, const int nb13,
10693
10717
  dpct::queue_ptr stream) {
10694
10718
 
10695
10719
  GGML_ASSERT(ne % QK4_1 == 0);
@@ -10698,17 +10722,20 @@ static void ggml_cpy_f32_q4_1_sycl(const char *cx, char *cdst, const int ne,
10698
10722
  sycl::range<3>(1, 1, 1)),
10699
10723
  [=](sycl::nd_item<3> item_ct1) {
10700
10724
  cpy_f32_q<cpy_blck_f32_q4_1, QK4_1>(
10701
- cx, cdst, ne, ne00, ne01, nb00, nb01, nb02,
10702
- ne10, ne11, nb10, nb11, nb12, item_ct1);
10725
+ cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10726
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10727
+ item_ct1);
10703
10728
  });
10704
10729
  }
10705
10730
 
10706
10731
  static void ggml_cpy_f16_f16_sycl(const char *cx, char *cdst, const int ne,
10707
10732
  const int ne00, const int ne01,
10708
- const int nb00, const int nb01,
10709
- const int nb02, const int ne10,
10710
- const int ne11, const int nb10,
10711
- const int nb11, const int nb12,
10733
+ const int ne02, const int nb00,
10734
+ const int nb01, const int nb02,
10735
+ const int nb03, const int ne10,
10736
+ const int ne11, const int ne12,
10737
+ const int nb10, const int nb11,
10738
+ const int nb12, const int nb13,
10712
10739
  dpct::queue_ptr stream) {
10713
10740
 
10714
10741
  const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE;
@@ -10721,8 +10748,8 @@ static void ggml_cpy_f16_f16_sycl(const char *cx, char *cdst, const int ne,
10721
10748
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE),
10722
10749
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)),
10723
10750
  [=](sycl::nd_item<3> item_ct1) {
10724
- cpy_f32_f16<cpy_1_f16_f16>(cx, cdst, ne, ne00, ne01, nb00, nb01,
10725
- nb02, ne10, ne11, nb10, nb11, nb12,
10751
+ cpy_f32_f16<cpy_1_f16_f16>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10752
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10726
10753
  item_ct1);
10727
10754
  });
10728
10755
  }
@@ -10730,10 +10757,12 @@ static void ggml_cpy_f16_f16_sycl(const char *cx, char *cdst, const int ne,
10730
10757
 
10731
10758
  static void ggml_cpy_i16_i16_sycl(const char *cx, char *cdst, const int ne,
10732
10759
  const int ne00, const int ne01,
10733
- const int nb00, const int nb01,
10734
- const int nb02, const int ne10,
10735
- const int ne11, const int nb10,
10736
- const int nb11, const int nb12,
10760
+ const int ne02, const int nb00,
10761
+ const int nb01, const int nb02,
10762
+ const int nb03, const int ne10,
10763
+ const int ne11, const int ne12,
10764
+ const int nb10, const int nb11,
10765
+ const int nb12, const int nb13,
10737
10766
  dpct::queue_ptr stream) {
10738
10767
 
10739
10768
  const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE;
@@ -10746,8 +10775,8 @@ static void ggml_cpy_i16_i16_sycl(const char *cx, char *cdst, const int ne,
10746
10775
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE),
10747
10776
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)),
10748
10777
  [=](sycl::nd_item<3> item_ct1) {
10749
- cpy_f32_f16<cpy_1_i16_i16>(cx, cdst, ne, ne00, ne01, nb00, nb01,
10750
- nb02, ne10, ne11, nb10, nb11, nb12,
10778
+ cpy_f32_f16<cpy_1_i16_i16>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10779
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10751
10780
  item_ct1);
10752
10781
  });
10753
10782
  }
@@ -10755,10 +10784,12 @@ static void ggml_cpy_i16_i16_sycl(const char *cx, char *cdst, const int ne,
10755
10784
 
10756
10785
  static void ggml_cpy_i32_i32_sycl(const char *cx, char *cdst, const int ne,
10757
10786
  const int ne00, const int ne01,
10758
- const int nb00, const int nb01,
10759
- const int nb02, const int ne10,
10760
- const int ne11, const int nb10,
10761
- const int nb11, const int nb12,
10787
+ const int ne02, const int nb00,
10788
+ const int nb01, const int nb02,
10789
+ const int nb03, const int ne10,
10790
+ const int ne11, const int ne12,
10791
+ const int nb10, const int nb11,
10792
+ const int nb12, const int nb13,
10762
10793
  dpct::queue_ptr stream) {
10763
10794
 
10764
10795
  const int num_blocks = (ne + SYCL_CPY_BLOCK_SIZE - 1) / SYCL_CPY_BLOCK_SIZE;
@@ -10771,8 +10802,8 @@ static void ggml_cpy_i32_i32_sycl(const char *cx, char *cdst, const int ne,
10771
10802
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE),
10772
10803
  sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)),
10773
10804
  [=](sycl::nd_item<3> item_ct1) {
10774
- cpy_f32_f16<cpy_1_i32_i32>(cx, cdst, ne, ne00, ne01, nb00, nb01,
10775
- nb02, ne10, ne11, nb10, nb11, nb12,
10805
+ cpy_f32_f16<cpy_1_i32_i32>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02,
10806
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13,
10776
10807
  item_ct1);
10777
10808
  });
10778
10809
  }
@@ -11019,7 +11050,8 @@ static void soft_max_f32_sycl(const float *x, const float *y, float *dst,
11019
11050
  });
11020
11051
  }
11021
11052
 
11022
- static void im2col_f32_f16_sycl(const float *x, sycl::half *dst, int IW, int IH,
11053
+ template <typename T>
11054
+ static void im2col_sycl(const float *x, T *dst, int IW, int IH,
11023
11055
  int OW, int OH, int KW, int KH, int IC,
11024
11056
  int offset_delta, int s0, int s1, int p0,
11025
11057
  int p1, int d0, int d1,
@@ -11036,7 +11068,7 @@ static void im2col_f32_f16_sycl(const float *x, sycl::half *dst, int IW, int IH,
11036
11068
  sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE),
11037
11069
  sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE)),
11038
11070
  [=](sycl::nd_item<3> item_ct1) {
11039
- im2col_f32_f16(x, dst, offset_delta, IW, IH, OW, KW, KH,
11071
+ im2col_kernel(x, dst, offset_delta, IW, IH, OW, KW, KH,
11040
11072
  parallel_elements, (IC * KH * KW), s0, s1, p0,
11041
11073
  p1, d0, d1, item_ct1);
11042
11074
  });
@@ -11546,11 +11578,8 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
11546
11578
  }
11547
11579
  char * dst_ptr = (char *) dst;
11548
11580
 
11549
- const int64_t ne0 = src->ne[0];
11550
- const int64_t nb0 = src->nb[0];
11551
- const int64_t nb1 = src->nb[1];
11552
- const int64_t nb2 = src->nb[2];
11553
- const int64_t nb3 = src->nb[3];
11581
+ GGML_TENSOR_LOCALS_1(int64_t, ne, src, ne);
11582
+ GGML_TENSOR_LOCALS(int64_t, nb, src, nb);
11554
11583
  const enum ggml_type type = src->type;
11555
11584
  const int64_t ts = ggml_type_size(type);
11556
11585
  const int64_t bs = ggml_blck_size(type);
@@ -12116,7 +12145,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
12116
12145
  const int64_t src1_ncols, const int64_t src1_padded_row_size,
12117
12146
  const dpct::queue_ptr &stream) {
12118
12147
 
12119
- const int64_t ne00 = src0->ne[0];
12148
+ GGML_TENSOR_BINARY_OP_LOCALS
12149
+
12120
12150
  const int64_t row_diff = row_high - row_low;
12121
12151
 
12122
12152
  // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12135,8 +12165,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
12135
12165
  } else {
12136
12166
  src1_dfloat = src1_dfloat_a.alloc(ne00);
12137
12167
  ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
12138
- ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
12139
- sizeof(sycl::half), 0, 0, stream);
12168
+ ne00, ne00, ne01, ne02, nb00, nb01, nb02,
12169
+ nb03, ne10, ne11, ne12, nb10, nb11, nb12,
12170
+ nb13, stream);
12140
12171
  }
12141
12172
  }
12142
12173
  #else
@@ -12392,9 +12423,7 @@ inline void ggml_sycl_op_alibi(const ggml_tensor *src0, const ggml_tensor *src1,
12392
12423
  GGML_ASSERT(src0->type == GGML_TYPE_F32);
12393
12424
  GGML_ASSERT( dst->type == GGML_TYPE_F32);
12394
12425
 
12395
- const int64_t ne00 = src0->ne[0];
12396
- const int64_t ne01 = src0->ne[1];
12397
- const int64_t ne02 = src0->ne[2];
12426
+ GGML_TENSOR_LOCALS_3(int64_t, ne0, src0, ne);
12398
12427
  const int64_t nrows = ggml_nrows(src0);
12399
12428
 
12400
12429
  //const int n_past = ((int32_t *) dst->op_params)[0];
@@ -12424,7 +12453,7 @@ inline void ggml_sycl_op_im2col(const ggml_tensor *src0,
12424
12453
 
12425
12454
  GGML_ASSERT(src0->type == GGML_TYPE_F16);
12426
12455
  GGML_ASSERT(src1->type == GGML_TYPE_F32);
12427
- GGML_ASSERT( dst->type == GGML_TYPE_F16);
12456
+ GGML_ASSERT( dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32);
12428
12457
 
12429
12458
  const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
12430
12459
  const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
@@ -12447,8 +12476,11 @@ inline void ggml_sycl_op_im2col(const ggml_tensor *src0,
12447
12476
 
12448
12477
  const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32
12449
12478
 
12450
- im2col_f32_f16_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH,
12451
- IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
12479
+ if (dst->type == GGML_TYPE_F16) {
12480
+ im2col_sycl(src1_dd, (sycl::half *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
12481
+ } else {
12482
+ im2col_sycl(src1_dd, (float *)dst_dd, IW, IH, OW, OH, KW, KH, IC, delta_offset, s0, s1, p0, p1, d0, d1, main_stream);
12483
+ }
12452
12484
 
12453
12485
  (void) src0;
12454
12486
  (void) src0_dd;
@@ -12721,15 +12753,9 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
12721
12753
  ggml_sycl_op_mul_mat_t op,
12722
12754
  const bool convert_src1_to_q8_1) try {
12723
12755
 
12724
- const int64_t ne00 = src0->ne[0];
12725
- const int64_t ne01 = src0->ne[1];
12726
- const int64_t ne02 = src0->ne[2];
12727
- const int64_t ne03 = src0->ne[3];
12756
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
12728
12757
 
12729
- const int64_t ne10 = src1->ne[0];
12730
- const int64_t ne11 = src1->ne[1];
12731
- const int64_t ne12 = src1->ne[2];
12732
- const int64_t ne13 = src1->ne[3];
12758
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
12733
12759
  const int64_t nrows1 = ggml_nrows(src1);
12734
12760
 
12735
12761
  GGML_ASSERT(ne03 == ne13);
@@ -13300,23 +13326,13 @@ static void ggml_sycl_mul_mat_mat_batched_sycl(const ggml_tensor *src0,
13300
13326
  GGML_ASSERT(src0->type == GGML_TYPE_F16);
13301
13327
  GGML_ASSERT(src1->type == GGML_TYPE_F32);
13302
13328
 
13303
- const int64_t ne00 = src0->ne[0]; GGML_UNUSED(ne00);
13304
- const int64_t ne01 = src0->ne[1];
13305
- const int64_t ne02 = src0->ne[2];
13306
- const int64_t ne03 = src0->ne[3];
13329
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
13307
13330
 
13308
- const int64_t nb01 = src0->nb[1];
13309
- const int64_t nb02 = src0->nb[2]; GGML_UNUSED(nb02);
13310
- const int64_t nb03 = src0->nb[3]; GGML_UNUSED(nb03);
13331
+ GGML_TENSOR_LOCALS(int64_t, nb0, src0, nb);
13311
13332
 
13312
- const int64_t ne10 = src1->ne[0];
13313
- const int64_t ne11 = src1->ne[1];
13314
- const int64_t ne12 = src1->ne[2];
13315
- const int64_t ne13 = src1->ne[3];
13333
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
13316
13334
 
13317
- const int64_t nb11 = src1->nb[1];
13318
- const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12);
13319
- const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13);
13335
+ GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb);
13320
13336
 
13321
13337
  const int64_t ne1 = ggml_nelements(src1);
13322
13338
  const int64_t ne = ggml_nelements(dst);
@@ -13618,23 +13634,15 @@ static void ggml_sycl_mul_mat_id_sycl(ggml_tensor * dst) {
13618
13634
  GGML_ASSERT(src00->backend != GGML_BACKEND_GPU_SPLIT);
13619
13635
  GGML_ASSERT(src1->type == GGML_TYPE_F32);
13620
13636
 
13621
- const int64_t ne00 = src00->ne[0]; GGML_UNUSED(ne00);
13622
- const int64_t ne01 = src00->ne[1];
13623
- const int64_t ne02 = src00->ne[2];
13624
- const int64_t ne03 = src00->ne[3];
13637
+ GGML_TENSOR_LOCALS(int64_t, ne0, src00, ne);
13625
13638
 
13626
13639
  //const int64_t nb01 = src00->nb[1];
13627
- const int64_t nb02 = src00->nb[2]; GGML_UNUSED(nb02);
13628
- const int64_t nb03 = src00->nb[3]; GGML_UNUSED(nb03);
13640
+ GGML_TENSOR_LOCALS(int64_t, nb0, src00, nb);
13629
13641
 
13630
- const int64_t ne10 = src1->ne[0];
13631
- const int64_t ne11 = src1->ne[1];
13632
- const int64_t ne12 = src1->ne[2];
13633
- const int64_t ne13 = src1->ne[3];
13642
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
13634
13643
 
13644
+ GGML_TENSOR_LOCALS(int64_t, nb1, src1, nb);
13635
13645
  //const int64_t nb11 = src1->nb[1];
13636
- const int64_t nb12 = src1->nb[2]; GGML_UNUSED(nb12);
13637
- const int64_t nb13 = src1->nb[3]; GGML_UNUSED(nb13);
13638
13646
 
13639
13647
  const int64_t ne1 = ggml_nelements(src1);
13640
13648
  const int64_t ne = ggml_nelements(dst);
@@ -13903,21 +13911,7 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1,
13903
13911
  GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
13904
13912
  GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
13905
13913
 
13906
- const int64_t ne00 = src0->ne[0];
13907
- const int64_t ne01 = src0->ne[1];
13908
- GGML_ASSERT(src0->ne[3] == 1);
13909
-
13910
- const int64_t nb00 = src0->nb[0];
13911
- const int64_t nb01 = src0->nb[1];
13912
- const int64_t nb02 = src0->nb[2];
13913
-
13914
- const int64_t ne10 = src1->ne[0];
13915
- const int64_t ne11 = src1->ne[1];
13916
- GGML_ASSERT(src1->ne[3] == 1);
13917
-
13918
- const int64_t nb10 = src1->nb[0];
13919
- const int64_t nb11 = src1->nb[1];
13920
- const int64_t nb12 = src1->nb[2];
13914
+ GGML_TENSOR_BINARY_OP_LOCALS;
13921
13915
 
13922
13916
  SYCL_CHECK(ggml_sycl_set_device(g_main_device));
13923
13917
  dpct::queue_ptr main_stream = g_syclStreams[g_main_device_index][0];
@@ -13929,21 +13923,21 @@ static void ggml_sycl_cpy(const ggml_tensor *src0, const ggml_tensor *src1,
13929
13923
  char * src1_ddc = (char *) src1_extra->data_device[g_main_device_index];
13930
13924
 
13931
13925
  if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
13932
- ggml_cpy_f32_f32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13926
+ ggml_cpy_f32_f32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13933
13927
  } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F16) {
13934
- ggml_cpy_f32_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13928
+ ggml_cpy_f32_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13935
13929
  } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q8_0) {
13936
- ggml_cpy_f32_q8_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13930
+ ggml_cpy_f32_q8_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13937
13931
  } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q4_0) {
13938
- ggml_cpy_f32_q4_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13932
+ ggml_cpy_f32_q4_0_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13939
13933
  } else if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_Q4_1) {
13940
- ggml_cpy_f32_q4_1_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13934
+ ggml_cpy_f32_q4_1_sycl(src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13941
13935
  } else if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16) {
13942
- ggml_cpy_f16_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13936
+ ggml_cpy_f16_f16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13943
13937
  } else if (src0->type == GGML_TYPE_I16 && src1->type == GGML_TYPE_I16) {
13944
- ggml_cpy_i16_i16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13938
+ ggml_cpy_i16_i16_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13945
13939
  } else if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32) {
13946
- ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, nb00, nb01, nb02, ne10, ne11, nb10, nb11, nb12, main_stream);
13940
+ ggml_cpy_i32_i32_sycl (src0_ddc, src1_ddc, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, main_stream);
13947
13941
  } else {
13948
13942
  fprintf(stderr, "%s: unsupported type combination (%s to %s)\n", __func__,
13949
13943
  ggml_type_name(src0->type), ggml_type_name(src1->type));