llama_cpp 0.12.7 → 0.13.0
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/llama_cpp.cpp +72 -262
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +23 -25
- data/vendor/tmp/llama.cpp/Makefile +8 -3
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +14 -2
- data/vendor/tmp/llama.cpp/ggml-backend.h +1 -1
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +7 -1
- data/vendor/tmp/llama.cpp/ggml-metal.m +96 -15
- data/vendor/tmp/llama.cpp/ggml-metal.metal +1049 -38
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +25 -25
- data/vendor/tmp/llama.cpp/ggml-quants.c +1873 -218
- data/vendor/tmp/llama.cpp/ggml-quants.h +52 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +292 -221
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +64 -52
- data/vendor/tmp/llama.cpp/ggml.c +318 -195
- data/vendor/tmp/llama.cpp/ggml.h +35 -19
- data/vendor/tmp/llama.cpp/llama.cpp +806 -531
- data/vendor/tmp/llama.cpp/llama.h +53 -65
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
```diff
@@ -355,6 +355,10 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
     }
 }
 
+bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
+    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
+}
+
 //
 // timing
 //
```
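`ggml_guid_matches` is new here and gives backends a cheap identity check based on a 16-byte GUID. A minimal usage sketch, assuming the `ggml_guid`/`ggml_guid_t` typedefs that ship in this release's ggml.h (the GUID bytes and the `is_my_backend` helper are hypothetical):

```c
#include <stdbool.h>
#include <stdint.h>
#include "ggml.h"

// ggml.h declares (roughly): typedef uint8_t ggml_guid[16];
//                            typedef ggml_guid * ggml_guid_t;
// A backend keeps one static GUID; the byte values here are made up.
static ggml_guid my_backend_guid = {
    0x0b, 0xad, 0xc0, 0xde, 0x00, 0x11, 0x22, 0x33,
    0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb,
};

// Test whether the GUID reported by an opaque backend handle is ours.
static bool is_my_backend(ggml_guid_t guid) {
    return ggml_guid_matches(guid, &my_backend_guid);
}
```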
```diff
@@ -678,6 +682,30 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot_type         = GGML_TYPE_Q8_K,
         .nrows                = 1,
     },
+    [GGML_TYPE_IQ3_S] = {
+        .type_name            = "iq3_s",
+        .blck_size            = QK_K,
+        .type_size            = sizeof(block_iq3_s),
+        .is_quantized         = true,
+        .to_float             = (ggml_to_float_t) dequantize_row_iq3_s,
+        .from_float           = quantize_row_iq3_s,
+        .from_float_reference = (ggml_from_float_t)quantize_row_iq3_s_reference,
+        .vec_dot              = ggml_vec_dot_iq3_s_q8_K,
+        .vec_dot_type         = GGML_TYPE_Q8_K,
+        .nrows                = 1,
+    },
+    [GGML_TYPE_IQ2_S] = {
+        .type_name            = "iq2_s",
+        .blck_size            = QK_K,
+        .type_size            = sizeof(block_iq2_s),
+        .is_quantized         = true,
+        .to_float             = (ggml_to_float_t) dequantize_row_iq2_s,
+        .from_float           = quantize_row_iq2_s,
+        .from_float_reference = (ggml_from_float_t)quantize_row_iq2_s_reference,
+        .vec_dot              = ggml_vec_dot_iq2_s_q8_K,
+        .vec_dot_type         = GGML_TYPE_Q8_K,
+        .nrows                = 1,
+    },
     [GGML_TYPE_IQ1_S] = {
         .type_name            = "iq1_s",
         .blck_size            = QK_K,
@@ -702,6 +730,26 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot_type         = GGML_TYPE_Q8_0,
         .nrows                = 1,
     },
+    [GGML_TYPE_IQ4_XS] = {
+        .type_name            = "iq4_xs",
+#if QK_K == 64
+        .blck_size            = QK4_NL,
+#else
+        .blck_size            = QK_K,
+#endif
+        .type_size            = sizeof(block_iq4_xs),
+        .is_quantized         = true,
+        .to_float             = (ggml_to_float_t) dequantize_row_iq4_xs,
+        .from_float           = quantize_row_iq4_xs,
+        .from_float_reference = (ggml_from_float_t)quantize_row_iq4_xs_reference,
+        .vec_dot              = ggml_vec_dot_iq4_xs_q8_K,
+#if QK_K == 64
+        .vec_dot_type         = GGML_TYPE_Q8_0,
+#else
+        .vec_dot_type         = GGML_TYPE_Q8_K,
+#endif
+        .nrows                = 1,
+    },
     [GGML_TYPE_Q8_K] = {
         .type_name            = "q8_K",
         .blck_size            = QK_K,
```
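Each new `type_traits` entry is what plugs a quantization format into ggml's generic dispatch: `to_float` handles dequantization, and `vec_dot` plus `vec_dot_type` drive the quantized matrix-multiply path. A rough sketch of how the table is consumed, written as if inside ggml.c where `type_traits[]` is visible (the `dequantize_row` wrapper is hypothetical):

```c
// Dequantize one row of a quantized tensor via the traits table.
// For type == GGML_TYPE_IQ3_S this ends up calling dequantize_row_iq3_s.
static void dequantize_row(enum ggml_type type, const void * qrow, float * out, int n) {
    const ggml_type_traits_t traits = type_traits[type];
    GGML_ASSERT(traits.is_quantized && traits.to_float != NULL);
    traits.to_float(qrow, out, n);
}
```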
```diff
@@ -1560,9 +1608,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
 inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
     uint16_t t;
     for (int i = 0; i < n; ++i) {
-        ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
-        memcpy(&t, &fp16, sizeof(uint16_t));
-        y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
+        if (x[i] <= -10.0f) {
+            y[i] = 0.0f;
+        } else if (x[i] >= 10.0f) {
+            y[i] = x[i];
+        } else {
+            ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
+            memcpy(&t, &fp16, sizeof(uint16_t));
+            y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
+        }
     }
 }
 #else
```
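The table-driven GELU now short-circuits the tails: for x ≤ −10 it returns 0 and for x ≥ 10 it returns x, skipping the fp16 round-trip where it would only add rounding error. The thresholds are safe because GELU(x) = x·Φ(x) and Φ(−10) ≈ 7.6·10⁻²⁴, so the exact value is indistinguishable from 0 or x in float. A standalone check of that claim (not part of ggml):

```c
#include <math.h>
#include <stdio.h>

int main(void) {
    // Exact GELU: gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
    double x = 10.0;
    printf("gelu(+10) = %.17g (clamp returns x = 10)\n",
           0.5 * x * (1.0 + erf(x / sqrt(2.0))));
    printf("gelu(-10) = %.3g (clamp returns 0)\n",
           0.5 * -x * (1.0 + erf(-x / sqrt(2.0))));
    return 0;
}
```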
```diff
@@ -2304,6 +2358,9 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS; break;
         case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;   break;
         case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;  break;
+        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;  break;
+        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;   break;
+        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;   break;
         case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
         case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
     }
@@ -2708,7 +2765,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         }
     }
 
-    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
 
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
@@ -2716,7 +2773,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 
     *result = (struct ggml_tensor) {
         /*.type         =*/ type,
-        /*.backend      =*/ GGML_BACKEND_CPU,
+        /*.backend      =*/ GGML_BACKEND_TYPE_CPU,
         /*.buffer       =*/ NULL,
         /*.ne           =*/ { 1, 1, 1, 1 },
         /*.nb           =*/ { 0, 0, 0, 0 },
```
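The two one-line changes above are the first of a rename that repeats through the rest of the file: ggml's enum constants gained an explicit type infix (`GGML_BACKEND_CPU` → `GGML_BACKEND_TYPE_CPU`, and so on). For downstream code written against the 0.12.x vendored headers, a compatibility shim along these lines covers the renames visible in this diff (the old spellings are inferred from the paired removals and additions; verify against your local headers before relying on it):

```c
// Map pre-0.13.0 enum spellings onto the renamed constants.
#define GGML_TASK_INIT      GGML_TASK_TYPE_INIT
#define GGML_TASK_COMPUTE   GGML_TASK_TYPE_COMPUTE
#define GGML_TASK_FINALIZE  GGML_TASK_TYPE_FINALIZE
#define GGML_BACKEND_CPU    GGML_BACKEND_TYPE_CPU
#define GGML_BACKEND_GPU    GGML_BACKEND_TYPE_GPU
#define GGML_OBJECT_TENSOR  GGML_OBJECT_TYPE_TENSOR
#define GGML_OBJECT_GRAPH   GGML_OBJECT_TYPE_GRAPH
#define GGML_SORT_ASC       GGML_SORT_ORDER_ASC
#define GGML_SORT_DESC      GGML_SORT_ORDER_DESC
```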
```diff
@@ -3289,7 +3346,7 @@ struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        if (obj->type == GGML_OBJECT_TENSOR) {
+        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
             return (struct ggml_tensor *)(mem_buffer + obj->offs);
         }
 
@@ -3306,7 +3363,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        if (obj->type == GGML_OBJECT_TENSOR) {
+        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
             return (struct ggml_tensor *)(mem_buffer + obj->offs);
         }
 
@@ -3322,7 +3379,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        if (obj->type == GGML_OBJECT_TENSOR) {
+        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
             struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
             if (strcmp(cur->name, name) == 0) {
                 return cur;
@@ -5729,11 +5786,13 @@ struct ggml_tensor * ggml_pool_1d(
         is_node = true;
     }
 
-    const int64_t ne[2] = {
+    const int64_t ne[4] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         a->ne[1],
+        a->ne[2],
+        a->ne[3],
     };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     int32_t params[] = { op, k0, s0, p0 };
     ggml_set_op_params(result, params, sizeof(params));
```
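`ggml_pool_1d` now allocates a full 4-D result, so the batch-like trailing dimensions survive pooling instead of being collapsed. A hedged usage sketch (shapes in ggml order, `ne[0]` fastest; the concrete sizes are made up):

```c
// 1-D average pooling over ne[0]; ne[1..3] now carry through unchanged.
struct ggml_tensor * inp    = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 32, 8, 2);
struct ggml_tensor * pooled = ggml_pool_1d(ctx, inp, GGML_OP_POOL_AVG,
                                           /*k0=*/2, /*s0=*/2, /*p0=*/0);
// pooled->ne == { 32, 32, 8, 2 }; before this change the result was 2-D.
```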
```diff
@@ -5866,7 +5925,7 @@ struct ggml_tensor * ggml_top_k(
         int   k) {
     GGML_ASSERT(a->ne[0] >= k);
 
-    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_DESC);
+    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
 
     result = ggml_view_4d(ctx, result,
                 k, result->ne[1], result->ne[2], result->ne[3],
```
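`ggml_top_k` is thus an argsort in descending order followed by a k-wide view; mirroring the body above, the equivalent call sequence is:

```c
// Indices of the k largest values along dim 0, as ggml_top_k builds them.
struct ggml_tensor * idx = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC); // I32 indices
idx = ggml_view_4d(ctx, idx,
                   k, idx->ne[1], idx->ne[2], idx->ne[3],
                   idx->nb[1], idx->nb[2], idx->nb[3], 0);             // keep first k per row
```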
```diff
@@ -6660,7 +6719,7 @@ static void ggml_compute_forward_dup_same_cont(
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
     GGML_ASSERT(src0->type == dst->type);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -6692,7 +6751,7 @@ static void ggml_compute_forward_dup_f16(
 
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -6965,7 +7024,7 @@ static void ggml_compute_forward_dup_f32(
 
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7218,7 +7277,7 @@ static void ggml_compute_forward_dup_bytes(
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
     GGML_ASSERT(src0->type == dst->type);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7398,7 +7457,7 @@ static void ggml_compute_forward_add_f32(
 
     GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7406,7 +7465,7 @@ static void ggml_compute_forward_add_f32(
     const int nth = params->nth;
 
 #ifdef GGML_USE_CLBLAST
-    if (src1->backend == GGML_BACKEND_GPU) {
+    if (src1->backend == GGML_BACKEND_TYPE_GPU) {
         // TODO: OpenCL kernel support full broadcast
         GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
         if (ith == 0) {
@@ -7488,7 +7547,7 @@ static void ggml_compute_forward_add_f16_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7567,7 +7626,7 @@ static void ggml_compute_forward_add_f16_f16(
 
     GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7623,7 +7682,7 @@ static void ggml_compute_forward_add_q_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7738,6 +7797,9 @@ static void ggml_compute_forward_add(
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
             {
                 ggml_compute_forward_add_q_f32(params, dst);
             } break;
@@ -7760,7 +7822,7 @@ static void ggml_compute_forward_add1_f32(
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7814,7 +7876,7 @@ static void ggml_compute_forward_add1_f16_f32(
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7866,7 +7928,7 @@ static void ggml_compute_forward_add1_f16_f16(
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -7918,7 +7980,7 @@ static void ggml_compute_forward_add1_q_f32(
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_scalar(src1));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8017,6 +8079,9 @@ static void ggml_compute_forward_add1(
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ1_S:
         case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
             {
                 ggml_compute_forward_add1_q_f32(params, dst);
             } break;
@@ -8047,7 +8112,7 @@ static void ggml_compute_forward_acc_f32(
     size_t offset  = ((int32_t *) dst->op_params)[3];
     bool   inplace = (bool) ((int32_t *) dst->op_params)[4];
 
-    if (!inplace && (params->type == GGML_TASK_INIT)) {
+    if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
         if (params->ith != 0) {
             return;
         }
@@ -8059,7 +8124,7 @@ static void ggml_compute_forward_acc_f32(
             ggml_nbytes(dst));
     }
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8141,6 +8206,9 @@ static void ggml_compute_forward_acc(
         case GGML_TYPE_IQ3_XXS:
        case GGML_TYPE_IQ1_S:
        case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
         default:
             {
                 GGML_ASSERT(false);
@@ -8160,7 +8228,7 @@ static void ggml_compute_forward_sub_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8241,14 +8309,14 @@ static void ggml_compute_forward_mul_f32(
 
     GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
     const int ith = params->ith;
     const int nth = params->nth;
 
 #if defined(GGML_USE_CLBLAST)
-    if (src1->backend == GGML_BACKEND_GPU) {
+    if (src1->backend == GGML_BACKEND_TYPE_GPU) {
         // TODO: OpenCL kernel support full broadcast
         GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
         if (ith == 0) {
@@ -8349,7 +8417,7 @@ static void ggml_compute_forward_div_f32(
 
     GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8444,7 +8512,7 @@ static void ggml_compute_forward_sqr_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8490,7 +8558,7 @@ static void ggml_compute_forward_sqrt_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8536,7 +8604,7 @@ static void ggml_compute_forward_log_f32(
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8582,7 +8650,7 @@ static void ggml_compute_forward_sum_f32(
     assert(params->ith == 0);
     assert(ggml_is_scalar(dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8617,7 +8685,7 @@ static void ggml_compute_forward_sum_f16(
     assert(params->ith == 0);
     assert(ggml_is_scalar(dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8674,7 +8742,7 @@ static void ggml_compute_forward_sum_rows_f32(
 
     GGML_ASSERT(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8729,7 +8797,7 @@ static void ggml_compute_forward_mean_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8788,7 +8856,7 @@ static void ggml_compute_forward_argmax_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8839,7 +8907,7 @@ static void ggml_compute_forward_repeat_f32(
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_can_repeat(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8884,7 +8952,7 @@ static void ggml_compute_forward_repeat_f16(
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_can_repeat(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -8958,7 +9026,7 @@ static void ggml_compute_forward_repeat_back_f32(
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_can_repeat(dst, src0));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9035,7 +9103,7 @@ static void ggml_compute_forward_concat_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9107,7 +9175,7 @@ static void ggml_compute_forward_abs_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9153,7 +9221,7 @@ static void ggml_compute_forward_sgn_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9199,7 +9267,7 @@ static void ggml_compute_forward_neg_f32(
     assert(params->ith == 0);
    assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9245,7 +9313,7 @@ static void ggml_compute_forward_step_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9291,7 +9359,7 @@ static void ggml_compute_forward_tanh_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9337,7 +9405,7 @@ static void ggml_compute_forward_elu_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9383,7 +9451,7 @@ static void ggml_compute_forward_relu_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9430,7 +9498,7 @@ static void ggml_compute_forward_gelu_f32(
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9493,7 +9561,7 @@ static void ggml_compute_forward_gelu_quick_f32(
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9556,7 +9624,7 @@ static void ggml_compute_forward_silu_f32(
     GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9617,7 +9685,7 @@ static void ggml_compute_forward_leaky_relu_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9670,7 +9738,7 @@ static void ggml_compute_forward_silu_back_f32(
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_are_same_shape(src0, grad));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9732,7 +9800,7 @@ static void ggml_compute_forward_hardswish_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9775,7 +9843,7 @@ static void ggml_compute_forward_hardsigmoid_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9821,7 +9889,7 @@ static void ggml_compute_forward_norm_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9896,7 +9964,7 @@ static void ggml_compute_forward_rms_norm_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -9967,7 +10035,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -10145,7 +10213,7 @@ static void ggml_compute_forward_group_norm_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -10312,7 +10380,7 @@ static void ggml_compute_forward_mul_mat(
 
 #if defined(GGML_USE_CLBLAST)
     if (ggml_cl_can_mul_mat(src0, src1, dst)) {
-        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
+        if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
            ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
         }
         return;
@@ -10325,7 +10393,7 @@ static void ggml_compute_forward_mul_mat(
         const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
         UNUSED(desired_wsize);
 
-        if (params->type == GGML_TASK_INIT) {
+        if (params->type == GGML_TASK_TYPE_INIT) {
             if (type != GGML_TYPE_F32) {
                 assert(params->wsize >= desired_wsize);
                 // parallelize by src0 rows
@@ -10348,7 +10416,7 @@ static void ggml_compute_forward_mul_mat(
             return;
         }
 
-        if (params->type == GGML_TASK_FINALIZE) {
+        if (params->type == GGML_TASK_TYPE_FINALIZE) {
            return;
         }
 
@@ -10386,7 +10454,7 @@ static void ggml_compute_forward_mul_mat(
     }
 #endif
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
             return;
         }
@@ -10410,7 +10478,7 @@ static void ggml_compute_forward_mul_mat(
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -10567,7 +10635,7 @@ static void ggml_compute_forward_mul_mat_id(
 
 #define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
             return;
         }
@@ -10604,7 +10672,7 @@ static void ggml_compute_forward_mul_mat_id(
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -10752,7 +10820,7 @@ static void ggml_compute_forward_out_prod_f32(
                  (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
 #endif
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
         if (use_blas) {
             return;
@@ -10765,7 +10833,7 @@ static void ggml_compute_forward_out_prod_f32(
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -10945,7 +11013,7 @@ static void ggml_compute_forward_out_prod_q_f32(
     // TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
     // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
             return;
         }
@@ -10953,7 +11021,7 @@ static void ggml_compute_forward_out_prod_q_f32(
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -11039,6 +11107,9 @@ static void ggml_compute_forward_out_prod(
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ1_S:
         case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
             {
                 ggml_compute_forward_out_prod_q_f32(params, dst);
             } break;
@@ -11070,7 +11141,7 @@ static void ggml_compute_forward_scale_f32(
     GGML_ASSERT(ggml_is_contiguous(dst));
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11142,7 +11213,7 @@ static void ggml_compute_forward_set_f32(
     size_t offset  = ((int32_t *) dst->op_params)[3];
     bool   inplace = (bool) ((int32_t *) dst->op_params)[4];
 
-    if (!inplace && (params->type == GGML_TASK_INIT)) {
+    if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
         if (params->ith != 0) {
             return;
         }
@@ -11154,7 +11225,7 @@ static void ggml_compute_forward_set_f32(
             ggml_nbytes(dst));
     }
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11227,6 +11298,9 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ1_S:
         case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
         default:
             {
                 GGML_ASSERT(false);
@@ -11301,7 +11375,7 @@ static void ggml_compute_forward_get_rows_q(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11341,7 +11415,7 @@ static void ggml_compute_forward_get_rows_f16(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11378,7 +11452,7 @@ static void ggml_compute_forward_get_rows_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11429,6 +11503,9 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ1_S:
         case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
             {
                 ggml_compute_forward_get_rows_q(params, dst);
             } break;
@@ -11480,14 +11557,14 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
 
     // ggml_compute_forward_dup_same_cont(params, opt0, dst);
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (params->ith != 0) {
             return;
         }
         memset(dst->data, 0, ggml_nbytes(dst));
     }
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11519,14 +11596,14 @@ static void ggml_compute_forward_get_rows_back_f32(
 
     // ggml_compute_forward_dup_same_cont(params, opt0, dst);
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (params->ith != 0) {
            return;
         }
         memset(dst->data, 0, ggml_nbytes(dst));
     }
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11596,7 +11673,7 @@ static void ggml_compute_forward_diag_f32(
 
     GGML_ASSERT(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11665,7 +11742,7 @@ static void ggml_compute_forward_diag_mask_f32(
 
     GGML_ASSERT(n_past >= 0);
 
-    if (!inplace && (params->type == GGML_TASK_INIT)) {
+    if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
         if (ith != 0) {
             return;
         }
@@ -11679,7 +11756,7 @@ static void ggml_compute_forward_diag_mask_f32(
             ggml_nbytes(dst));
     }
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11753,7 +11830,7 @@ static void ggml_compute_forward_soft_max_f32(
     assert(ggml_is_contiguous(dst));
     assert(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11891,7 +11968,7 @@ static void ggml_compute_forward_soft_max_back_f32(
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_are_same_shape(src1, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -11985,7 +12062,7 @@ static void ggml_compute_forward_alibi_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -12044,7 +12121,7 @@ static void ggml_compute_forward_alibi_f16(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -12129,6 +12206,9 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ1_S:
         case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -12150,7 +12230,7 @@ static void ggml_compute_forward_clamp_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -12212,6 +12292,9 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ1_S:
         case GGML_TYPE_IQ4_NL:
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -12289,7 +12372,7 @@ static void ggml_compute_forward_rope_f32(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -12467,7 +12550,7 @@ static void ggml_compute_forward_rope_f16(
     const struct ggml_tensor * src0 = dst->src[0];
     const struct ggml_tensor * src1 = dst->src[1];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -12698,7 +12781,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
     GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
             return;
         }
@@ -12738,7 +12821,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -12797,7 +12880,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
     GGML_ASSERT(nb00 == sizeof(float));
     GGML_ASSERT(nb10 == sizeof(float));
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
             return;
         }
@@ -12837,7 +12920,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -12941,11 +13024,11 @@ static void ggml_compute_forward_im2col_f32(
     GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13029,11 +13112,11 @@ static void ggml_compute_forward_im2col_f16(
     GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13115,7 +13198,7 @@ static void ggml_compute_forward_conv_transpose_2d(
     GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith != 0) {
             return;
         }
@@ -13157,7 +13240,7 @@ static void ggml_compute_forward_conv_transpose_2d(
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
       return;
    }
 
@@ -13209,7 +13292,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
     assert(src->type == GGML_TYPE_F32);
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13278,7 +13361,7 @@ static void ggml_compute_forward_pool_2d(
     GGML_ASSERT(src->type == GGML_TYPE_F32);
     GGML_ASSERT(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13351,7 +13434,7 @@ static void ggml_compute_forward_upscale_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13411,7 +13494,7 @@ static void ggml_compute_forward_pad_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13472,7 +13555,7 @@ static void ggml_compute_forward_argsort_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13498,8 +13581,8 @@ static void ggml_compute_forward_argsort_f32(
         // C doesn't have a functional sort, so we do a bubble sort instead
         for (int64_t j = 0; j < ne0; j++) {
             for (int64_t k = j + 1; k < ne0; k++) {
-                if ((order == GGML_SORT_ASC  && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
-                    (order == GGML_SORT_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
+                if ((order == GGML_SORT_ORDER_ASC  && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
+                    (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
                     int32_t tmp = dst_data[j];
                     dst_data[j] = dst_data[k];
                     dst_data[k] = tmp;
@@ -13582,11 +13665,11 @@ static void ggml_compute_forward_flash_attn_f32(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13774,11 +13857,11 @@ static void ggml_compute_forward_flash_attn_f16(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14033,11 +14116,11 @@ static void ggml_compute_forward_flash_ff_f16(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14192,14 +14275,14 @@ static void ggml_compute_forward_flash_attn_back_f32(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type == GGML_TASK_INIT) {
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith == 0) {
             memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
         }
        return;
     }
 
-    if (params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -14515,7 +14598,7 @@ static void ggml_compute_forward_win_part_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
    }
 
@@ -14581,7 +14664,7 @@ static void ggml_compute_forward_win_unpart_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -14709,7 +14792,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
    }
 
@@ -14761,14 +14844,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
     const struct ggml_tensor * src2 = dst->src[2];
 
     const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
-    if (!inplace && params->type == GGML_TASK_INIT) {
+    if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
         if (params->ith != 0) {
             return;
         }
         memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
         return;
     }
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -14850,7 +14933,7 @@ static void ggml_compute_forward_map_unary_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -14899,7 +14982,7 @@ static void ggml_compute_forward_map_binary_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
     }
 
@@ -14948,7 +15031,7 @@ static void ggml_compute_forward_map_custom1_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
        return;
    }
 
@@ -14967,7 +15050,7 @@ static void ggml_compute_forward_map_custom2_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
       return;
    }
 
@@ -14987,7 +15070,7 @@ static void ggml_compute_forward_map_custom3_f32(
 
     assert(params->ith == 0);
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
       return;
    }
 
@@ -15002,13 +15085,14 @@ static void ggml_compute_forward_map_custom1(
 
     const struct ggml_tensor * a = dst->src[0];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
-    struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
+    struct ggml_map_custom1_op_params p;
+    memcpy(&p, dst->op_params, sizeof(p));
 
-    p->fun(dst, a, params->ith, params->nth, p->userdata);
+    p.fun(dst, a, params->ith, params->nth, p.userdata);
 }
 
 // ggml_compute_forward_map_custom2
@@ -15020,13 +15104,14 @@ static void ggml_compute_forward_map_custom2(
     const struct ggml_tensor * a = dst->src[0];
     const struct ggml_tensor * b = dst->src[1];
 
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
-    struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
+    struct ggml_map_custom2_op_params p;
+    memcpy(&p, dst->op_params, sizeof(p));
 
-    p->fun(dst, a, b, params->ith, params->nth, p->userdata);
+    p.fun(dst, a, b, params->ith, params->nth, p.userdata);
 }
 
 // ggml_compute_forward_map_custom3
```
@@ -15039,13 +15124,14 @@ static void ggml_compute_forward_map_custom3(
|
|
15039
15124
|
const struct ggml_tensor * b = dst->src[1];
|
15040
15125
|
const struct ggml_tensor * c = dst->src[2];
|
15041
15126
|
|
15042
|
-
if (params->type ==
|
15127
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
15043
15128
|
return;
|
15044
15129
|
}
|
15045
15130
|
|
15046
|
-
struct ggml_map_custom3_op_params
|
15131
|
+
struct ggml_map_custom3_op_params p;
|
15132
|
+
memcpy(&p, dst->op_params, sizeof(p));
|
15047
15133
|
|
15048
|
-
p
|
15134
|
+
p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
|
15049
15135
|
}
|
15050
15136
|
|
15051
15137
|
// ggml_compute_forward_cross_entropy_loss
|
@@ -15073,14 +15159,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
|
|
15073
15159
|
|
15074
15160
|
GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
|
15075
15161
|
|
15076
|
-
if (params->type ==
|
15162
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
15077
15163
|
if (ith == 0) {
|
15078
15164
|
memset(sums, 0, sizeof(float) * (nth + nth * nc));
|
15079
15165
|
}
|
15080
15166
|
return;
|
15081
15167
|
}
|
15082
15168
|
|
15083
|
-
if (params->type ==
|
15169
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
15084
15170
|
if (ith == 0) {
|
15085
15171
|
float * dp = (float *) dst->data;
|
15086
15172
|
ggml_vec_sum_f32(nth, dp, sums);
|
@@ -15195,7 +15281,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|
15195
15281
|
const int64_t ith = params->ith;
|
15196
15282
|
const int64_t nth = params->nth;
|
15197
15283
|
|
15198
|
-
if (params->type ==
|
15284
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
15199
15285
|
return;
|
15200
15286
|
}
|
15201
15287
|
|
@@ -15302,8 +15388,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15302
15388
|
if (skip_cpu) {
|
15303
15389
|
return;
|
15304
15390
|
}
|
15305
|
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
15306
|
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
15391
|
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
15392
|
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
15307
15393
|
#elif defined(GGML_USE_VULKAN)
|
15308
15394
|
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
|
15309
15395
|
#ifdef GGML_VULKAN_CHECK_RESULTS
|
@@ -15314,8 +15400,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15314
15400
|
if (skip_cpu) {
|
15315
15401
|
return;
|
15316
15402
|
}
|
15317
|
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
15318
|
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
15403
|
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
15404
|
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
15319
15405
|
#endif // GGML_USE_CUBLAS
|
15320
15406
|
|
15321
15407
|
#ifdef GGML_USE_SYCL
|
@@ -16861,7 +16947,7 @@ size_t ggml_graph_overhead(void) {
|
|
16861
16947
|
|
16862
16948
|
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
|
16863
16949
|
const size_t obj_size = ggml_graph_nbytes(size, grads);
|
16864
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
16950
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
|
16865
16951
|
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
16866
16952
|
|
16867
16953
|
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
|
@@ -17311,29 +17397,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_MAP_CUSTOM1:
             {
-                struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
-                if (p->n_tasks == GGML_N_TASKS_MAX) {
+                struct ggml_map_custom1_op_params p;
+                memcpy(&p, node->op_params, sizeof(p));
+                if (p.n_tasks == GGML_N_TASKS_MAX) {
                     n_tasks = n_threads;
                 } else {
-                    n_tasks = MIN(p->n_tasks, n_threads);
+                    n_tasks = MIN(p.n_tasks, n_threads);
                 }
             } break;
         case GGML_OP_MAP_CUSTOM2:
             {
-                struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
-                if (p->n_tasks == GGML_N_TASKS_MAX) {
+                struct ggml_map_custom2_op_params p;
+                memcpy(&p, node->op_params, sizeof(p));
+                if (p.n_tasks == GGML_N_TASKS_MAX) {
                     n_tasks = n_threads;
                 } else {
-                    n_tasks = MIN(p->n_tasks, n_threads);
+                    n_tasks = MIN(p.n_tasks, n_threads);
                 }
             } break;
         case GGML_OP_MAP_CUSTOM3:
             {
-                struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
-                if (p->n_tasks == GGML_N_TASKS_MAX) {
+                struct ggml_map_custom3_op_params p;
+                memcpy(&p, node->op_params, sizeof(p));
+                if (p.n_tasks == GGML_N_TASKS_MAX) {
                     n_tasks = n_threads;
                 } else {
-                    n_tasks = MIN(p->n_tasks, n_threads);
+                    n_tasks = MIN(p.n_tasks, n_threads);
                 }
             } break;
         case GGML_OP_CROSS_ENTROPY_LOSS:
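The functional change in this hunk is subtle: node->op_params is a raw byte buffer, and the old code read it through a cast struct pointer, which is undefined behavior when the buffer does not satisfy the struct's alignment. Copying the bytes into a local first is defined for any alignment. A self-contained illustration with a hypothetical params layout:

    #include <stdint.h>
    #include <string.h>

    struct my_op_params { int32_t n_tasks; };  // hypothetical op_params layout

    // Unsafe: `raw` may not satisfy the struct's alignment requirement.
    //   const struct my_op_params * p = (const struct my_op_params *) raw;
    //   return p->n_tasks;
    // Safe: memcpy into a correctly aligned local, as the new code above does.
    static int32_t read_n_tasks(const void * raw) {
        struct my_op_params p;
        memcpy(&p, raw, sizeof(p));  // well-defined for any source alignment
        return p.n_tasks;
    }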
@@ -17408,7 +17497,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     set_numa_thread_affinity(state->ith);

     int node_n     = -1;
-    int task_phase = GGML_TASK_FINALIZE;
+    int task_phase = GGML_TASK_TYPE_FINALIZE;

     while (true) {
         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
@@ -17420,7 +17509,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             // all other threads are finished and spinning
             // do finalize and init here so we don't have synchronize again
             struct ggml_compute_params params = {
-                /*.type  =*/ GGML_TASK_FINALIZE,
+                /*.type  =*/ GGML_TASK_TYPE_FINALIZE,
                 /*.ith   =*/ 0,
                 /*.nth   =*/ 0,
                 /*.wsize =*/ cplan->work_size,
@@ -17451,17 +17540,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             if (n_tasks == 1) {
                 /* INIT */
                 if (GGML_OP_HAS_INIT[node->op]) {
-                    params.type = GGML_TASK_INIT;
+                    params.type = GGML_TASK_TYPE_INIT;
                     ggml_compute_forward(&params, node);
                 }

                 // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
                 // they do something more efficient than spinning (?)
-                params.type = GGML_TASK_COMPUTE;
+                params.type = GGML_TASK_TYPE_COMPUTE;
                 ggml_compute_forward(&params, node);

                 if (GGML_OP_HAS_FINALIZE[node->op]) {
-                    params.type = GGML_TASK_FINALIZE;
+                    params.type = GGML_TASK_TYPE_FINALIZE;
                     ggml_compute_forward(&params, node);
                 }

@@ -17475,7 +17564,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
                 }
             }

-            task_phase = GGML_TASK_INIT;
+            task_phase = GGML_TASK_TYPE_INIT;
             atomic_store(&state->shared->n_active, n_threads);
             atomic_store(&state->shared->node_n,   node_n);
             atomic_store(&state->shared->node_task, task_phase);
@@ -17492,7 +17581,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         const int n_tasks = ggml_get_n_tasks(node, n_threads);

         struct ggml_compute_params params = {
-            /*.type  =*/ GGML_TASK_INIT,
+            /*.type  =*/ GGML_TASK_TYPE_INIT,
             /*.ith   =*/ state->ith,
             /*.nth   =*/ n_tasks,
             /*.wsize =*/ cplan->work_size,
@@ -17506,7 +17595,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }

         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
-            task_phase = GGML_TASK_COMPUTE;
+            task_phase = GGML_TASK_TYPE_COMPUTE;
             atomic_store(&state->shared->n_active, n_threads);
             atomic_store(&state->shared->node_task, task_phase);
         }
@@ -17521,12 +17610,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }

         if (state->ith < n_tasks) {
-            params.type = GGML_TASK_COMPUTE;
+            params.type = GGML_TASK_TYPE_COMPUTE;
             ggml_compute_forward(&params, node);
         }

         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
-            task_phase = GGML_TASK_FINALIZE;
+            task_phase = GGML_TASK_TYPE_FINALIZE;
             atomic_store(&state->shared->n_active, n_threads);
             atomic_store(&state->shared->node_task, task_phase);
         }
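Across these four hunks the worker loop steps each node through INIT, COMPUTE, and FINALIZE: the last thread to decrement n_active re-arms the counter and publishes the next phase, while the remaining threads spin on node_task. Condensed into a stand-alone sketch with C11 atomics (the names are illustrative, not the file's own):

    #include <stdatomic.h>

    // Shared between workers, mirroring ggml_compute_state_shared above.
    static atomic_int n_active;   // threads still busy in the current phase
    static atomic_int node_task;  // current phase, e.g. GGML_TASK_TYPE_COMPUTE

    // Called by every worker when it finishes its share of a phase.
    static void finish_phase(int n_threads, int next_phase) {
        if (atomic_fetch_sub(&n_active, 1) == 1) {
            // last one out: re-arm the counter and publish the next phase
            atomic_store(&n_active, n_threads);
            atomic_store(&node_task, next_phase);
        } else {
            // everyone else spins until the phase flips
            while (atomic_load(&node_task) != next_phase) { /* spin */ }
        }
    }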
@@ -17762,7 +17851,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         /*.n_threads           =*/ n_threads,
         /*.n_active            =*/ n_threads,
         /*.node_n              =*/ -1,
-        /*.node_task           =*/ GGML_TASK_FINALIZE,
+        /*.node_task           =*/ GGML_TASK_TYPE_FINALIZE,
         /*.abort_callback      =*/ NULL,
         /*.abort_callback_data =*/ NULL,
     };
@@ -17830,7 +17919,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
 void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);

-    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);

     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;

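Only the object tag changes here; the calling convention of ggml_graph_compute_with_ctx is untouched. For context, a minimal end-to-end use of this entry point (arena size and tensor shapes are arbitrary):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = {
            /*.mem_size   =*/ 16*1024*1024,  // arbitrary arena size
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(ip);

        struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
        ggml_set_f32(a, 2.0f);
        ggml_set_f32(b, 3.0f);

        struct ggml_cgraph * gf = ggml_new_graph(ctx);
        ggml_build_forward_expand(gf, ggml_add(ctx, a, b));

        // allocates the GGML_OBJECT_TYPE_WORK_BUFFER inside ctx, then computes
        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/ 1);

        ggml_free(ctx);
        return 0;
    }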
@@ -18638,7 +18727,7 @@ static enum ggml_opt_result ggml_opt_adam(
     float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values

     struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
-    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;

     bool cancel = false;
@@ -18650,7 +18739,7 @@ static enum ggml_opt_result ggml_opt_adam(
             if (callback) {
                 callback(callback_data, accum_step, &sched, &cancel);
                 if (cancel) {
-                    return GGML_OPT_CANCEL;
+                    return GGML_OPT_RESULT_CANCEL;
                 }
             }
             // ggml_graph_reset  (gf);
@@ -18741,7 +18830,7 @@ static enum ggml_opt_result ggml_opt_adam(
             if (callback) {
                 callback(callback_data, accum_step, &sched, &cancel);
                 if (cancel) {
-                    return GGML_OPT_CANCEL;;
+                    return GGML_OPT_RESULT_CANCEL;;
                 }
             }
             // ggml_graph_reset  (gf);
@@ -18758,7 +18847,7 @@ static enum ggml_opt_result ggml_opt_adam(
             if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
                 GGML_PRINT_DEBUG("converged\n");

-                return GGML_OPT_OK;
+                return GGML_OPT_RESULT_OK;
             }

             // delta-based convergence test
@@ -18768,7 +18857,7 @@ static enum ggml_opt_result ggml_opt_adam(
                 const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;

                 if (fabsf(rate) < params.delta) {
-                    return GGML_OPT_OK;
+                    return GGML_OPT_RESULT_OK;
                 }
             }

@@ -18784,7 +18873,7 @@ static enum ggml_opt_result ggml_opt_adam(
                 ++n_no_improvement[0];

                 if (n_no_improvement[0] >= params.max_no_improvement) {
-                    return GGML_OPT_OK;
+                    return GGML_OPT_RESULT_OK;
                 }
             }
         }
@@ -18802,7 +18891,7 @@ static enum ggml_opt_result ggml_opt_adam(
         }
     }

-    return GGML_OPT_DID_NOT_CONVERGE;
+    return GGML_OPT_RESULT_DID_NOT_CONVERGE;
 }

 //
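Every early exit in ggml_opt_adam now uses the prefixed result codes, so callers can tell convergence from cancellation at a glance. A small sketch using only the enumerators visible in this diff:

    // Sketch: human-readable label for an optimizer outcome.
    static const char * opt_result_str(enum ggml_opt_result res) {
        switch (res) {
            case GGML_OPT_RESULT_OK:               return "converged";
            case GGML_OPT_RESULT_DID_NOT_CONVERGE: return "did not converge";
            case GGML_OPT_RESULT_CANCEL:           return "cancelled by callback";
            default:                               return "error";
        }
    }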
@@ -18883,7 +18972,7 @@ static enum ggml_opt_result linesearch_backtracking(
                 float sched = 0;
                 callback(callback_data, accum_step, &sched, cancel);
                 if (*cancel) {
-                    return GGML_OPT_CANCEL;
+                    return GGML_OPT_RESULT_CANCEL;
                 }
             }
             // ggml_graph_reset (gf);
@@ -18956,7 +19045,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
         params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
         if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
-            return GGML_OPT_INVALID_WOLFE;
+            return GGML_OPT_RESULT_INVALID_WOLFE;
         }
     }

@@ -18985,7 +19074,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     }

     struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
-    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;

     float * x = opt->lbfgs.x->data; // current parameters
@@ -19026,7 +19115,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
                 float sched = 0;
                 callback(callback_data, accum_step, &sched, &cancel);
                 if (cancel) {
-                    return GGML_OPT_CANCEL;
+                    return GGML_OPT_RESULT_CANCEL;
                 }
             }
             // ggml_graph_reset (gf);
@@ -19054,7 +19143,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(

     // already optimized
     if (gnorm/xnorm <= params.lbfgs.eps) {
-        return GGML_OPT_OK;
+        return GGML_OPT_RESULT_OK;
     }

     if (opt->just_initialized) {
@@ -19099,7 +19188,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         // way to test and don't want to break something with so many changes lined up
         ls = linesearch_backtracking(&params, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
         if (cancel) {
-            return GGML_OPT_CANCEL;
+            return GGML_OPT_RESULT_CANCEL;
         }

         if (ls < 0) {
@@ -19122,7 +19211,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         }
         if (gnorm/xnorm <= params.lbfgs.eps) {
             // converged
-            return GGML_OPT_OK;
+            return GGML_OPT_RESULT_OK;
         }

         // delta-based convergence test
@@ -19132,7 +19221,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
             const float rate = (pf[k[0]%params.past] - fx)/fx;

             if (fabsf(rate) < params.delta) {
-                return GGML_OPT_OK;
+                return GGML_OPT_RESULT_OK;
             }
         }

@@ -19148,14 +19237,14 @@ static enum ggml_opt_result ggml_opt_lbfgs(
                 n_no_improvement[0]++;

                 if (n_no_improvement[0] >= params.max_no_improvement) {
-                    return GGML_OPT_OK;
+                    return GGML_OPT_RESULT_OK;
                 }
             }
         }

         if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
             // reached the maximum number of iterations
-            return GGML_OPT_DID_NOT_CONVERGE;
+            return GGML_OPT_RESULT_DID_NOT_CONVERGE;
         }

         // update vectors s and y:
@@ -19211,17 +19300,17 @@ static enum ggml_opt_result ggml_opt_lbfgs(

     GGML_ASSERT(false && "lbfgs failed");

-    return GGML_OPT_DID_NOT_CONVERGE;
+    return GGML_OPT_RESULT_DID_NOT_CONVERGE;
 }

 struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
     struct ggml_opt_params result;

     switch (type) {
-        case GGML_OPT_ADAM:
+        case GGML_OPT_TYPE_ADAM:
             {
                 result = (struct ggml_opt_params) {
-                    .type       = GGML_OPT_ADAM,
+                    .type       = GGML_OPT_TYPE_ADAM,
                     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
                     .n_threads  = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
                     .past       = 0,
@@ -19249,10 +19338,10 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
                 },
             };
         } break;
-        case GGML_OPT_LBFGS:
+        case GGML_OPT_TYPE_LBFGS:
             {
                 result = (struct ggml_opt_params) {
-                    .type       = GGML_OPT_LBFGS,
+                    .type       = GGML_OPT_TYPE_LBFGS,
                     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
                     .n_threads  = 1,
                     .past       = 0,
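With the rename, picking an optimizer and its defaults reads as follows. The sketch assumes the ggml_opt(ctx, params, f) entry point keeps the signature this file has used all along; the n_threads tweak echoes the FIXME above.

    #include "ggml.h"

    // Sketch: minimize the scalar loss tensor `f` that was built inside `ctx`.
    static enum ggml_opt_result optimize_f(struct ggml_context * ctx,
                                           struct ggml_tensor  * f) {
        struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_TYPE_ADAM);
        params.n_threads = 4;  // the default is still 1, per the FIXME above
        return ggml_opt(ctx, params, f);
    }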
@@ -19297,12 +19386,12 @@ GGML_API void ggml_opt_init(
     opt->just_initialized = true;
     if (opt->ctx == NULL) {
         struct ggml_init_params ctx_opt_params;
-        if (opt->params.type == GGML_OPT_ADAM) {
+        if (opt->params.type == GGML_OPT_TYPE_ADAM) {
             ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
             if (opt->params.past > 0) {
                 ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
             }
-        } else if (opt->params.type == GGML_OPT_LBFGS) {
+        } else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
             ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
             if (opt->params.past > 0) {
                 ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
@@ -19314,7 +19403,7 @@ GGML_API void ggml_opt_init(
         opt->ctx = ggml_init(ctx_opt_params);
     }
     switch (opt->params.type) {
-        case GGML_OPT_ADAM:
+        case GGML_OPT_TYPE_ADAM:
             {
                 opt->adam.g = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
                 opt->adam.m = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
@@ -19328,7 +19417,7 @@ GGML_API void ggml_opt_init(
                     ggml_set_zero(opt->adam.pf);
                 }
             } break;
-        case GGML_OPT_LBFGS:
+        case GGML_OPT_TYPE_LBFGS:
             {
                 opt->lbfgs.x  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
                 opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
@@ -19372,13 +19461,13 @@ enum ggml_opt_result ggml_opt(

         ctx = ggml_init(params_ctx);
         if (ctx == NULL) {
-            return GGML_OPT_NO_CONTEXT;
+            return GGML_OPT_RESULT_NO_CONTEXT;
         }

         free_ctx = true;
     }

-    enum ggml_opt_result result = GGML_OPT_OK;
+    enum ggml_opt_result result = GGML_OPT_RESULT_OK;

     struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));

@@ -19417,14 +19506,14 @@ enum ggml_opt_result ggml_opt_resume_g(
         void * callback_data) {

     // build forward + backward compute graphs
-    enum ggml_opt_result result = GGML_OPT_OK;
+    enum ggml_opt_result result = GGML_OPT_RESULT_OK;

     switch (opt->params.type) {
-        case GGML_OPT_ADAM:
+        case GGML_OPT_TYPE_ADAM:
             {
                 result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
             } break;
-        case GGML_OPT_LBFGS:
+        case GGML_OPT_TYPE_LBFGS:
             {
                 result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
             } break;
@@ -19461,8 +19550,10 @@ void ggml_quantize_init(enum ggml_type type) {
     switch (type) {
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ1_S:   iq2xs_init_impl(type); break;
         case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
+        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
         default: // nothing
             break;
     }
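The new cases slot into the existing grid setup: iq2_s reuses the iq2xs tables, while iq3_s builds a 512-entry grid where iq3_xxs uses 256. Quantization code is expected to call this before converting data; a sketch, assuming ggml_quantize_free remains the exported counterpart that releases the tables:

    // Sketch: prepare the iq3_s lookup tables, quantize, then release them.
    ggml_quantize_init(GGML_TYPE_IQ3_S);  // builds the 512-entry iq3 grid
    // ... call ggml_quantize_chunk(GGML_TYPE_IQ3_S, ...) here ...
    ggml_quantize_free();                 // assumed counterpart freeing the grids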
@@ -19737,6 +19828,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix) {
                 result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+        case GGML_TYPE_IQ3_S:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq3_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
+        case GGML_TYPE_IQ2_S:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq2_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
         case GGML_TYPE_IQ1_S:
             {
                 GGML_ASSERT(start % QK_K == 0);
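Both new cases follow the chunk protocol used by every K-quant type: start must be divisible by QK_K and by n_per_row, rows are addressed via ggml_row_size, and the number of bytes written is returned. A usage sketch (the parameter list is taken from the hunk header; passing NULL for the importance matrix is assumed to be acceptable for iq3_s):

    #include "ggml.h"

    // Sketch: quantize `nrows` rows of f32 data to IQ3_S in a single chunk.
    static size_t quantize_rows_iq3_s(const float * src, void * dst,
                                      int nrows, int n_per_row) {
        int64_t hist[16] = {0};  // bin histogram, as used elsewhere in ggml
        // start == 0, so the QK_K / n_per_row divisibility asserts hold trivially
        return ggml_quantize_chunk(GGML_TYPE_IQ3_S, src, dst,
                                   /*start=*/ 0, nrows, n_per_row,
                                   hist, /*imatrix=*/ NULL);
    }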
@@ -19747,6 +19856,9 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix) {
                 GGML_ASSERT(result == row_size * nrows);
             } break;
         case GGML_TYPE_IQ4_NL:
+#if QK_K == 64
+        case GGML_TYPE_IQ4_XS:
+#endif
             {
                 GGML_ASSERT(start % QK4_NL == 0);
                 GGML_ASSERT(start % n_per_row == 0);
@@ -19755,6 +19867,17 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, int start, int nrows, int n_per_row, int64_t * hist, const float * imatrix) {
                 result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+#if QK_K != 64
+        case GGML_TYPE_IQ4_XS:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
+#endif
         case GGML_TYPE_F16:
             {
                 size_t elemsize = sizeof(ggml_fp16_t);