llama_cpp 0.12.7 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/ext/llama_cpp/llama_cpp.cpp +131 -288
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +29 -29
- data/vendor/tmp/llama.cpp/Makefile +10 -6
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +32 -23
- data/vendor/tmp/llama.cpp/ggml-backend.h +17 -16
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +949 -168
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +9 -3
- data/vendor/tmp/llama.cpp/ggml-metal.m +159 -22
- data/vendor/tmp/llama.cpp/ggml-metal.metal +1195 -139
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +27 -27
- data/vendor/tmp/llama.cpp/ggml-quants.c +1971 -271
- data/vendor/tmp/llama.cpp/ggml-quants.h +52 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +3586 -1201
- data/vendor/tmp/llama.cpp/ggml-sycl.h +5 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +39336 -43461
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1391 -825
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +1 -0
- data/vendor/tmp/llama.cpp/ggml.c +545 -210
- data/vendor/tmp/llama.cpp/ggml.h +65 -23
- data/vendor/tmp/llama.cpp/llama.cpp +1458 -763
- data/vendor/tmp/llama.cpp/llama.h +81 -75
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -320,6 +320,17 @@ static ggml_fp16_t ggml_table_exp_f16[1 << 16];
|
|
320
320
|
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
|
321
321
|
float ggml_table_f32_f16[1 << 16];
|
322
322
|
|
323
|
+
const char * ggml_status_to_string(enum ggml_status status) {
|
324
|
+
switch (status) {
|
325
|
+
case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
|
326
|
+
case GGML_STATUS_FAILED: return "GGML status: error (operation failed)";
|
327
|
+
case GGML_STATUS_SUCCESS: return "GGML status: success";
|
328
|
+
case GGML_STATUS_ABORTED: return "GGML status: warning (operation aborted)";
|
329
|
+
}
|
330
|
+
|
331
|
+
return "GGML status: unknown";
|
332
|
+
}
|
333
|
+
|
323
334
|
// note: do not use these inside ggml.c
|
324
335
|
// these are meant to be used via the ggml.h API
|
325
336
|
float ggml_fp16_to_fp32(ggml_fp16_t x) {
|
@@ -355,6 +366,10 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
|
|
355
366
|
}
|
356
367
|
}
|
357
368
|
|
369
|
+
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
|
370
|
+
return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
|
371
|
+
}
|
372
|
+
|
358
373
|
//
|
359
374
|
// timing
|
360
375
|
//
|
@@ -678,6 +693,30 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
678
693
|
.vec_dot_type = GGML_TYPE_Q8_K,
|
679
694
|
.nrows = 1,
|
680
695
|
},
|
696
|
+
[GGML_TYPE_IQ3_S] = {
|
697
|
+
.type_name = "iq3_s",
|
698
|
+
.blck_size = QK_K,
|
699
|
+
.type_size = sizeof(block_iq3_s),
|
700
|
+
.is_quantized = true,
|
701
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq3_s,
|
702
|
+
.from_float = quantize_row_iq3_s,
|
703
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq3_s_reference,
|
704
|
+
.vec_dot = ggml_vec_dot_iq3_s_q8_K,
|
705
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
706
|
+
.nrows = 1,
|
707
|
+
},
|
708
|
+
[GGML_TYPE_IQ2_S] = {
|
709
|
+
.type_name = "iq2_s",
|
710
|
+
.blck_size = QK_K,
|
711
|
+
.type_size = sizeof(block_iq2_s),
|
712
|
+
.is_quantized = true,
|
713
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq2_s,
|
714
|
+
.from_float = quantize_row_iq2_s,
|
715
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq2_s_reference,
|
716
|
+
.vec_dot = ggml_vec_dot_iq2_s_q8_K,
|
717
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
718
|
+
.nrows = 1,
|
719
|
+
},
|
681
720
|
[GGML_TYPE_IQ1_S] = {
|
682
721
|
.type_name = "iq1_s",
|
683
722
|
.blck_size = QK_K,
|
@@ -702,6 +741,26 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
|
702
741
|
.vec_dot_type = GGML_TYPE_Q8_0,
|
703
742
|
.nrows = 1,
|
704
743
|
},
|
744
|
+
[GGML_TYPE_IQ4_XS] = {
|
745
|
+
.type_name = "iq4_xs",
|
746
|
+
#if QK_K == 64
|
747
|
+
.blck_size = QK4_NL,
|
748
|
+
#else
|
749
|
+
.blck_size = QK_K,
|
750
|
+
#endif
|
751
|
+
.type_size = sizeof(block_iq4_xs),
|
752
|
+
.is_quantized = true,
|
753
|
+
.to_float = (ggml_to_float_t) dequantize_row_iq4_xs,
|
754
|
+
.from_float = quantize_row_iq4_xs,
|
755
|
+
.from_float_reference = (ggml_from_float_t)quantize_row_iq4_xs_reference,
|
756
|
+
.vec_dot = ggml_vec_dot_iq4_xs_q8_K,
|
757
|
+
#if QK_K == 64
|
758
|
+
.vec_dot_type = GGML_TYPE_Q8_0,
|
759
|
+
#else
|
760
|
+
.vec_dot_type = GGML_TYPE_Q8_K,
|
761
|
+
#endif
|
762
|
+
.nrows = 1,
|
763
|
+
},
|
705
764
|
[GGML_TYPE_Q8_K] = {
|
706
765
|
.type_name = "q8_K",
|
707
766
|
.blck_size = QK_K,
|
@@ -1560,9 +1619,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
|
|
1560
1619
|
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
|
1561
1620
|
uint16_t t;
|
1562
1621
|
for (int i = 0; i < n; ++i) {
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1622
|
+
if (x[i] <= -10.0f) {
|
1623
|
+
y[i] = 0.0f;
|
1624
|
+
} else if (x[i] >= 10.0f) {
|
1625
|
+
y[i] = x[i];
|
1626
|
+
} else {
|
1627
|
+
ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
|
1628
|
+
memcpy(&t, &fp16, sizeof(uint16_t));
|
1629
|
+
y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
|
1630
|
+
}
|
1566
1631
|
}
|
1567
1632
|
}
|
1568
1633
|
#else
|
@@ -1768,6 +1833,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
1768
1833
|
"POOL_2D",
|
1769
1834
|
"UPSCALE",
|
1770
1835
|
"PAD",
|
1836
|
+
"ARANGE",
|
1837
|
+
"TIMESTEP_EMBEDDING",
|
1771
1838
|
"ARGSORT",
|
1772
1839
|
"LEAKY_RELU",
|
1773
1840
|
|
@@ -1796,7 +1863,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
1796
1863
|
"CROSS_ENTROPY_LOSS_BACK",
|
1797
1864
|
};
|
1798
1865
|
|
1799
|
-
static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
|
1866
|
+
static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
|
1800
1867
|
|
1801
1868
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
1802
1869
|
"none",
|
@@ -1854,6 +1921,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1854
1921
|
"pool_2d(x)",
|
1855
1922
|
"upscale(x)",
|
1856
1923
|
"pad(x)",
|
1924
|
+
"arange(start, stop, step)",
|
1925
|
+
"timestep_embedding(timesteps, dim, max_period)",
|
1857
1926
|
"argsort(x)",
|
1858
1927
|
"leaky_relu(x)",
|
1859
1928
|
|
@@ -1882,7 +1951,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1882
1951
|
"cross_entropy_loss_back(x,y)",
|
1883
1952
|
};
|
1884
1953
|
|
1885
|
-
static_assert(GGML_OP_COUNT == 72, "GGML_OP_COUNT != 72");
|
1954
|
+
static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
|
1886
1955
|
|
1887
1956
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
1888
1957
|
|
@@ -2085,7 +2154,10 @@ void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
|
|
2085
2154
|
getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
|
2086
2155
|
#else
|
2087
2156
|
// old glibc doesn't have a wrapper for this call. Fall back on direct syscall
|
2088
|
-
|
2157
|
+
# if !defined(SYS_getcpu) && defined(SYS_get_cpu)
|
2158
|
+
# define SYS_getcpu SYS_get_cpu // some older glibc versions use this name
|
2159
|
+
# endif
|
2160
|
+
getcpu_ret = syscall(SYS_getcpu, &current_cpu, &g_state.numa.current_node);
|
2089
2161
|
#endif
|
2090
2162
|
|
2091
2163
|
if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
|
@@ -2304,6 +2376,9 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
2304
2376
|
case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
|
2305
2377
|
case GGML_FTYPE_MOSTLY_IQ1_S: wtype = GGML_TYPE_IQ1_S; break;
|
2306
2378
|
case GGML_FTYPE_MOSTLY_IQ4_NL: wtype = GGML_TYPE_IQ4_NL; break;
|
2379
|
+
case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break;
|
2380
|
+
case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break;
|
2381
|
+
case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break;
|
2307
2382
|
case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break;
|
2308
2383
|
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
|
2309
2384
|
}
|
@@ -2708,7 +2783,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
2708
2783
|
}
|
2709
2784
|
}
|
2710
2785
|
|
2711
|
-
struct ggml_object * const obj_new = ggml_new_object(ctx,
|
2786
|
+
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
|
2712
2787
|
|
2713
2788
|
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
|
2714
2789
|
|
@@ -2716,7 +2791,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|
2716
2791
|
|
2717
2792
|
*result = (struct ggml_tensor) {
|
2718
2793
|
/*.type =*/ type,
|
2719
|
-
/*.backend =*/
|
2794
|
+
/*.backend =*/ GGML_BACKEND_TYPE_CPU,
|
2720
2795
|
/*.buffer =*/ NULL,
|
2721
2796
|
/*.ne =*/ { 1, 1, 1, 1 },
|
2722
2797
|
/*.nb =*/ { 0, 0, 0, 0 },
|
@@ -2838,11 +2913,21 @@ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_
|
|
2838
2913
|
return ((const int32_t *)(tensor->op_params))[i];
|
2839
2914
|
}
|
2840
2915
|
|
2916
|
+
static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
|
2917
|
+
assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
|
2918
|
+
return ((const float *)(tensor->op_params))[i];
|
2919
|
+
}
|
2920
|
+
|
2841
2921
|
static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
|
2842
2922
|
assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
|
2843
2923
|
((int32_t *)(tensor->op_params))[i] = value;
|
2844
2924
|
}
|
2845
2925
|
|
2926
|
+
static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
|
2927
|
+
assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
|
2928
|
+
((float *)(tensor->op_params))[i] = value;
|
2929
|
+
}
|
2930
|
+
|
2846
2931
|
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
|
2847
2932
|
memset(tensor->data, 0, ggml_nbytes(tensor));
|
2848
2933
|
return tensor;
|
@@ -3289,7 +3374,7 @@ struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
|
|
3289
3374
|
char * const mem_buffer = ctx->mem_buffer;
|
3290
3375
|
|
3291
3376
|
while (obj != NULL) {
|
3292
|
-
if (obj->type ==
|
3377
|
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
3293
3378
|
return (struct ggml_tensor *)(mem_buffer + obj->offs);
|
3294
3379
|
}
|
3295
3380
|
|
@@ -3306,7 +3391,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc
|
|
3306
3391
|
char * const mem_buffer = ctx->mem_buffer;
|
3307
3392
|
|
3308
3393
|
while (obj != NULL) {
|
3309
|
-
if (obj->type ==
|
3394
|
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
3310
3395
|
return (struct ggml_tensor *)(mem_buffer + obj->offs);
|
3311
3396
|
}
|
3312
3397
|
|
@@ -3322,7 +3407,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
|
|
3322
3407
|
char * const mem_buffer = ctx->mem_buffer;
|
3323
3408
|
|
3324
3409
|
while (obj != NULL) {
|
3325
|
-
if (obj->type ==
|
3410
|
+
if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
|
3326
3411
|
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
|
3327
3412
|
if (strcmp(cur->name, name) == 0) {
|
3328
3413
|
return cur;
|
@@ -5729,11 +5814,13 @@ struct ggml_tensor * ggml_pool_1d(
|
|
5729
5814
|
is_node = true;
|
5730
5815
|
}
|
5731
5816
|
|
5732
|
-
const int64_t ne[
|
5817
|
+
const int64_t ne[4] = {
|
5733
5818
|
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
5734
5819
|
a->ne[1],
|
5820
|
+
a->ne[2],
|
5821
|
+
a->ne[3],
|
5735
5822
|
};
|
5736
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
5823
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
5737
5824
|
|
5738
5825
|
int32_t params[] = { op, k0, s0, p0 };
|
5739
5826
|
ggml_set_op_params(result, params, sizeof(params));
|
@@ -5839,6 +5926,55 @@ struct ggml_tensor * ggml_upscale(
|
|
5839
5926
|
return ggml_upscale_impl(ctx, a, scale_factor);
|
5840
5927
|
}
|
5841
5928
|
|
5929
|
+
struct ggml_tensor * ggml_arange(
|
5930
|
+
struct ggml_context * ctx,
|
5931
|
+
float start,
|
5932
|
+
float stop,
|
5933
|
+
float step) {
|
5934
|
+
|
5935
|
+
GGML_ASSERT(stop > start);
|
5936
|
+
|
5937
|
+
const int64_t steps = (int64_t) ceilf((stop - start) / step);
|
5938
|
+
|
5939
|
+
struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
|
5940
|
+
|
5941
|
+
result->op = GGML_OP_ARANGE;
|
5942
|
+
ggml_set_op_params_f32(result, 0, start);
|
5943
|
+
ggml_set_op_params_f32(result, 1, stop);
|
5944
|
+
ggml_set_op_params_f32(result, 2, step);
|
5945
|
+
|
5946
|
+
return result;
|
5947
|
+
}
|
5948
|
+
|
5949
|
+
struct ggml_tensor * ggml_timestep_embedding(
|
5950
|
+
struct ggml_context * ctx,
|
5951
|
+
struct ggml_tensor * timesteps,
|
5952
|
+
int dim,
|
5953
|
+
int max_period) {
|
5954
|
+
bool is_node = false;
|
5955
|
+
|
5956
|
+
if (timesteps->grad) {
|
5957
|
+
GGML_ASSERT(false); // TODO: implement backward
|
5958
|
+
is_node = true;
|
5959
|
+
}
|
5960
|
+
|
5961
|
+
int actual_dim = dim;
|
5962
|
+
if (dim % 2 != 0) {
|
5963
|
+
actual_dim = dim + 1;
|
5964
|
+
}
|
5965
|
+
|
5966
|
+
struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
|
5967
|
+
|
5968
|
+
result->op = GGML_OP_TIMESTEP_EMBEDDING;
|
5969
|
+
ggml_set_op_params_i32(result, 0, dim);
|
5970
|
+
ggml_set_op_params_i32(result, 1, max_period);
|
5971
|
+
|
5972
|
+
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5973
|
+
result->src[0] = timesteps;
|
5974
|
+
|
5975
|
+
return result;
|
5976
|
+
}
|
5977
|
+
|
5842
5978
|
// ggml_argsort
|
5843
5979
|
|
5844
5980
|
struct ggml_tensor * ggml_argsort(
|
@@ -5866,7 +6002,7 @@ struct ggml_tensor * ggml_top_k(
|
|
5866
6002
|
int k) {
|
5867
6003
|
GGML_ASSERT(a->ne[0] >= k);
|
5868
6004
|
|
5869
|
-
struct ggml_tensor * result = ggml_argsort(ctx, a,
|
6005
|
+
struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
|
5870
6006
|
|
5871
6007
|
result = ggml_view_4d(ctx, result,
|
5872
6008
|
k, result->ne[1], result->ne[2], result->ne[3],
|
@@ -6660,7 +6796,7 @@ static void ggml_compute_forward_dup_same_cont(
|
|
6660
6796
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
6661
6797
|
GGML_ASSERT(src0->type == dst->type);
|
6662
6798
|
|
6663
|
-
if (params->type ==
|
6799
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
6664
6800
|
return;
|
6665
6801
|
}
|
6666
6802
|
|
@@ -6692,7 +6828,7 @@ static void ggml_compute_forward_dup_f16(
|
|
6692
6828
|
|
6693
6829
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
6694
6830
|
|
6695
|
-
if (params->type ==
|
6831
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
6696
6832
|
return;
|
6697
6833
|
}
|
6698
6834
|
|
@@ -6965,7 +7101,7 @@ static void ggml_compute_forward_dup_f32(
|
|
6965
7101
|
|
6966
7102
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
6967
7103
|
|
6968
|
-
if (params->type ==
|
7104
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
6969
7105
|
return;
|
6970
7106
|
}
|
6971
7107
|
|
@@ -7218,7 +7354,7 @@ static void ggml_compute_forward_dup_bytes(
|
|
7218
7354
|
GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
|
7219
7355
|
GGML_ASSERT(src0->type == dst->type);
|
7220
7356
|
|
7221
|
-
if (params->type ==
|
7357
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7222
7358
|
return;
|
7223
7359
|
}
|
7224
7360
|
|
@@ -7398,7 +7534,7 @@ static void ggml_compute_forward_add_f32(
|
|
7398
7534
|
|
7399
7535
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
7400
7536
|
|
7401
|
-
if (params->type ==
|
7537
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7402
7538
|
return;
|
7403
7539
|
}
|
7404
7540
|
|
@@ -7406,7 +7542,7 @@ static void ggml_compute_forward_add_f32(
|
|
7406
7542
|
const int nth = params->nth;
|
7407
7543
|
|
7408
7544
|
#ifdef GGML_USE_CLBLAST
|
7409
|
-
if (src1->backend ==
|
7545
|
+
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
7410
7546
|
// TODO: OpenCL kernel support full broadcast
|
7411
7547
|
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
7412
7548
|
if (ith == 0) {
|
@@ -7488,7 +7624,7 @@ static void ggml_compute_forward_add_f16_f32(
|
|
7488
7624
|
|
7489
7625
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7490
7626
|
|
7491
|
-
if (params->type ==
|
7627
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7492
7628
|
return;
|
7493
7629
|
}
|
7494
7630
|
|
@@ -7567,7 +7703,7 @@ static void ggml_compute_forward_add_f16_f16(
|
|
7567
7703
|
|
7568
7704
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7569
7705
|
|
7570
|
-
if (params->type ==
|
7706
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7571
7707
|
return;
|
7572
7708
|
}
|
7573
7709
|
|
@@ -7623,7 +7759,7 @@ static void ggml_compute_forward_add_q_f32(
|
|
7623
7759
|
|
7624
7760
|
GGML_ASSERT(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
7625
7761
|
|
7626
|
-
if (params->type ==
|
7762
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7627
7763
|
return;
|
7628
7764
|
}
|
7629
7765
|
|
@@ -7738,6 +7874,9 @@ static void ggml_compute_forward_add(
|
|
7738
7874
|
case GGML_TYPE_IQ3_XXS:
|
7739
7875
|
case GGML_TYPE_IQ1_S:
|
7740
7876
|
case GGML_TYPE_IQ4_NL:
|
7877
|
+
case GGML_TYPE_IQ4_XS:
|
7878
|
+
case GGML_TYPE_IQ3_S:
|
7879
|
+
case GGML_TYPE_IQ2_S:
|
7741
7880
|
{
|
7742
7881
|
ggml_compute_forward_add_q_f32(params, dst);
|
7743
7882
|
} break;
|
@@ -7760,7 +7899,7 @@ static void ggml_compute_forward_add1_f32(
|
|
7760
7899
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7761
7900
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7762
7901
|
|
7763
|
-
if (params->type ==
|
7902
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7764
7903
|
return;
|
7765
7904
|
}
|
7766
7905
|
|
@@ -7814,7 +7953,7 @@ static void ggml_compute_forward_add1_f16_f32(
|
|
7814
7953
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7815
7954
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7816
7955
|
|
7817
|
-
if (params->type ==
|
7956
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7818
7957
|
return;
|
7819
7958
|
}
|
7820
7959
|
|
@@ -7866,7 +8005,7 @@ static void ggml_compute_forward_add1_f16_f16(
|
|
7866
8005
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7867
8006
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7868
8007
|
|
7869
|
-
if (params->type ==
|
8008
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7870
8009
|
return;
|
7871
8010
|
}
|
7872
8011
|
|
@@ -7918,7 +8057,7 @@ static void ggml_compute_forward_add1_q_f32(
|
|
7918
8057
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
7919
8058
|
GGML_ASSERT(ggml_is_scalar(src1));
|
7920
8059
|
|
7921
|
-
if (params->type ==
|
8060
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
7922
8061
|
return;
|
7923
8062
|
}
|
7924
8063
|
|
@@ -8017,6 +8156,9 @@ static void ggml_compute_forward_add1(
|
|
8017
8156
|
case GGML_TYPE_IQ3_XXS:
|
8018
8157
|
case GGML_TYPE_IQ1_S:
|
8019
8158
|
case GGML_TYPE_IQ4_NL:
|
8159
|
+
case GGML_TYPE_IQ4_XS:
|
8160
|
+
case GGML_TYPE_IQ3_S:
|
8161
|
+
case GGML_TYPE_IQ2_S:
|
8020
8162
|
{
|
8021
8163
|
ggml_compute_forward_add1_q_f32(params, dst);
|
8022
8164
|
} break;
|
@@ -8047,7 +8189,7 @@ static void ggml_compute_forward_acc_f32(
|
|
8047
8189
|
size_t offset = ((int32_t *) dst->op_params)[3];
|
8048
8190
|
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
8049
8191
|
|
8050
|
-
if (!inplace && (params->type ==
|
8192
|
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
8051
8193
|
if (params->ith != 0) {
|
8052
8194
|
return;
|
8053
8195
|
}
|
@@ -8059,7 +8201,7 @@ static void ggml_compute_forward_acc_f32(
|
|
8059
8201
|
ggml_nbytes(dst));
|
8060
8202
|
}
|
8061
8203
|
|
8062
|
-
if (params->type ==
|
8204
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8063
8205
|
return;
|
8064
8206
|
}
|
8065
8207
|
|
@@ -8141,6 +8283,9 @@ static void ggml_compute_forward_acc(
|
|
8141
8283
|
case GGML_TYPE_IQ3_XXS:
|
8142
8284
|
case GGML_TYPE_IQ1_S:
|
8143
8285
|
case GGML_TYPE_IQ4_NL:
|
8286
|
+
case GGML_TYPE_IQ4_XS:
|
8287
|
+
case GGML_TYPE_IQ3_S:
|
8288
|
+
case GGML_TYPE_IQ2_S:
|
8144
8289
|
default:
|
8145
8290
|
{
|
8146
8291
|
GGML_ASSERT(false);
|
@@ -8160,7 +8305,7 @@ static void ggml_compute_forward_sub_f32(
|
|
8160
8305
|
assert(params->ith == 0);
|
8161
8306
|
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
8162
8307
|
|
8163
|
-
if (params->type ==
|
8308
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8164
8309
|
return;
|
8165
8310
|
}
|
8166
8311
|
|
@@ -8241,14 +8386,14 @@ static void ggml_compute_forward_mul_f32(
|
|
8241
8386
|
|
8242
8387
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
8243
8388
|
|
8244
|
-
if (params->type ==
|
8389
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8245
8390
|
return;
|
8246
8391
|
}
|
8247
8392
|
const int ith = params->ith;
|
8248
8393
|
const int nth = params->nth;
|
8249
8394
|
|
8250
8395
|
#if defined(GGML_USE_CLBLAST)
|
8251
|
-
if (src1->backend ==
|
8396
|
+
if (src1->backend == GGML_BACKEND_TYPE_GPU) {
|
8252
8397
|
// TODO: OpenCL kernel support full broadcast
|
8253
8398
|
GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
|
8254
8399
|
if (ith == 0) {
|
@@ -8349,7 +8494,7 @@ static void ggml_compute_forward_div_f32(
|
|
8349
8494
|
|
8350
8495
|
GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
|
8351
8496
|
|
8352
|
-
if (params->type ==
|
8497
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8353
8498
|
return;
|
8354
8499
|
}
|
8355
8500
|
|
@@ -8444,7 +8589,7 @@ static void ggml_compute_forward_sqr_f32(
|
|
8444
8589
|
assert(params->ith == 0);
|
8445
8590
|
assert(ggml_are_same_shape(src0, dst));
|
8446
8591
|
|
8447
|
-
if (params->type ==
|
8592
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8448
8593
|
return;
|
8449
8594
|
}
|
8450
8595
|
|
@@ -8490,7 +8635,7 @@ static void ggml_compute_forward_sqrt_f32(
|
|
8490
8635
|
assert(params->ith == 0);
|
8491
8636
|
assert(ggml_are_same_shape(src0, dst));
|
8492
8637
|
|
8493
|
-
if (params->type ==
|
8638
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8494
8639
|
return;
|
8495
8640
|
}
|
8496
8641
|
|
@@ -8536,7 +8681,7 @@ static void ggml_compute_forward_log_f32(
|
|
8536
8681
|
GGML_ASSERT(params->ith == 0);
|
8537
8682
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8538
8683
|
|
8539
|
-
if (params->type ==
|
8684
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8540
8685
|
return;
|
8541
8686
|
}
|
8542
8687
|
|
@@ -8582,7 +8727,7 @@ static void ggml_compute_forward_sum_f32(
|
|
8582
8727
|
assert(params->ith == 0);
|
8583
8728
|
assert(ggml_is_scalar(dst));
|
8584
8729
|
|
8585
|
-
if (params->type ==
|
8730
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8586
8731
|
return;
|
8587
8732
|
}
|
8588
8733
|
|
@@ -8617,7 +8762,7 @@ static void ggml_compute_forward_sum_f16(
|
|
8617
8762
|
assert(params->ith == 0);
|
8618
8763
|
assert(ggml_is_scalar(dst));
|
8619
8764
|
|
8620
|
-
if (params->type ==
|
8765
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8621
8766
|
return;
|
8622
8767
|
}
|
8623
8768
|
|
@@ -8674,7 +8819,7 @@ static void ggml_compute_forward_sum_rows_f32(
|
|
8674
8819
|
|
8675
8820
|
GGML_ASSERT(params->ith == 0);
|
8676
8821
|
|
8677
|
-
if (params->type ==
|
8822
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8678
8823
|
return;
|
8679
8824
|
}
|
8680
8825
|
|
@@ -8729,7 +8874,7 @@ static void ggml_compute_forward_mean_f32(
|
|
8729
8874
|
|
8730
8875
|
assert(params->ith == 0);
|
8731
8876
|
|
8732
|
-
if (params->type ==
|
8877
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8733
8878
|
return;
|
8734
8879
|
}
|
8735
8880
|
|
@@ -8788,7 +8933,7 @@ static void ggml_compute_forward_argmax_f32(
|
|
8788
8933
|
|
8789
8934
|
assert(params->ith == 0);
|
8790
8935
|
|
8791
|
-
if (params->type ==
|
8936
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8792
8937
|
return;
|
8793
8938
|
}
|
8794
8939
|
|
@@ -8839,7 +8984,7 @@ static void ggml_compute_forward_repeat_f32(
|
|
8839
8984
|
GGML_ASSERT(params->ith == 0);
|
8840
8985
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
8841
8986
|
|
8842
|
-
if (params->type ==
|
8987
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8843
8988
|
return;
|
8844
8989
|
}
|
8845
8990
|
|
@@ -8884,7 +9029,7 @@ static void ggml_compute_forward_repeat_f16(
|
|
8884
9029
|
GGML_ASSERT(params->ith == 0);
|
8885
9030
|
GGML_ASSERT(ggml_can_repeat(src0, dst));
|
8886
9031
|
|
8887
|
-
if (params->type ==
|
9032
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8888
9033
|
return;
|
8889
9034
|
}
|
8890
9035
|
|
@@ -8958,7 +9103,7 @@ static void ggml_compute_forward_repeat_back_f32(
|
|
8958
9103
|
GGML_ASSERT(params->ith == 0);
|
8959
9104
|
GGML_ASSERT(ggml_can_repeat(dst, src0));
|
8960
9105
|
|
8961
|
-
if (params->type ==
|
9106
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
8962
9107
|
return;
|
8963
9108
|
}
|
8964
9109
|
|
@@ -9035,7 +9180,7 @@ static void ggml_compute_forward_concat_f32(
|
|
9035
9180
|
const struct ggml_tensor * src0 = dst->src[0];
|
9036
9181
|
const struct ggml_tensor * src1 = dst->src[1];
|
9037
9182
|
|
9038
|
-
if (params->type ==
|
9183
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9039
9184
|
return;
|
9040
9185
|
}
|
9041
9186
|
|
@@ -9107,7 +9252,7 @@ static void ggml_compute_forward_abs_f32(
|
|
9107
9252
|
assert(params->ith == 0);
|
9108
9253
|
assert(ggml_are_same_shape(src0, dst));
|
9109
9254
|
|
9110
|
-
if (params->type ==
|
9255
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9111
9256
|
return;
|
9112
9257
|
}
|
9113
9258
|
|
@@ -9153,7 +9298,7 @@ static void ggml_compute_forward_sgn_f32(
|
|
9153
9298
|
assert(params->ith == 0);
|
9154
9299
|
assert(ggml_are_same_shape(src0, dst));
|
9155
9300
|
|
9156
|
-
if (params->type ==
|
9301
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9157
9302
|
return;
|
9158
9303
|
}
|
9159
9304
|
|
@@ -9199,7 +9344,7 @@ static void ggml_compute_forward_neg_f32(
|
|
9199
9344
|
assert(params->ith == 0);
|
9200
9345
|
assert(ggml_are_same_shape(src0, dst));
|
9201
9346
|
|
9202
|
-
if (params->type ==
|
9347
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9203
9348
|
return;
|
9204
9349
|
}
|
9205
9350
|
|
@@ -9245,7 +9390,7 @@ static void ggml_compute_forward_step_f32(
|
|
9245
9390
|
assert(params->ith == 0);
|
9246
9391
|
assert(ggml_are_same_shape(src0, dst));
|
9247
9392
|
|
9248
|
-
if (params->type ==
|
9393
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9249
9394
|
return;
|
9250
9395
|
}
|
9251
9396
|
|
@@ -9291,7 +9436,7 @@ static void ggml_compute_forward_tanh_f32(
|
|
9291
9436
|
assert(params->ith == 0);
|
9292
9437
|
assert(ggml_are_same_shape(src0, dst));
|
9293
9438
|
|
9294
|
-
if (params->type ==
|
9439
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9295
9440
|
return;
|
9296
9441
|
}
|
9297
9442
|
|
@@ -9337,7 +9482,7 @@ static void ggml_compute_forward_elu_f32(
|
|
9337
9482
|
assert(params->ith == 0);
|
9338
9483
|
assert(ggml_are_same_shape(src0, dst));
|
9339
9484
|
|
9340
|
-
if (params->type ==
|
9485
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9341
9486
|
return;
|
9342
9487
|
}
|
9343
9488
|
|
@@ -9383,7 +9528,7 @@ static void ggml_compute_forward_relu_f32(
|
|
9383
9528
|
assert(params->ith == 0);
|
9384
9529
|
assert(ggml_are_same_shape(src0, dst));
|
9385
9530
|
|
9386
|
-
if (params->type ==
|
9531
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9387
9532
|
return;
|
9388
9533
|
}
|
9389
9534
|
|
@@ -9430,7 +9575,7 @@ static void ggml_compute_forward_gelu_f32(
|
|
9430
9575
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
9431
9576
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9432
9577
|
|
9433
|
-
if (params->type ==
|
9578
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9434
9579
|
return;
|
9435
9580
|
}
|
9436
9581
|
|
@@ -9493,7 +9638,7 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
9493
9638
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
9494
9639
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9495
9640
|
|
9496
|
-
if (params->type ==
|
9641
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9497
9642
|
return;
|
9498
9643
|
}
|
9499
9644
|
|
@@ -9556,7 +9701,7 @@ static void ggml_compute_forward_silu_f32(
|
|
9556
9701
|
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
9557
9702
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9558
9703
|
|
9559
|
-
if (params->type ==
|
9704
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9560
9705
|
return;
|
9561
9706
|
}
|
9562
9707
|
|
@@ -9617,7 +9762,7 @@ static void ggml_compute_forward_leaky_relu_f32(
|
|
9617
9762
|
assert(params->ith == 0);
|
9618
9763
|
assert(ggml_are_same_shape(src0, dst));
|
9619
9764
|
|
9620
|
-
if (params->type ==
|
9765
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9621
9766
|
return;
|
9622
9767
|
}
|
9623
9768
|
|
@@ -9670,7 +9815,7 @@ static void ggml_compute_forward_silu_back_f32(
|
|
9670
9815
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9671
9816
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
9672
9817
|
|
9673
|
-
if (params->type ==
|
9818
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9674
9819
|
return;
|
9675
9820
|
}
|
9676
9821
|
|
@@ -9732,7 +9877,7 @@ static void ggml_compute_forward_hardswish_f32(
|
|
9732
9877
|
assert(params->ith == 0);
|
9733
9878
|
assert(ggml_are_same_shape(src0, dst));
|
9734
9879
|
|
9735
|
-
if (params->type ==
|
9880
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9736
9881
|
return;
|
9737
9882
|
}
|
9738
9883
|
|
@@ -9775,7 +9920,7 @@ static void ggml_compute_forward_hardsigmoid_f32(
|
|
9775
9920
|
assert(params->ith == 0);
|
9776
9921
|
assert(ggml_are_same_shape(src0, dst));
|
9777
9922
|
|
9778
|
-
if (params->type ==
|
9923
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9779
9924
|
return;
|
9780
9925
|
}
|
9781
9926
|
|
@@ -9821,7 +9966,7 @@ static void ggml_compute_forward_norm_f32(
|
|
9821
9966
|
|
9822
9967
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9823
9968
|
|
9824
|
-
if (params->type ==
|
9969
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9825
9970
|
return;
|
9826
9971
|
}
|
9827
9972
|
|
@@ -9896,7 +10041,7 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
9896
10041
|
|
9897
10042
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
9898
10043
|
|
9899
|
-
if (params->type ==
|
10044
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9900
10045
|
return;
|
9901
10046
|
}
|
9902
10047
|
|
@@ -9967,7 +10112,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
|
|
9967
10112
|
|
9968
10113
|
GGML_ASSERT(ggml_are_same_shape(src0, dst) && ggml_are_same_shape(src0, src1));
|
9969
10114
|
|
9970
|
-
if (params->type ==
|
10115
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
9971
10116
|
return;
|
9972
10117
|
}
|
9973
10118
|
|
@@ -10145,7 +10290,7 @@ static void ggml_compute_forward_group_norm_f32(
|
|
10145
10290
|
|
10146
10291
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10147
10292
|
|
10148
|
-
if (params->type ==
|
10293
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
10149
10294
|
return;
|
10150
10295
|
}
|
10151
10296
|
|
@@ -10163,7 +10308,7 @@ static void ggml_compute_forward_group_norm_f32(
|
|
10163
10308
|
int n_channels = src0->ne[2];
|
10164
10309
|
int n_groups = dst->op_params[0];
|
10165
10310
|
int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
|
10166
|
-
for (int i = ith; i < n_groups; i+=nth) {
|
10311
|
+
for (int i = ith; i < n_groups; i += nth) {
|
10167
10312
|
int start = i * n_channels_per_group;
|
10168
10313
|
int end = start + n_channels_per_group;
|
10169
10314
|
if (end > n_channels) {
|
@@ -10177,28 +10322,32 @@ static void ggml_compute_forward_group_norm_f32(
|
|
10177
10322
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
10178
10323
|
const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
|
10179
10324
|
|
10325
|
+
ggml_float sumr = 0.0;
|
10180
10326
|
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
10181
|
-
|
10327
|
+
sumr += (ggml_float)x[i00];
|
10182
10328
|
}
|
10329
|
+
sum += sumr;
|
10183
10330
|
}
|
10184
10331
|
}
|
10185
|
-
float mean = sum / (ne00 * ne01 * step);
|
10186
|
-
ggml_float sum2 = 0.0;
|
10332
|
+
const float mean = sum / (ne00 * ne01 * step);
|
10187
10333
|
|
10334
|
+
ggml_float sum2 = 0.0;
|
10188
10335
|
for (int64_t i02 = start; i02 < end; i02++) {
|
10189
10336
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
10190
10337
|
const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
|
10191
10338
|
|
10192
10339
|
float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3);
|
10193
10340
|
|
10341
|
+
ggml_float sumr = 0.0;
|
10194
10342
|
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
10195
10343
|
float v = x[i00] - mean;
|
10196
10344
|
y[i00] = v;
|
10197
|
-
|
10345
|
+
sumr += (ggml_float)(v * v);
|
10198
10346
|
}
|
10347
|
+
sum2 += sumr;
|
10199
10348
|
}
|
10200
10349
|
}
|
10201
|
-
float variance = sum2 / (ne00 * ne01 * step);
|
10350
|
+
const float variance = sum2 / (ne00 * ne01 * step);
|
10202
10351
|
const float scale = 1.0f / sqrtf(variance + eps);
|
10203
10352
|
|
10204
10353
|
for (int64_t i02 = start; i02 < end; i02++) {
|
@@ -10312,7 +10461,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10312
10461
|
|
10313
10462
|
#if defined(GGML_USE_CLBLAST)
|
10314
10463
|
if (ggml_cl_can_mul_mat(src0, src1, dst)) {
|
10315
|
-
if (params->ith == 0 && params->type ==
|
10464
|
+
if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
|
10316
10465
|
ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
|
10317
10466
|
}
|
10318
10467
|
return;
|
@@ -10325,7 +10474,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10325
10474
|
const size_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
|
10326
10475
|
UNUSED(desired_wsize);
|
10327
10476
|
|
10328
|
-
if (params->type ==
|
10477
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
10329
10478
|
if (type != GGML_TYPE_F32) {
|
10330
10479
|
assert(params->wsize >= desired_wsize);
|
10331
10480
|
// parallelize by src0 rows
|
@@ -10348,7 +10497,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10348
10497
|
return;
|
10349
10498
|
}
|
10350
10499
|
|
10351
|
-
if (params->type ==
|
10500
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
10352
10501
|
return;
|
10353
10502
|
}
|
10354
10503
|
|
@@ -10386,7 +10535,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10386
10535
|
}
|
10387
10536
|
#endif
|
10388
10537
|
|
10389
|
-
if (params->type ==
|
10538
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
10390
10539
|
if (ith != 0) {
|
10391
10540
|
return;
|
10392
10541
|
}
|
@@ -10410,7 +10559,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10410
10559
|
return;
|
10411
10560
|
}
|
10412
10561
|
|
10413
|
-
if (params->type ==
|
10562
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
10414
10563
|
return;
|
10415
10564
|
}
|
10416
10565
|
|
@@ -10567,7 +10716,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|
10567
10716
|
|
10568
10717
|
#define MMID_MATRIX_ROW(row_id, i1) matrix_rows[(row_id)*ne11 + (i1)]
|
10569
10718
|
|
10570
|
-
if (params->type ==
|
10719
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
10571
10720
|
if (ith != 0) {
|
10572
10721
|
return;
|
10573
10722
|
}
|
@@ -10604,7 +10753,7 @@ static void ggml_compute_forward_mul_mat_id(
|
|
10604
10753
|
return;
|
10605
10754
|
}
|
10606
10755
|
|
10607
|
-
if (params->type ==
|
10756
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
10608
10757
|
return;
|
10609
10758
|
}
|
10610
10759
|
|
@@ -10752,7 +10901,7 @@ static void ggml_compute_forward_out_prod_f32(
|
|
10752
10901
|
(ggml_is_contiguous(src1) || ggml_is_transposed(src1));
|
10753
10902
|
#endif
|
10754
10903
|
|
10755
|
-
if (params->type ==
|
10904
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
10756
10905
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
|
10757
10906
|
if (use_blas) {
|
10758
10907
|
return;
|
@@ -10765,7 +10914,7 @@ static void ggml_compute_forward_out_prod_f32(
|
|
10765
10914
|
return;
|
10766
10915
|
}
|
10767
10916
|
|
10768
|
-
if (params->type ==
|
10917
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
10769
10918
|
return;
|
10770
10919
|
}
|
10771
10920
|
|
@@ -10945,7 +11094,7 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
10945
11094
|
// TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
|
10946
11095
|
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
10947
11096
|
|
10948
|
-
if (params->type ==
|
11097
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
10949
11098
|
if (ith != 0) {
|
10950
11099
|
return;
|
10951
11100
|
}
|
@@ -10953,7 +11102,7 @@ static void ggml_compute_forward_out_prod_q_f32(
|
|
10953
11102
|
return;
|
10954
11103
|
}
|
10955
11104
|
|
10956
|
-
if (params->type ==
|
11105
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
10957
11106
|
return;
|
10958
11107
|
}
|
10959
11108
|
|
@@ -11039,6 +11188,9 @@ static void ggml_compute_forward_out_prod(
|
|
11039
11188
|
case GGML_TYPE_IQ3_XXS:
|
11040
11189
|
case GGML_TYPE_IQ1_S:
|
11041
11190
|
case GGML_TYPE_IQ4_NL:
|
11191
|
+
case GGML_TYPE_IQ4_XS:
|
11192
|
+
case GGML_TYPE_IQ3_S:
|
11193
|
+
case GGML_TYPE_IQ2_S:
|
11042
11194
|
{
|
11043
11195
|
ggml_compute_forward_out_prod_q_f32(params, dst);
|
11044
11196
|
} break;
|
@@ -11070,7 +11222,7 @@ static void ggml_compute_forward_scale_f32(
|
|
11070
11222
|
GGML_ASSERT(ggml_is_contiguous(dst));
|
11071
11223
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11072
11224
|
|
11073
|
-
if (params->type ==
|
11225
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11074
11226
|
return;
|
11075
11227
|
}
|
11076
11228
|
|
@@ -11142,7 +11294,7 @@ static void ggml_compute_forward_set_f32(
|
|
11142
11294
|
size_t offset = ((int32_t *) dst->op_params)[3];
|
11143
11295
|
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
11144
11296
|
|
11145
|
-
if (!inplace && (params->type ==
|
11297
|
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
11146
11298
|
if (params->ith != 0) {
|
11147
11299
|
return;
|
11148
11300
|
}
|
@@ -11154,7 +11306,7 @@ static void ggml_compute_forward_set_f32(
|
|
11154
11306
|
ggml_nbytes(dst));
|
11155
11307
|
}
|
11156
11308
|
|
11157
|
-
if (params->type ==
|
11309
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11158
11310
|
return;
|
11159
11311
|
}
|
11160
11312
|
|
@@ -11227,6 +11379,9 @@ static void ggml_compute_forward_set(
|
|
11227
11379
|
case GGML_TYPE_IQ3_XXS:
|
11228
11380
|
case GGML_TYPE_IQ1_S:
|
11229
11381
|
case GGML_TYPE_IQ4_NL:
|
11382
|
+
case GGML_TYPE_IQ4_XS:
|
11383
|
+
case GGML_TYPE_IQ3_S:
|
11384
|
+
case GGML_TYPE_IQ2_S:
|
11230
11385
|
default:
|
11231
11386
|
{
|
11232
11387
|
GGML_ASSERT(false);
|
@@ -11301,7 +11456,7 @@ static void ggml_compute_forward_get_rows_q(
|
|
11301
11456
|
|
11302
11457
|
assert(params->ith == 0);
|
11303
11458
|
|
11304
|
-
if (params->type ==
|
11459
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11305
11460
|
return;
|
11306
11461
|
}
|
11307
11462
|
|
@@ -11341,7 +11496,7 @@ static void ggml_compute_forward_get_rows_f16(
|
|
11341
11496
|
|
11342
11497
|
assert(params->ith == 0);
|
11343
11498
|
|
11344
|
-
if (params->type ==
|
11499
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11345
11500
|
return;
|
11346
11501
|
}
|
11347
11502
|
|
@@ -11378,7 +11533,7 @@ static void ggml_compute_forward_get_rows_f32(
|
|
11378
11533
|
|
11379
11534
|
assert(params->ith == 0);
|
11380
11535
|
|
11381
|
-
if (params->type ==
|
11536
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11382
11537
|
return;
|
11383
11538
|
}
|
11384
11539
|
|
@@ -11429,6 +11584,9 @@ static void ggml_compute_forward_get_rows(
|
|
11429
11584
|
case GGML_TYPE_IQ3_XXS:
|
11430
11585
|
case GGML_TYPE_IQ1_S:
|
11431
11586
|
case GGML_TYPE_IQ4_NL:
|
11587
|
+
case GGML_TYPE_IQ4_XS:
|
11588
|
+
case GGML_TYPE_IQ3_S:
|
11589
|
+
case GGML_TYPE_IQ2_S:
|
11432
11590
|
{
|
11433
11591
|
ggml_compute_forward_get_rows_q(params, dst);
|
11434
11592
|
} break;
|
@@ -11480,14 +11638,14 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
|
|
11480
11638
|
|
11481
11639
|
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
11482
11640
|
|
11483
|
-
if (params->type ==
|
11641
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
11484
11642
|
if (params->ith != 0) {
|
11485
11643
|
return;
|
11486
11644
|
}
|
11487
11645
|
memset(dst->data, 0, ggml_nbytes(dst));
|
11488
11646
|
}
|
11489
11647
|
|
11490
|
-
if (params->type ==
|
11648
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11491
11649
|
return;
|
11492
11650
|
}
|
11493
11651
|
|
@@ -11519,14 +11677,14 @@ static void ggml_compute_forward_get_rows_back_f32(
|
|
11519
11677
|
|
11520
11678
|
// ggml_compute_forward_dup_same_cont(params, opt0, dst);
|
11521
11679
|
|
11522
|
-
if (params->type ==
|
11680
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
11523
11681
|
if (params->ith != 0) {
|
11524
11682
|
return;
|
11525
11683
|
}
|
11526
11684
|
memset(dst->data, 0, ggml_nbytes(dst));
|
11527
11685
|
}
|
11528
11686
|
|
11529
|
-
if (params->type ==
|
11687
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11530
11688
|
return;
|
11531
11689
|
}
|
11532
11690
|
|
@@ -11596,7 +11754,7 @@ static void ggml_compute_forward_diag_f32(
|
|
11596
11754
|
|
11597
11755
|
GGML_ASSERT(params->ith == 0);
|
11598
11756
|
|
11599
|
-
if (params->type ==
|
11757
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11600
11758
|
return;
|
11601
11759
|
}
|
11602
11760
|
|
@@ -11665,7 +11823,7 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11665
11823
|
|
11666
11824
|
GGML_ASSERT(n_past >= 0);
|
11667
11825
|
|
11668
|
-
if (!inplace && (params->type ==
|
11826
|
+
if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
|
11669
11827
|
if (ith != 0) {
|
11670
11828
|
return;
|
11671
11829
|
}
|
@@ -11679,7 +11837,7 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11679
11837
|
ggml_nbytes(dst));
|
11680
11838
|
}
|
11681
11839
|
|
11682
|
-
if (params->type ==
|
11840
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11683
11841
|
return;
|
11684
11842
|
}
|
11685
11843
|
|
@@ -11753,7 +11911,7 @@ static void ggml_compute_forward_soft_max_f32(
|
|
11753
11911
|
assert(ggml_is_contiguous(dst));
|
11754
11912
|
assert(ggml_are_same_shape(src0, dst));
|
11755
11913
|
|
11756
|
-
if (params->type ==
|
11914
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11757
11915
|
return;
|
11758
11916
|
}
|
11759
11917
|
|
@@ -11891,7 +12049,7 @@ static void ggml_compute_forward_soft_max_back_f32(
|
|
11891
12049
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11892
12050
|
GGML_ASSERT(ggml_are_same_shape(src1, dst));
|
11893
12051
|
|
11894
|
-
if (params->type ==
|
12052
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11895
12053
|
return;
|
11896
12054
|
}
|
11897
12055
|
|
@@ -11985,7 +12143,7 @@ static void ggml_compute_forward_alibi_f32(
|
|
11985
12143
|
|
11986
12144
|
assert(params->ith == 0);
|
11987
12145
|
|
11988
|
-
if (params->type ==
|
12146
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
11989
12147
|
return;
|
11990
12148
|
}
|
11991
12149
|
|
@@ -12044,7 +12202,7 @@ static void ggml_compute_forward_alibi_f16(
|
|
12044
12202
|
|
12045
12203
|
assert(params->ith == 0);
|
12046
12204
|
|
12047
|
-
if (params->type ==
|
12205
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
12048
12206
|
return;
|
12049
12207
|
}
|
12050
12208
|
|
@@ -12129,6 +12287,9 @@ static void ggml_compute_forward_alibi(
|
|
12129
12287
|
case GGML_TYPE_IQ3_XXS:
|
12130
12288
|
case GGML_TYPE_IQ1_S:
|
12131
12289
|
case GGML_TYPE_IQ4_NL:
|
12290
|
+
case GGML_TYPE_IQ4_XS:
|
12291
|
+
case GGML_TYPE_IQ3_S:
|
12292
|
+
case GGML_TYPE_IQ2_S:
|
12132
12293
|
case GGML_TYPE_Q8_K:
|
12133
12294
|
case GGML_TYPE_I8:
|
12134
12295
|
case GGML_TYPE_I16:
|
@@ -12150,7 +12311,7 @@ static void ggml_compute_forward_clamp_f32(
|
|
12150
12311
|
|
12151
12312
|
assert(params->ith == 0);
|
12152
12313
|
|
12153
|
-
if (params->type ==
|
12314
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
12154
12315
|
return;
|
12155
12316
|
}
|
12156
12317
|
|
@@ -12212,6 +12373,9 @@ static void ggml_compute_forward_clamp(
|
|
12212
12373
|
case GGML_TYPE_IQ3_XXS:
|
12213
12374
|
case GGML_TYPE_IQ1_S:
|
12214
12375
|
case GGML_TYPE_IQ4_NL:
|
12376
|
+
case GGML_TYPE_IQ4_XS:
|
12377
|
+
case GGML_TYPE_IQ3_S:
|
12378
|
+
case GGML_TYPE_IQ2_S:
|
12215
12379
|
case GGML_TYPE_Q8_K:
|
12216
12380
|
case GGML_TYPE_I8:
|
12217
12381
|
case GGML_TYPE_I16:
|
@@ -12289,7 +12453,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12289
12453
|
const struct ggml_tensor * src0 = dst->src[0];
|
12290
12454
|
const struct ggml_tensor * src1 = dst->src[1];
|
12291
12455
|
|
12292
|
-
if (params->type ==
|
12456
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
12293
12457
|
return;
|
12294
12458
|
}
|
12295
12459
|
|
@@ -12467,7 +12631,7 @@ static void ggml_compute_forward_rope_f16(
|
|
12467
12631
|
const struct ggml_tensor * src0 = dst->src[0];
|
12468
12632
|
const struct ggml_tensor * src1 = dst->src[1];
|
12469
12633
|
|
12470
|
-
if (params->type ==
|
12634
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
12471
12635
|
return;
|
12472
12636
|
}
|
12473
12637
|
|
@@ -12698,7 +12862,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
12698
12862
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12699
12863
|
GGML_ASSERT(nb10 == sizeof(float));
|
12700
12864
|
|
12701
|
-
if (params->type ==
|
12865
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
12702
12866
|
if (ith != 0) {
|
12703
12867
|
return;
|
12704
12868
|
}
|
@@ -12738,7 +12902,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
|
12738
12902
|
return;
|
12739
12903
|
}
|
12740
12904
|
|
12741
|
-
if (params->type ==
|
12905
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
12742
12906
|
return;
|
12743
12907
|
}
|
12744
12908
|
|
@@ -12797,7 +12961,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
12797
12961
|
GGML_ASSERT(nb00 == sizeof(float));
|
12798
12962
|
GGML_ASSERT(nb10 == sizeof(float));
|
12799
12963
|
|
12800
|
-
if (params->type ==
|
12964
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
12801
12965
|
if (ith != 0) {
|
12802
12966
|
return;
|
12803
12967
|
}
|
@@ -12837,7 +13001,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
|
|
12837
13001
|
return;
|
12838
13002
|
}
|
12839
13003
|
|
12840
|
-
if (params->type ==
|
13004
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
12841
13005
|
return;
|
12842
13006
|
}
|
12843
13007
|
|
@@ -12941,11 +13105,11 @@ static void ggml_compute_forward_im2col_f32(
|
|
12941
13105
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12942
13106
|
GGML_ASSERT(nb10 == sizeof(float));
|
12943
13107
|
|
12944
|
-
if (params->type ==
|
13108
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
12945
13109
|
return;
|
12946
13110
|
}
|
12947
13111
|
|
12948
|
-
if (params->type ==
|
13112
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
12949
13113
|
return;
|
12950
13114
|
}
|
12951
13115
|
|
@@ -13029,11 +13193,11 @@ static void ggml_compute_forward_im2col_f16(
|
|
13029
13193
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13030
13194
|
GGML_ASSERT(nb10 == sizeof(float));
|
13031
13195
|
|
13032
|
-
if (params->type ==
|
13196
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
13033
13197
|
return;
|
13034
13198
|
}
|
13035
13199
|
|
13036
|
-
if (params->type ==
|
13200
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
13037
13201
|
return;
|
13038
13202
|
}
|
13039
13203
|
|
@@ -13115,7 +13279,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
13115
13279
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13116
13280
|
GGML_ASSERT(nb10 == sizeof(float));
|
13117
13281
|
|
13118
|
-
if (params->type ==
|
13282
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
13119
13283
|
if (ith != 0) {
|
13120
13284
|
return;
|
13121
13285
|
}
|
@@ -13157,7 +13321,7 @@ static void ggml_compute_forward_conv_transpose_2d(
|
|
13157
13321
|
return;
|
13158
13322
|
}
|
13159
13323
|
|
13160
|
-
if (params->type ==
|
13324
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
13161
13325
|
return;
|
13162
13326
|
}
|
13163
13327
|
|
@@ -13209,7 +13373,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
13209
13373
|
assert(src->type == GGML_TYPE_F32);
|
13210
13374
|
assert(params->ith == 0);
|
13211
13375
|
|
13212
|
-
if (params->type ==
|
13376
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13213
13377
|
return;
|
13214
13378
|
}
|
13215
13379
|
|
@@ -13278,7 +13442,7 @@ static void ggml_compute_forward_pool_2d(
|
|
13278
13442
|
GGML_ASSERT(src->type == GGML_TYPE_F32);
|
13279
13443
|
GGML_ASSERT(params->ith == 0);
|
13280
13444
|
|
13281
|
-
if (params->type ==
|
13445
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13282
13446
|
return;
|
13283
13447
|
}
|
13284
13448
|
|
@@ -13351,7 +13515,7 @@ static void ggml_compute_forward_upscale_f32(
|
|
13351
13515
|
|
13352
13516
|
const struct ggml_tensor * src0 = dst->src[0];
|
13353
13517
|
|
13354
|
-
if (params->type ==
|
13518
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13355
13519
|
return;
|
13356
13520
|
}
|
13357
13521
|
|
@@ -13411,7 +13575,7 @@ static void ggml_compute_forward_pad_f32(
|
|
13411
13575
|
|
13412
13576
|
const struct ggml_tensor * src0 = dst->src[0];
|
13413
13577
|
|
13414
|
-
if (params->type ==
|
13578
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13415
13579
|
return;
|
13416
13580
|
}
|
13417
13581
|
|
@@ -13464,6 +13628,106 @@ static void ggml_compute_forward_pad(
|
|
13464
13628
|
}
|
13465
13629
|
}
|
13466
13630
|
|
13631
|
+
|
13632
|
+
// ggml_compute_forward_arange
|
13633
|
+
|
13634
|
+
static void ggml_compute_forward_arange_f32(
|
13635
|
+
const struct ggml_compute_params * params,
|
13636
|
+
struct ggml_tensor * dst) {
|
13637
|
+
|
13638
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13639
|
+
return;
|
13640
|
+
}
|
13641
|
+
|
13642
|
+
GGML_ASSERT(dst->nb[0] == sizeof(float));
|
13643
|
+
|
13644
|
+
const int ith = params->ith;
|
13645
|
+
const int nth = params->nth;
|
13646
|
+
|
13647
|
+
const float start = ggml_get_op_params_f32(dst, 0);
|
13648
|
+
const float stop = ggml_get_op_params_f32(dst, 1);
|
13649
|
+
const float step = ggml_get_op_params_f32(dst, 2);
|
13650
|
+
|
13651
|
+
const int64_t steps = (int64_t) ceilf((stop - start) / step);
|
13652
|
+
|
13653
|
+
GGML_ASSERT(ggml_nelements(dst) == steps);
|
13654
|
+
|
13655
|
+
for (int64_t i = ith; i < steps; i+= nth) {
|
13656
|
+
float value = start + step * i;
|
13657
|
+
((float *)dst->data)[i] = value;
|
13658
|
+
}
|
13659
|
+
}
|
13660
|
+
|
13661
|
+
static void ggml_compute_forward_arange(
|
13662
|
+
const struct ggml_compute_params * params,
|
13663
|
+
struct ggml_tensor * dst) {
|
13664
|
+
switch (dst->type) {
|
13665
|
+
case GGML_TYPE_F32:
|
13666
|
+
{
|
13667
|
+
ggml_compute_forward_arange_f32(params, dst);
|
13668
|
+
} break;
|
13669
|
+
default:
|
13670
|
+
{
|
13671
|
+
GGML_ASSERT(false);
|
13672
|
+
} break;
|
13673
|
+
}
|
13674
|
+
}
|
13675
|
+
|
13676
|
+
static void ggml_compute_forward_timestep_embedding_f32(
|
13677
|
+
const struct ggml_compute_params * params,
|
13678
|
+
struct ggml_tensor * dst) {
|
13679
|
+
|
13680
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13681
|
+
return;
|
13682
|
+
}
|
13683
|
+
|
13684
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13685
|
+
|
13686
|
+
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
13687
|
+
|
13688
|
+
const int ith = params->ith;
|
13689
|
+
const int nth = params->nth;
|
13690
|
+
|
13691
|
+
GGML_TENSOR_UNARY_OP_LOCALS
|
13692
|
+
|
13693
|
+
const int dim = ggml_get_op_params_i32(dst, 0);
|
13694
|
+
const int max_period = ggml_get_op_params_i32(dst, 1);
|
13695
|
+
|
13696
|
+
int half = dim / 2;
|
13697
|
+
|
13698
|
+
for (int64_t i = 0; i < ne00; i++) {
|
13699
|
+
float * embed_data = (float *)((char *) dst->data + i*nb1);
|
13700
|
+
for (int64_t j = ith; j < half; j += nth) {
|
13701
|
+
float timestep = ((float *)src0->data)[i];
|
13702
|
+
float freq = (float)expf(-logf(max_period) * j / half);
|
13703
|
+
float arg = timestep * freq;
|
13704
|
+
embed_data[j] = cosf(arg);
|
13705
|
+
embed_data[j + half] = sinf(arg);
|
13706
|
+
}
|
13707
|
+
if (dim % 2 != 0 && ith == 0) {
|
13708
|
+
embed_data[dim] = 0.f;
|
13709
|
+
}
|
13710
|
+
}
|
13711
|
+
}
|
13712
|
+
|
13713
|
+
static void ggml_compute_forward_timestep_embedding(
|
13714
|
+
const struct ggml_compute_params * params,
|
13715
|
+
struct ggml_tensor * dst) {
|
13716
|
+
|
13717
|
+
const struct ggml_tensor * src0 = dst->src[0];
|
13718
|
+
|
13719
|
+
switch (src0->type) {
|
13720
|
+
case GGML_TYPE_F32:
|
13721
|
+
{
|
13722
|
+
ggml_compute_forward_timestep_embedding_f32(params, dst);
|
13723
|
+
} break;
|
13724
|
+
default:
|
13725
|
+
{
|
13726
|
+
GGML_ASSERT(false);
|
13727
|
+
} break;
|
13728
|
+
}
|
13729
|
+
}
|
13730
|
+
|
13467
13731
|
// ggml_compute_forward_argsort
|
13468
13732
|
|
13469
13733
|
static void ggml_compute_forward_argsort_f32(
|
@@ -13472,7 +13736,7 @@ static void ggml_compute_forward_argsort_f32(
|
|
13472
13736
|
|
13473
13737
|
const struct ggml_tensor * src0 = dst->src[0];
|
13474
13738
|
|
13475
|
-
if (params->type ==
|
13739
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13476
13740
|
return;
|
13477
13741
|
}
|
13478
13742
|
|
@@ -13498,8 +13762,8 @@ static void ggml_compute_forward_argsort_f32(
|
|
13498
13762
|
// C doesn't have a functional sort, so we do a bubble sort instead
|
13499
13763
|
for (int64_t j = 0; j < ne0; j++) {
|
13500
13764
|
for (int64_t k = j + 1; k < ne0; k++) {
|
13501
|
-
if ((order ==
|
13502
|
-
(order ==
|
13765
|
+
if ((order == GGML_SORT_ORDER_ASC && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
|
13766
|
+
(order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
|
13503
13767
|
int32_t tmp = dst_data[j];
|
13504
13768
|
dst_data[j] = dst_data[k];
|
13505
13769
|
dst_data[k] = tmp;
|
@@ -13582,11 +13846,11 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13582
13846
|
GGML_ASSERT(nb1 <= nb2);
|
13583
13847
|
GGML_ASSERT(nb2 <= nb3);
|
13584
13848
|
|
13585
|
-
if (params->type ==
|
13849
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
13586
13850
|
return;
|
13587
13851
|
}
|
13588
13852
|
|
13589
|
-
if (params->type ==
|
13853
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
13590
13854
|
return;
|
13591
13855
|
}
|
13592
13856
|
|
@@ -13774,11 +14038,11 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13774
14038
|
GGML_ASSERT(nb1 <= nb2);
|
13775
14039
|
GGML_ASSERT(nb2 <= nb3);
|
13776
14040
|
|
13777
|
-
if (params->type ==
|
14041
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
13778
14042
|
return;
|
13779
14043
|
}
|
13780
14044
|
|
13781
|
-
if (params->type ==
|
14045
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
13782
14046
|
return;
|
13783
14047
|
}
|
13784
14048
|
|
@@ -14033,11 +14297,11 @@ static void ggml_compute_forward_flash_ff_f16(
|
|
14033
14297
|
GGML_ASSERT(nb1 <= nb2);
|
14034
14298
|
GGML_ASSERT(nb2 <= nb3);
|
14035
14299
|
|
14036
|
-
if (params->type ==
|
14300
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
14037
14301
|
return;
|
14038
14302
|
}
|
14039
14303
|
|
14040
|
-
if (params->type ==
|
14304
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
14041
14305
|
return;
|
14042
14306
|
}
|
14043
14307
|
|
@@ -14192,14 +14456,14 @@ static void ggml_compute_forward_flash_attn_back_f32(
|
|
14192
14456
|
GGML_ASSERT(nb1 <= nb2);
|
14193
14457
|
GGML_ASSERT(nb2 <= nb3);
|
14194
14458
|
|
14195
|
-
if (params->type ==
|
14459
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
14196
14460
|
if (ith == 0) {
|
14197
14461
|
memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
|
14198
14462
|
}
|
14199
14463
|
return;
|
14200
14464
|
}
|
14201
14465
|
|
14202
|
-
if (params->type ==
|
14466
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
14203
14467
|
return;
|
14204
14468
|
}
|
14205
14469
|
|
@@ -14515,7 +14779,7 @@ static void ggml_compute_forward_win_part_f32(
|
|
14515
14779
|
|
14516
14780
|
const struct ggml_tensor * src0 = dst->src[0];
|
14517
14781
|
|
14518
|
-
if (params->type ==
|
14782
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14519
14783
|
return;
|
14520
14784
|
}
|
14521
14785
|
|
@@ -14581,7 +14845,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14581
14845
|
|
14582
14846
|
const struct ggml_tensor * src0 = dst->src[0];
|
14583
14847
|
|
14584
|
-
if (params->type ==
|
14848
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14585
14849
|
return;
|
14586
14850
|
}
|
14587
14851
|
|
@@ -14709,7 +14973,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
|
|
14709
14973
|
|
14710
14974
|
const struct ggml_tensor * src0 = dst->src[0];
|
14711
14975
|
|
14712
|
-
if (params->type ==
|
14976
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14713
14977
|
return;
|
14714
14978
|
}
|
14715
14979
|
|
@@ -14761,14 +15025,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
|
|
14761
15025
|
const struct ggml_tensor * src2 = dst->src[2];
|
14762
15026
|
|
14763
15027
|
const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
|
14764
|
-
if (!inplace && params->type ==
|
15028
|
+
if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
|
14765
15029
|
if (params->ith != 0) {
|
14766
15030
|
return;
|
14767
15031
|
}
|
14768
15032
|
memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
|
14769
15033
|
return;
|
14770
15034
|
}
|
14771
|
-
if (params->type ==
|
15035
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14772
15036
|
return;
|
14773
15037
|
}
|
14774
15038
|
|
@@ -14850,7 +15114,7 @@ static void ggml_compute_forward_map_unary_f32(
|
|
14850
15114
|
|
14851
15115
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
14852
15116
|
|
14853
|
-
if (params->type ==
|
15117
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14854
15118
|
return;
|
14855
15119
|
}
|
14856
15120
|
|
@@ -14899,7 +15163,7 @@ static void ggml_compute_forward_map_binary_f32(
|
|
14899
15163
|
assert(params->ith == 0);
|
14900
15164
|
assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
|
14901
15165
|
|
14902
|
-
if (params->type ==
|
15166
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14903
15167
|
return;
|
14904
15168
|
}
|
14905
15169
|
|
@@ -14948,7 +15212,7 @@ static void ggml_compute_forward_map_custom1_f32(
|
|
14948
15212
|
|
14949
15213
|
assert(params->ith == 0);
|
14950
15214
|
|
14951
|
-
if (params->type ==
|
15215
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14952
15216
|
return;
|
14953
15217
|
}
|
14954
15218
|
|
@@ -14967,7 +15231,7 @@ static void ggml_compute_forward_map_custom2_f32(
|
|
14967
15231
|
|
14968
15232
|
assert(params->ith == 0);
|
14969
15233
|
|
14970
|
-
if (params->type ==
|
15234
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14971
15235
|
return;
|
14972
15236
|
}
|
14973
15237
|
|
@@ -14987,7 +15251,7 @@ static void ggml_compute_forward_map_custom3_f32(
|
|
14987
15251
|
|
14988
15252
|
assert(params->ith == 0);
|
14989
15253
|
|
14990
|
-
if (params->type ==
|
15254
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
14991
15255
|
return;
|
14992
15256
|
}
|
14993
15257
|
|
@@ -15002,13 +15266,14 @@ static void ggml_compute_forward_map_custom1(
|
|
15002
15266
|
|
15003
15267
|
const struct ggml_tensor * a = dst->src[0];
|
15004
15268
|
|
15005
|
-
if (params->type ==
|
15269
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
15006
15270
|
return;
|
15007
15271
|
}
|
15008
15272
|
|
15009
|
-
struct ggml_map_custom1_op_params
|
15273
|
+
struct ggml_map_custom1_op_params p;
|
15274
|
+
memcpy(&p, dst->op_params, sizeof(p));
|
15010
15275
|
|
15011
|
-
p
|
15276
|
+
p.fun(dst, a, params->ith, params->nth, p.userdata);
|
15012
15277
|
}
|
15013
15278
|
|
15014
15279
|
// ggml_compute_forward_map_custom2
|
@@ -15020,13 +15285,14 @@ static void ggml_compute_forward_map_custom2(
|
|
15020
15285
|
const struct ggml_tensor * a = dst->src[0];
|
15021
15286
|
const struct ggml_tensor * b = dst->src[1];
|
15022
15287
|
|
15023
|
-
if (params->type ==
|
15288
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
15024
15289
|
return;
|
15025
15290
|
}
|
15026
15291
|
|
15027
|
-
struct ggml_map_custom2_op_params
|
15292
|
+
struct ggml_map_custom2_op_params p;
|
15293
|
+
memcpy(&p, dst->op_params, sizeof(p));
|
15028
15294
|
|
15029
|
-
p
|
15295
|
+
p.fun(dst, a, b, params->ith, params->nth, p.userdata);
|
15030
15296
|
}
|
15031
15297
|
|
15032
15298
|
// ggml_compute_forward_map_custom3
|
@@ -15039,13 +15305,14 @@ static void ggml_compute_forward_map_custom3(
|
|
15039
15305
|
const struct ggml_tensor * b = dst->src[1];
|
15040
15306
|
const struct ggml_tensor * c = dst->src[2];
|
15041
15307
|
|
15042
|
-
if (params->type ==
|
15308
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
15043
15309
|
return;
|
15044
15310
|
}
|
15045
15311
|
|
15046
|
-
struct ggml_map_custom3_op_params
|
15312
|
+
struct ggml_map_custom3_op_params p;
|
15313
|
+
memcpy(&p, dst->op_params, sizeof(p));
|
15047
15314
|
|
15048
|
-
p
|
15315
|
+
p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
|
15049
15316
|
}
|
15050
15317
|
|
15051
15318
|
// ggml_compute_forward_cross_entropy_loss
|
@@ -15073,14 +15340,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
|
|
15073
15340
|
|
15074
15341
|
GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
|
15075
15342
|
|
15076
|
-
if (params->type ==
|
15343
|
+
if (params->type == GGML_TASK_TYPE_INIT) {
|
15077
15344
|
if (ith == 0) {
|
15078
15345
|
memset(sums, 0, sizeof(float) * (nth + nth * nc));
|
15079
15346
|
}
|
15080
15347
|
return;
|
15081
15348
|
}
|
15082
15349
|
|
15083
|
-
if (params->type ==
|
15350
|
+
if (params->type == GGML_TASK_TYPE_FINALIZE) {
|
15084
15351
|
if (ith == 0) {
|
15085
15352
|
float * dp = (float *) dst->data;
|
15086
15353
|
ggml_vec_sum_f32(nth, dp, sums);
|
@@ -15195,7 +15462,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
|
|
15195
15462
|
const int64_t ith = params->ith;
|
15196
15463
|
const int64_t nth = params->nth;
|
15197
15464
|
|
15198
|
-
if (params->type ==
|
15465
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
15199
15466
|
return;
|
15200
15467
|
}
|
15201
15468
|
|
@@ -15302,8 +15569,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15302
15569
|
if (skip_cpu) {
|
15303
15570
|
return;
|
15304
15571
|
}
|
15305
|
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
15306
|
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
15572
|
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
15573
|
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
15307
15574
|
#elif defined(GGML_USE_VULKAN)
|
15308
15575
|
const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
|
15309
15576
|
#ifdef GGML_VULKAN_CHECK_RESULTS
|
@@ -15314,8 +15581,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15314
15581
|
if (skip_cpu) {
|
15315
15582
|
return;
|
15316
15583
|
}
|
15317
|
-
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
|
15318
|
-
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
|
15584
|
+
GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
|
15585
|
+
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
|
15319
15586
|
#endif // GGML_USE_CUBLAS
|
15320
15587
|
|
15321
15588
|
#ifdef GGML_USE_SYCL
|
@@ -15529,6 +15796,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15529
15796
|
{
|
15530
15797
|
ggml_compute_forward_pad(params, tensor);
|
15531
15798
|
} break;
|
15799
|
+
case GGML_OP_ARANGE:
|
15800
|
+
{
|
15801
|
+
ggml_compute_forward_arange(params, tensor);
|
15802
|
+
} break;
|
15803
|
+
case GGML_OP_TIMESTEP_EMBEDDING:
|
15804
|
+
{
|
15805
|
+
ggml_compute_forward_timestep_embedding(params, tensor);
|
15806
|
+
} break;
|
15532
15807
|
case GGML_OP_ARGSORT:
|
15533
15808
|
{
|
15534
15809
|
ggml_compute_forward_argsort(params, tensor);
|
@@ -16531,6 +16806,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
16531
16806
|
{
|
16532
16807
|
GGML_ASSERT(false); // TODO: not implemented
|
16533
16808
|
} break;
|
16809
|
+
case GGML_OP_ARANGE:
|
16810
|
+
{
|
16811
|
+
GGML_ASSERT(false); // TODO: not implemented
|
16812
|
+
} break;
|
16813
|
+
case GGML_OP_TIMESTEP_EMBEDDING:
|
16814
|
+
{
|
16815
|
+
GGML_ASSERT(false); // TODO: not implemented
|
16816
|
+
} break;
|
16534
16817
|
case GGML_OP_ARGSORT:
|
16535
16818
|
{
|
16536
16819
|
GGML_ASSERT(false); // TODO: not implemented
|
@@ -16861,7 +17144,7 @@ size_t ggml_graph_overhead(void) {
|
|
16861
17144
|
|
16862
17145
|
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
|
16863
17146
|
const size_t obj_size = ggml_graph_nbytes(size, grads);
|
16864
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
17147
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
|
16865
17148
|
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
16866
17149
|
|
16867
17150
|
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
|
@@ -17131,6 +17414,7 @@ struct ggml_compute_state {
|
|
17131
17414
|
ggml_thread_t thrd;
|
17132
17415
|
int ith;
|
17133
17416
|
struct ggml_compute_state_shared * shared;
|
17417
|
+
enum ggml_status ec;
|
17134
17418
|
};
|
17135
17419
|
|
17136
17420
|
static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
|
@@ -17282,6 +17566,14 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
17282
17566
|
{
|
17283
17567
|
n_tasks = n_threads;
|
17284
17568
|
} break;
|
17569
|
+
case GGML_OP_ARANGE:
|
17570
|
+
{
|
17571
|
+
n_tasks = n_threads;
|
17572
|
+
} break;
|
17573
|
+
case GGML_OP_TIMESTEP_EMBEDDING:
|
17574
|
+
{
|
17575
|
+
n_tasks = n_threads;
|
17576
|
+
} break;
|
17285
17577
|
case GGML_OP_ARGSORT:
|
17286
17578
|
{
|
17287
17579
|
n_tasks = n_threads;
|
@@ -17311,29 +17603,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
|
17311
17603
|
} break;
|
17312
17604
|
case GGML_OP_MAP_CUSTOM1:
|
17313
17605
|
{
|
17314
|
-
struct ggml_map_custom1_op_params
|
17315
|
-
|
17606
|
+
struct ggml_map_custom1_op_params p;
|
17607
|
+
memcpy(&p, node->op_params, sizeof(p));
|
17608
|
+
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
17316
17609
|
n_tasks = n_threads;
|
17317
17610
|
} else {
|
17318
|
-
n_tasks = MIN(p
|
17611
|
+
n_tasks = MIN(p.n_tasks, n_threads);
|
17319
17612
|
}
|
17320
17613
|
} break;
|
17321
17614
|
case GGML_OP_MAP_CUSTOM2:
|
17322
17615
|
{
|
17323
|
-
struct ggml_map_custom2_op_params
|
17324
|
-
|
17616
|
+
struct ggml_map_custom2_op_params p;
|
17617
|
+
memcpy(&p, node->op_params, sizeof(p));
|
17618
|
+
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
17325
17619
|
n_tasks = n_threads;
|
17326
17620
|
} else {
|
17327
|
-
n_tasks = MIN(p
|
17621
|
+
n_tasks = MIN(p.n_tasks, n_threads);
|
17328
17622
|
}
|
17329
17623
|
} break;
|
17330
17624
|
case GGML_OP_MAP_CUSTOM3:
|
17331
17625
|
{
|
17332
|
-
struct ggml_map_custom3_op_params
|
17333
|
-
|
17626
|
+
struct ggml_map_custom3_op_params p;
|
17627
|
+
memcpy(&p, node->op_params, sizeof(p));
|
17628
|
+
if (p.n_tasks == GGML_N_TASKS_MAX) {
|
17334
17629
|
n_tasks = n_threads;
|
17335
17630
|
} else {
|
17336
|
-
n_tasks = MIN(p
|
17631
|
+
n_tasks = MIN(p.n_tasks, n_threads);
|
17337
17632
|
}
|
17338
17633
|
} break;
|
17339
17634
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
@@ -17408,19 +17703,20 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17408
17703
|
set_numa_thread_affinity(state->ith);
|
17409
17704
|
|
17410
17705
|
int node_n = -1;
|
17411
|
-
int task_phase =
|
17706
|
+
int task_phase = GGML_TASK_TYPE_FINALIZE;
|
17412
17707
|
|
17413
17708
|
while (true) {
|
17414
17709
|
if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
|
17415
17710
|
state->shared->node_n += 1;
|
17416
|
-
|
17711
|
+
state->ec = GGML_STATUS_ABORTED;
|
17712
|
+
return 0;
|
17417
17713
|
}
|
17418
17714
|
|
17419
17715
|
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
|
17420
17716
|
// all other threads are finished and spinning
|
17421
17717
|
// do finalize and init here so we don't have synchronize again
|
17422
17718
|
struct ggml_compute_params params = {
|
17423
|
-
/*.type =*/
|
17719
|
+
/*.type =*/ GGML_TASK_TYPE_FINALIZE,
|
17424
17720
|
/*.ith =*/ 0,
|
17425
17721
|
/*.nth =*/ 0,
|
17426
17722
|
/*.wsize =*/ cplan->work_size,
|
@@ -17451,17 +17747,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17451
17747
|
if (n_tasks == 1) {
|
17452
17748
|
/* INIT */
|
17453
17749
|
if (GGML_OP_HAS_INIT[node->op]) {
|
17454
|
-
params.type =
|
17750
|
+
params.type = GGML_TASK_TYPE_INIT;
|
17455
17751
|
ggml_compute_forward(¶ms, node);
|
17456
17752
|
}
|
17457
17753
|
|
17458
17754
|
// TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
|
17459
17755
|
// they do something more efficient than spinning (?)
|
17460
|
-
params.type =
|
17756
|
+
params.type = GGML_TASK_TYPE_COMPUTE;
|
17461
17757
|
ggml_compute_forward(&params, node);
|
17462
17758
|
|
17463
17759
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
17464
|
-
params.type =
|
17760
|
+
params.type = GGML_TASK_TYPE_FINALIZE;
|
17465
17761
|
ggml_compute_forward(&params, node);
|
17466
17762
|
}
|
17467
17763
|
|
@@ -17475,7 +17771,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17475
17771
|
}
|
17476
17772
|
}
|
17477
17773
|
|
17478
|
-
task_phase =
|
17774
|
+
task_phase = GGML_TASK_TYPE_INIT;
|
17479
17775
|
atomic_store(&state->shared->n_active, n_threads);
|
17480
17776
|
atomic_store(&state->shared->node_n, node_n);
|
17481
17777
|
atomic_store(&state->shared->node_task, task_phase);
|
@@ -17492,7 +17788,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17492
17788
|
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
17493
17789
|
|
17494
17790
|
struct ggml_compute_params params = {
|
17495
|
-
/*.type =*/
|
17791
|
+
/*.type =*/ GGML_TASK_TYPE_INIT,
|
17496
17792
|
/*.ith =*/ state->ith,
|
17497
17793
|
/*.nth =*/ n_tasks,
|
17498
17794
|
/*.wsize =*/ cplan->work_size,
|
@@ -17506,7 +17802,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17506
17802
|
}
|
17507
17803
|
|
17508
17804
|
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
|
17509
|
-
task_phase =
|
17805
|
+
task_phase = GGML_TASK_TYPE_COMPUTE;
|
17510
17806
|
atomic_store(&state->shared->n_active, n_threads);
|
17511
17807
|
atomic_store(&state->shared->node_task, task_phase);
|
17512
17808
|
}
|
@@ -17521,12 +17817,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17521
17817
|
}
|
17522
17818
|
|
17523
17819
|
if (state->ith < n_tasks) {
|
17524
|
-
params.type =
|
17820
|
+
params.type = GGML_TASK_TYPE_COMPUTE;
|
17525
17821
|
ggml_compute_forward(&params, node);
|
17526
17822
|
}
|
17527
17823
|
|
17528
17824
|
if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
|
17529
|
-
task_phase =
|
17825
|
+
task_phase = GGML_TASK_TYPE_FINALIZE;
|
17530
17826
|
atomic_store(&state->shared->n_active, n_threads);
|
17531
17827
|
atomic_store(&state->shared->node_task, task_phase);
|
17532
17828
|
}
|
@@ -17535,7 +17831,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
17535
17831
|
}
|
17536
17832
|
}
|
17537
17833
|
|
17538
|
-
return
|
17834
|
+
return 0;
|
17539
17835
|
}
|
17540
17836
|
|
17541
17837
|
struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads) {
|
@@ -17731,7 +18027,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
|
17731
18027
|
return cplan;
|
17732
18028
|
}
|
17733
18029
|
|
17734
|
-
|
18030
|
+
enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
17735
18031
|
{
|
17736
18032
|
GGML_ASSERT(cplan);
|
17737
18033
|
GGML_ASSERT(cplan->n_threads > 0);
|
@@ -17762,7 +18058,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
17762
18058
|
/*.n_threads =*/ n_threads,
|
17763
18059
|
/*.n_active =*/ n_threads,
|
17764
18060
|
/*.node_n =*/ -1,
|
17765
|
-
/*.node_task =*/
|
18061
|
+
/*.node_task =*/ GGML_TASK_TYPE_FINALIZE,
|
17766
18062
|
/*.abort_callback =*/ NULL,
|
17767
18063
|
/*.abort_callback_data =*/ NULL,
|
17768
18064
|
};
|
@@ -17775,6 +18071,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
17775
18071
|
.thrd = 0,
|
17776
18072
|
.ith = j,
|
17777
18073
|
.shared = &state_shared,
|
18074
|
+
.ec = GGML_STATUS_SUCCESS,
|
17778
18075
|
};
|
17779
18076
|
|
17780
18077
|
const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
|
@@ -17785,12 +18082,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
17785
18082
|
|
17786
18083
|
workers[0].ith = 0;
|
17787
18084
|
workers[0].shared = &state_shared;
|
18085
|
+
workers[0].ec = GGML_STATUS_SUCCESS;
|
17788
18086
|
|
17789
18087
|
const int64_t perf_start_cycles = ggml_perf_cycles();
|
17790
18088
|
const int64_t perf_start_time_us = ggml_perf_time_us();
|
17791
18089
|
|
17792
18090
|
// this is a work thread too
|
17793
|
-
|
18091
|
+
ggml_graph_compute_thread(&workers[0]);
|
18092
|
+
enum ggml_status compute_status = workers[0].ec;
|
17794
18093
|
|
17795
18094
|
// don't leave affinity set on the main thread
|
17796
18095
|
clear_numa_thread_affinity();
|
@@ -17800,6 +18099,8 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
17800
18099
|
for (int j = 1; j < n_threads; j++) {
|
17801
18100
|
const int rc = ggml_thread_join(workers[j].thrd, NULL);
|
17802
18101
|
GGML_ASSERT(rc == 0);
|
18102
|
+
if (workers[j].ec != GGML_STATUS_SUCCESS)
|
18103
|
+
compute_status = workers[j].ec;
|
17803
18104
|
}
|
17804
18105
|
}
|
17805
18106
|
|
@@ -17827,14 +18128,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
17827
18128
|
return compute_status;
|
17828
18129
|
}
|
17829
18130
|
|
17830
|
-
|
18131
|
+
enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
17831
18132
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
17832
18133
|
|
17833
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
18134
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
|
17834
18135
|
|
17835
18136
|
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
17836
18137
|
|
17837
|
-
ggml_graph_compute(cgraph, &cplan);
|
18138
|
+
return ggml_graph_compute(cgraph, &cplan);
|
17838
18139
|
}
|
17839
18140
|
|
17840
18141
|
struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
|
@@ -18638,7 +18939,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18638
18939
|
float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
|
18639
18940
|
|
18640
18941
|
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
|
18641
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
18942
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
|
18642
18943
|
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
18643
18944
|
|
18644
18945
|
bool cancel = false;
|
@@ -18650,7 +18951,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18650
18951
|
if (callback) {
|
18651
18952
|
callback(callback_data, accum_step, &sched, &cancel);
|
18652
18953
|
if (cancel) {
|
18653
|
-
return
|
18954
|
+
return GGML_OPT_RESULT_CANCEL;
|
18654
18955
|
}
|
18655
18956
|
}
|
18656
18957
|
// ggml_graph_reset (gf);
|
@@ -18741,7 +19042,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18741
19042
|
if (callback) {
|
18742
19043
|
callback(callback_data, accum_step, &sched, &cancel);
|
18743
19044
|
if (cancel) {
|
18744
|
-
return
|
19045
|
+
return GGML_OPT_RESULT_CANCEL;;
|
18745
19046
|
}
|
18746
19047
|
}
|
18747
19048
|
// ggml_graph_reset (gf);
|
@@ -18758,7 +19059,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18758
19059
|
if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
|
18759
19060
|
GGML_PRINT_DEBUG("converged\n");
|
18760
19061
|
|
18761
|
-
return
|
19062
|
+
return GGML_OPT_RESULT_OK;
|
18762
19063
|
}
|
18763
19064
|
|
18764
19065
|
// delta-based convergence test
|
@@ -18768,7 +19069,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18768
19069
|
const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
|
18769
19070
|
|
18770
19071
|
if (fabsf(rate) < params.delta) {
|
18771
|
-
return
|
19072
|
+
return GGML_OPT_RESULT_OK;
|
18772
19073
|
}
|
18773
19074
|
}
|
18774
19075
|
|
@@ -18784,7 +19085,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18784
19085
|
++n_no_improvement[0];
|
18785
19086
|
|
18786
19087
|
if (n_no_improvement[0] >= params.max_no_improvement) {
|
18787
|
-
return
|
19088
|
+
return GGML_OPT_RESULT_OK;
|
18788
19089
|
}
|
18789
19090
|
}
|
18790
19091
|
}
|
@@ -18802,7 +19103,7 @@ static enum ggml_opt_result ggml_opt_adam(
|
|
18802
19103
|
}
|
18803
19104
|
}
|
18804
19105
|
|
18805
|
-
return
|
19106
|
+
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
18806
19107
|
}
|
18807
19108
|
|
18808
19109
|
//
|
@@ -18883,7 +19184,7 @@ static enum ggml_opt_result linesearch_backtracking(
|
|
18883
19184
|
float sched = 0;
|
18884
19185
|
callback(callback_data, accum_step, &sched, cancel);
|
18885
19186
|
if (*cancel) {
|
18886
|
-
return
|
19187
|
+
return GGML_OPT_RESULT_CANCEL;
|
18887
19188
|
}
|
18888
19189
|
}
|
18889
19190
|
// ggml_graph_reset (gf);
|
@@ -18956,7 +19257,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18956
19257
|
if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
|
18957
19258
|
params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
|
18958
19259
|
if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
|
18959
|
-
return
|
19260
|
+
return GGML_OPT_RESULT_INVALID_WOLFE;
|
18960
19261
|
}
|
18961
19262
|
}
|
18962
19263
|
|
@@ -18985,7 +19286,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
18985
19286
|
}
|
18986
19287
|
|
18987
19288
|
struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
|
18988
|
-
struct ggml_object * obj = ggml_new_object(ctx,
|
19289
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
|
18989
19290
|
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
18990
19291
|
|
18991
19292
|
float * x = opt->lbfgs.x->data; // current parameters
|
@@ -19026,7 +19327,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
19026
19327
|
float sched = 0;
|
19027
19328
|
callback(callback_data, accum_step, &sched, &cancel);
|
19028
19329
|
if (cancel) {
|
19029
|
-
return
|
19330
|
+
return GGML_OPT_RESULT_CANCEL;
|
19030
19331
|
}
|
19031
19332
|
}
|
19032
19333
|
// ggml_graph_reset (gf);
|
@@ -19054,7 +19355,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
19054
19355
|
|
19055
19356
|
// already optimized
|
19056
19357
|
if (gnorm/xnorm <= params.lbfgs.eps) {
|
19057
|
-
return
|
19358
|
+
return GGML_OPT_RESULT_OK;
|
19058
19359
|
}
|
19059
19360
|
|
19060
19361
|
if (opt->just_initialized) {
|
@@ -19099,7 +19400,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
19099
19400
|
// way to test and don't want to break something with so many changes lined up
|
19100
19401
|
ls = linesearch_backtracking(&params, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
|
19101
19402
|
if (cancel) {
|
19102
|
-
return
|
19403
|
+
return GGML_OPT_RESULT_CANCEL;
|
19103
19404
|
}
|
19104
19405
|
|
19105
19406
|
if (ls < 0) {
|
@@ -19122,7 +19423,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
19122
19423
|
}
|
19123
19424
|
if (gnorm/xnorm <= params.lbfgs.eps) {
|
19124
19425
|
// converged
|
19125
|
-
return
|
19426
|
+
return GGML_OPT_RESULT_OK;
|
19126
19427
|
}
|
19127
19428
|
|
19128
19429
|
// delta-based convergence test
|
@@ -19132,7 +19433,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
19132
19433
|
const float rate = (pf[k[0]%params.past] - fx)/fx;
|
19133
19434
|
|
19134
19435
|
if (fabsf(rate) < params.delta) {
|
19135
|
-
return
|
19436
|
+
return GGML_OPT_RESULT_OK;
|
19136
19437
|
}
|
19137
19438
|
}
|
19138
19439
|
|
@@ -19148,14 +19449,14 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
19148
19449
|
n_no_improvement[0]++;
|
19149
19450
|
|
19150
19451
|
if (n_no_improvement[0] >= params.max_no_improvement) {
|
19151
|
-
return
|
19452
|
+
return GGML_OPT_RESULT_OK;
|
19152
19453
|
}
|
19153
19454
|
}
|
19154
19455
|
}
|
19155
19456
|
|
19156
19457
|
if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
|
19157
19458
|
// reached the maximum number of iterations
|
19158
|
-
return
|
19459
|
+
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
19159
19460
|
}
|
19160
19461
|
|
19161
19462
|
// update vectors s and y:
|
@@ -19211,17 +19512,17 @@ static enum ggml_opt_result ggml_opt_lbfgs(
|
|
19211
19512
|
|
19212
19513
|
GGML_ASSERT(false && "lbfgs failed");
|
19213
19514
|
|
19214
|
-
return
|
19515
|
+
return GGML_OPT_RESULT_DID_NOT_CONVERGE;
|
19215
19516
|
}
|
19216
19517
|
|
19217
19518
|
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
19218
19519
|
struct ggml_opt_params result;
|
19219
19520
|
|
19220
19521
|
switch (type) {
|
19221
|
-
case
|
19522
|
+
case GGML_OPT_TYPE_ADAM:
|
19222
19523
|
{
|
19223
19524
|
result = (struct ggml_opt_params) {
|
19224
|
-
.type =
|
19525
|
+
.type = GGML_OPT_TYPE_ADAM,
|
19225
19526
|
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
19226
19527
|
.n_threads = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
|
19227
19528
|
.past = 0,
|
@@ -19249,10 +19550,10 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
19249
19550
|
},
|
19250
19551
|
};
|
19251
19552
|
} break;
|
19252
|
-
case
|
19553
|
+
case GGML_OPT_TYPE_LBFGS:
|
19253
19554
|
{
|
19254
19555
|
result = (struct ggml_opt_params) {
|
19255
|
-
.type =
|
19556
|
+
.type = GGML_OPT_TYPE_LBFGS,
|
19256
19557
|
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
19257
19558
|
.n_threads = 1,
|
19258
19559
|
.past = 0,
|
@@ -19297,12 +19598,12 @@ GGML_API void ggml_opt_init(
|
|
19297
19598
|
opt->just_initialized = true;
|
19298
19599
|
if (opt->ctx == NULL) {
|
19299
19600
|
struct ggml_init_params ctx_opt_params;
|
19300
|
-
if (opt->params.type ==
|
19601
|
+
if (opt->params.type == GGML_OPT_TYPE_ADAM) {
|
19301
19602
|
ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
|
19302
19603
|
if (opt->params.past > 0) {
|
19303
19604
|
ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
|
19304
19605
|
}
|
19305
|
-
} else if (opt->params.type ==
|
19606
|
+
} else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
|
19306
19607
|
ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
|
19307
19608
|
if (opt->params.past > 0) {
|
19308
19609
|
ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
|
@@ -19314,7 +19615,7 @@ GGML_API void ggml_opt_init(
|
|
19314
19615
|
opt->ctx = ggml_init(ctx_opt_params);
|
19315
19616
|
}
|
19316
19617
|
switch (opt->params.type) {
|
19317
|
-
case
|
19618
|
+
case GGML_OPT_TYPE_ADAM:
|
19318
19619
|
{
|
19319
19620
|
opt->adam.g = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
19320
19621
|
opt->adam.m = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
@@ -19328,7 +19629,7 @@ GGML_API void ggml_opt_init(
|
|
19328
19629
|
ggml_set_zero(opt->adam.pf);
|
19329
19630
|
}
|
19330
19631
|
} break;
|
19331
|
-
case
|
19632
|
+
case GGML_OPT_TYPE_LBFGS:
|
19332
19633
|
{
|
19333
19634
|
opt->lbfgs.x = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
19334
19635
|
opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
|
@@ -19372,13 +19673,13 @@ enum ggml_opt_result ggml_opt(
|
|
19372
19673
|
|
19373
19674
|
ctx = ggml_init(params_ctx);
|
19374
19675
|
if (ctx == NULL) {
|
19375
|
-
return
|
19676
|
+
return GGML_OPT_RESULT_NO_CONTEXT;
|
19376
19677
|
}
|
19377
19678
|
|
19378
19679
|
free_ctx = true;
|
19379
19680
|
}
|
19380
19681
|
|
19381
|
-
enum ggml_opt_result result =
|
19682
|
+
enum ggml_opt_result result = GGML_OPT_RESULT_OK;
|
19382
19683
|
|
19383
19684
|
struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
|
19384
19685
|
|
@@ -19417,14 +19718,14 @@ enum ggml_opt_result ggml_opt_resume_g(
|
|
19417
19718
|
void * callback_data) {
|
19418
19719
|
|
19419
19720
|
// build forward + backward compute graphs
|
19420
|
-
enum ggml_opt_result result =
|
19721
|
+
enum ggml_opt_result result = GGML_OPT_RESULT_OK;
|
19421
19722
|
|
19422
19723
|
switch (opt->params.type) {
|
19423
|
-
case
|
19724
|
+
case GGML_OPT_TYPE_ADAM:
|
19424
19725
|
{
|
19425
19726
|
result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
|
19426
19727
|
} break;
|
19427
|
-
case
|
19728
|
+
case GGML_OPT_TYPE_LBFGS:
|
19428
19729
|
{
|
19429
19730
|
result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
|
19430
19731
|
} break;
|
@@ -19461,8 +19762,10 @@ void ggml_quantize_init(enum ggml_type type) {
|
|
19461
19762
|
switch (type) {
|
19462
19763
|
case GGML_TYPE_IQ2_XXS:
|
19463
19764
|
case GGML_TYPE_IQ2_XS:
|
19765
|
+
case GGML_TYPE_IQ2_S:
|
19464
19766
|
case GGML_TYPE_IQ1_S: iq2xs_init_impl(type); break;
|
19465
19767
|
case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
|
19768
|
+
case GGML_TYPE_IQ3_S: iq3xs_init_impl(512); break;
|
19466
19769
|
default: // nothing
|
19467
19770
|
break;
|
19468
19771
|
}
|
@@ -19737,6 +20040,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|
19737
20040
|
result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19738
20041
|
GGML_ASSERT(result == row_size * nrows);
|
19739
20042
|
} break;
|
20043
|
+
case GGML_TYPE_IQ3_S:
|
20044
|
+
{
|
20045
|
+
GGML_ASSERT(start % QK_K == 0);
|
20046
|
+
GGML_ASSERT(start % n_per_row == 0);
|
20047
|
+
size_t start_row = start / n_per_row;
|
20048
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
20049
|
+
result = quantize_iq3_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
20050
|
+
GGML_ASSERT(result == row_size * nrows);
|
20051
|
+
} break;
|
20052
|
+
case GGML_TYPE_IQ2_S:
|
20053
|
+
{
|
20054
|
+
GGML_ASSERT(start % QK_K == 0);
|
20055
|
+
GGML_ASSERT(start % n_per_row == 0);
|
20056
|
+
size_t start_row = start / n_per_row;
|
20057
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
20058
|
+
result = quantize_iq2_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
20059
|
+
GGML_ASSERT(result == row_size * nrows);
|
20060
|
+
} break;
|
19740
20061
|
case GGML_TYPE_IQ1_S:
|
19741
20062
|
{
|
19742
20063
|
GGML_ASSERT(start % QK_K == 0);
|
@@ -19747,6 +20068,9 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|
19747
20068
|
GGML_ASSERT(result == row_size * nrows);
|
19748
20069
|
} break;
|
19749
20070
|
case GGML_TYPE_IQ4_NL:
|
20071
|
+
#if QK_K == 64
|
20072
|
+
case GGML_TYPE_IQ4_XS:
|
20073
|
+
#endif
|
19750
20074
|
{
|
19751
20075
|
GGML_ASSERT(start % QK4_NL == 0);
|
19752
20076
|
GGML_ASSERT(start % n_per_row == 0);
|
@@ -19755,6 +20079,17 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
|
|
19755
20079
|
result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
19756
20080
|
GGML_ASSERT(result == row_size * nrows);
|
19757
20081
|
} break;
|
20082
|
+
#if QK_K != 64
|
20083
|
+
case GGML_TYPE_IQ4_XS:
|
20084
|
+
{
|
20085
|
+
GGML_ASSERT(start % QK_K == 0);
|
20086
|
+
GGML_ASSERT(start % n_per_row == 0);
|
20087
|
+
size_t start_row = start / n_per_row;
|
20088
|
+
size_t row_size = ggml_row_size(type, n_per_row);
|
20089
|
+
result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
|
20090
|
+
GGML_ASSERT(result == row_size * nrows);
|
20091
|
+
} break;
|
20092
|
+
#endif
|
19758
20093
|
case GGML_TYPE_F16:
|
19759
20094
|
{
|
19760
20095
|
size_t elemsize = sizeof(ggml_fp16_t);
|