llama_cpp 0.12.7 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/ext/llama_cpp/llama_cpp.cpp +131 -288
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +29 -29
- data/vendor/tmp/llama.cpp/Makefile +10 -6
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -3
- data/vendor/tmp/llama.cpp/ggml-backend.c +32 -23
- data/vendor/tmp/llama.cpp/ggml-backend.h +17 -16
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +949 -168
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +9 -3
- data/vendor/tmp/llama.cpp/ggml-metal.m +159 -22
- data/vendor/tmp/llama.cpp/ggml-metal.metal +1195 -139
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +27 -27
- data/vendor/tmp/llama.cpp/ggml-quants.c +1971 -271
- data/vendor/tmp/llama.cpp/ggml-quants.h +52 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +3586 -1201
- data/vendor/tmp/llama.cpp/ggml-sycl.h +5 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +39336 -43461
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1391 -825
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +1 -0
- data/vendor/tmp/llama.cpp/ggml.c +545 -210
- data/vendor/tmp/llama.cpp/ggml.h +65 -23
- data/vendor/tmp/llama.cpp/llama.cpp +1458 -763
- data/vendor/tmp/llama.cpp/llama.h +81 -75
- data/vendor/tmp/llama.cpp/unicode.h +310 -1
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -320,6 +320,17 @@ static ggml_fp16_t ggml_table_exp_f16[1 << 16];
 // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
 float ggml_table_f32_f16[1 << 16];
 
+const char * ggml_status_to_string(enum ggml_status status) {
+    switch (status) {
+        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
+        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
+        case GGML_STATUS_SUCCESS:      return "GGML status: success";
+        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
+    }
+
+    return "GGML status: unknown";
+}
+
 // note: do not use these inside ggml.c
 // these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
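The new `ggml_status_to_string` helper pairs with the `enum ggml_status` codes that graph execution reports in this version. A minimal hedged sketch of surfacing a failure to the user (it assumes `ggml_graph_compute_with_ctx` returns an `enum ggml_status` here, and that `ctx` and `graph` were built elsewhere):

```c
#include <stdio.h>
#include "ggml.h"

// Hedged sketch: print a readable message when graph computation does not succeed.
// Assumes `ctx` is a valid ggml context and `graph` a compute graph built from it.
static void run_and_report(struct ggml_context * ctx, struct ggml_cgraph * graph) {
    enum ggml_status st = ggml_graph_compute_with_ctx(ctx, graph, /*n_threads=*/4);
    if (st != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "compute failed: %s\n", ggml_status_to_string(st));
    }
}
```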
@@ -355,6 +366,10 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
     }
 }
 
+bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
+    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
+}
+
 //
 // timing
 //
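`ggml_guid_matches` byte-compares two `ggml_guid_t` identifiers; backends use such GUIDs to identify themselves. A small hedged sketch (the GUID value below is hypothetical, and the 16-byte array layout is assumed from the `memcmp` in the diff):

```c
#include <stdbool.h>
#include "ggml.h"

// Hypothetical 16-byte GUID, for illustration only.
static ggml_guid my_backend_guid = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0,
                                     0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };

// Returns true when `guid` identifies the hypothetical backend above.
static bool is_my_backend(ggml_guid_t guid) {
    return ggml_guid_matches(guid, &my_backend_guid);
}
```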
@@ -678,6 +693,30 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot_type         = GGML_TYPE_Q8_K,
         .nrows                = 1,
     },
+    [GGML_TYPE_IQ3_S] = {
+        .type_name            = "iq3_s",
+        .blck_size            = QK_K,
+        .type_size            = sizeof(block_iq3_s),
+        .is_quantized         = true,
+        .to_float             = (ggml_to_float_t) dequantize_row_iq3_s,
+        .from_float           = quantize_row_iq3_s,
+        .from_float_reference = (ggml_from_float_t)quantize_row_iq3_s_reference,
+        .vec_dot              = ggml_vec_dot_iq3_s_q8_K,
+        .vec_dot_type         = GGML_TYPE_Q8_K,
+        .nrows                = 1,
+    },
+    [GGML_TYPE_IQ2_S] = {
+        .type_name            = "iq2_s",
+        .blck_size            = QK_K,
+        .type_size            = sizeof(block_iq2_s),
+        .is_quantized         = true,
+        .to_float             = (ggml_to_float_t) dequantize_row_iq2_s,
+        .from_float           = quantize_row_iq2_s,
+        .from_float_reference = (ggml_from_float_t)quantize_row_iq2_s_reference,
+        .vec_dot              = ggml_vec_dot_iq2_s_q8_K,
+        .vec_dot_type         = GGML_TYPE_Q8_K,
+        .nrows                = 1,
+    },
     [GGML_TYPE_IQ1_S] = {
         .type_name            = "iq1_s",
         .blck_size            = QK_K,
@@ -702,6 +741,26 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot_type         = GGML_TYPE_Q8_0,
         .nrows                = 1,
     },
+    [GGML_TYPE_IQ4_XS] = {
+        .type_name            = "iq4_xs",
+#if QK_K == 64
+        .blck_size            = QK4_NL,
+#else
+        .blck_size            = QK_K,
+#endif
+        .type_size            = sizeof(block_iq4_xs),
+        .is_quantized         = true,
+        .to_float             = (ggml_to_float_t) dequantize_row_iq4_xs,
+        .from_float           = quantize_row_iq4_xs,
+        .from_float_reference = (ggml_from_float_t)quantize_row_iq4_xs_reference,
+        .vec_dot              = ggml_vec_dot_iq4_xs_q8_K,
+#if QK_K == 64
+        .vec_dot_type         = GGML_TYPE_Q8_0,
+#else
+        .vec_dot_type         = GGML_TYPE_Q8_K,
+#endif
+        .nrows                = 1,
+    },
     [GGML_TYPE_Q8_K] = {
         .type_name            = "q8_K",
         .blck_size            = QK_K,
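The new `type_traits` entries make the IQ3_S, IQ2_S and IQ4_XS formats reachable through the usual per-type trait lookup. A hedged sketch of dequantizing a row via that table (it assumes `ggml_internal_get_type_traits` is exposed by ggml.h in this version, as the quantization tooling uses it):

```c
#include "ggml.h"

// Hedged sketch: dequantize `n` elements of IQ3_S data into `out` using the
// traits registered above. `src` must point at valid IQ3_S block data and `n`
// should be a multiple of the type's block size.
static void dequant_iq3_s_row(const void * src, float * out, int n) {
    ggml_type_traits_t traits = ggml_internal_get_type_traits(GGML_TYPE_IQ3_S);
    traits.to_float(src, out, n);
}
```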
@@ -1560,9 +1619,15 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
 inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
     uint16_t t;
     for (int i = 0; i < n; ++i) {
-
-
-
+        if (x[i] <= -10.0f) {
+            y[i] = 0.0f;
+        } else if (x[i] >= 10.0f) {
+            y[i] = x[i];
+        } else {
+            ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]);
+            memcpy(&t, &fp16, sizeof(uint16_t));
+            y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]);
+        }
     }
 }
 #else
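The clamp added around the fp16 table lookup avoids pushing out-of-range values through the half-precision table: for x <= -10 GELU is effectively 0, and for x >= 10 it is effectively x. A hedged reference sketch of the same piecewise behaviour using the usual tanh approximation (not the lookup table ggml actually uses):

```c
#include <math.h>

// Reference (non-table) GELU with the same clamping as the patched loop.
// 0.7978845608f is sqrt(2/pi) from the standard tanh approximation.
static float gelu_clamped_ref(float x) {
    if (x <= -10.0f) return 0.0f;
    if (x >=  10.0f) return x;
    return 0.5f*x*(1.0f + tanhf(0.7978845608f*(x + 0.044715f*x*x*x)));
}
```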
@@ -1768,6 +1833,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "POOL_2D",
     "UPSCALE",
     "PAD",
+    "ARANGE",
+    "TIMESTEP_EMBEDDING",
     "ARGSORT",
     "LEAKY_RELU",
 
@@ -1796,7 +1863,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1854,6 +1921,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "pool_2d(x)",
     "upscale(x)",
     "pad(x)",
+    "arange(start, stop, step)",
+    "timestep_embedding(timesteps, dim, max_period)",
     "argsort(x)",
     "leaky_relu(x)",
 
@@ -1882,7 +1951,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 74, "GGML_OP_COUNT != 74");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -2085,7 +2154,10 @@ void ggml_numa_init(enum ggml_numa_strategy numa_flag) {
     getcpu_ret = getcpu(&current_cpu, &g_state.numa.current_node);
 #else
     // old glibc doesn't have a wrapper for this call. Fall back on direct syscall
-
+#   if !defined(SYS_getcpu) && defined(SYS_get_cpu)
+#       define SYS_getcpu SYS_get_cpu // some older glibc versions use this name
+#   endif
     getcpu_ret = syscall(SYS_getcpu, &current_cpu, &g_state.numa.current_node);
 #endif
 
     if (g_state.numa.n_nodes < 1 || g_state.numa.total_cpus < 1 || getcpu_ret != 0) {
@@ -2304,6 +2376,9 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_IQ3_XXS: wtype = GGML_TYPE_IQ3_XXS; break;
         case GGML_FTYPE_MOSTLY_IQ1_S:   wtype = GGML_TYPE_IQ1_S;   break;
         case GGML_FTYPE_MOSTLY_IQ4_NL:  wtype = GGML_TYPE_IQ4_NL;  break;
+        case GGML_FTYPE_MOSTLY_IQ4_XS:  wtype = GGML_TYPE_IQ4_XS;  break;
+        case GGML_FTYPE_MOSTLY_IQ3_S:   wtype = GGML_TYPE_IQ3_S;   break;
+        case GGML_FTYPE_MOSTLY_IQ2_S:   wtype = GGML_TYPE_IQ2_S;   break;
         case GGML_FTYPE_UNKNOWN:        wtype = GGML_TYPE_COUNT;   break;
         case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
     }
@@ -2708,7 +2783,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         }
     }
 
-    struct ggml_object * const obj_new = ggml_new_object(ctx,
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
 
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
@@ -2716,7 +2791,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
 
     *result = (struct ggml_tensor) {
         /*.type         =*/ type,
-        /*.backend      =*/
+        /*.backend      =*/ GGML_BACKEND_TYPE_CPU,
         /*.buffer       =*/ NULL,
         /*.ne           =*/ { 1, 1, 1, 1 },
         /*.nb           =*/ { 0, 0, 0, 0 },
@@ -2838,11 +2913,21 @@ static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_
     return ((const int32_t *)(tensor->op_params))[i];
 }
 
+static float ggml_get_op_params_f32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+    return ((const float *)(tensor->op_params))[i];
+}
+
 static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
     assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
     ((int32_t *)(tensor->op_params))[i] = value;
 }
 
+static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, float value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(float));
+    ((float *)(tensor->op_params))[i] = value;
+}
+
 struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
     memset(tensor->data, 0, ggml_nbytes(tensor));
     return tensor;
@@ -3289,7 +3374,7 @@ struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        if (obj->type ==
+        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
             return (struct ggml_tensor *)(mem_buffer + obj->offs);
         }
 
@@ -3306,7 +3391,7 @@ struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struc
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        if (obj->type ==
+        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
             return (struct ggml_tensor *)(mem_buffer + obj->offs);
         }
 
@@ -3322,7 +3407,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        if (obj->type ==
+        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
             struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
             if (strcmp(cur->name, name) == 0) {
                 return cur;
@@ -5729,11 +5814,13 @@ struct ggml_tensor * ggml_pool_1d(
         is_node = true;
     }
 
-    const int64_t ne[
+    const int64_t ne[4] = {
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
        a->ne[1],
+       a->ne[2],
+       a->ne[3],
     };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
     int32_t params[] = { op, k0, s0, p0 };
     ggml_set_op_params(result, params, sizeof(params));
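With the output shape now built from all four dimensions, 1-D pooling preserves higher dims of the input. A hedged usage sketch (assumes a valid `ggml_context * ctx` with enough memory):

```c
// A [64, 8, 4, 1] F32 input max-pooled with k0=2, s0=2, p0=0
// now yields a [32, 8, 4, 1] tensor instead of collapsing to 2-D.
struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 64, 8, 4, 1);
struct ggml_tensor * p = ggml_pool_1d(ctx, a, GGML_OP_POOL_MAX, /*k0=*/2, /*s0=*/2, /*p0=*/0);
```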
@@ -5839,6 +5926,55 @@ struct ggml_tensor * ggml_upscale(
     return ggml_upscale_impl(ctx, a, scale_factor);
 }
 
+struct ggml_tensor * ggml_arange(
+        struct ggml_context * ctx,
+        float start,
+        float stop,
+        float step) {
+
+    GGML_ASSERT(stop > start);
+
+    const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
+
+    result->op = GGML_OP_ARANGE;
+    ggml_set_op_params_f32(result, 0, start);
+    ggml_set_op_params_f32(result, 1, stop);
+    ggml_set_op_params_f32(result, 2, step);
+
+    return result;
+}
+
+struct ggml_tensor * ggml_timestep_embedding(
+        struct ggml_context * ctx,
+        struct ggml_tensor * timesteps,
+        int dim,
+        int max_period) {
+    bool is_node = false;
+
+    if (timesteps->grad) {
+        GGML_ASSERT(false); // TODO: implement backward
+        is_node = true;
+    }
+
+    int actual_dim = dim;
+    if (dim % 2 != 0) {
+        actual_dim = dim + 1;
+    }
+
+    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, actual_dim, timesteps->ne[0]);
+
+    result->op = GGML_OP_TIMESTEP_EMBEDDING;
+    ggml_set_op_params_i32(result, 0, dim);
+    ggml_set_op_params_i32(result, 1, max_period);
+
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = timesteps;
+
+    return result;
+}
+
 // ggml_argsort
 
 struct ggml_tensor * ggml_argsort(
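Both new graph builders return plain F32 tensors, so they compose with the rest of the API. A hedged usage sketch (assumes a valid `ggml_context * ctx` with enough memory and a later `ggml_graph_compute` over a graph that includes `emb`):

```c
// 32 evenly spaced timesteps 0, 1, ..., 31 as a 1-D tensor.
struct ggml_tensor * t = ggml_arange(ctx, 0.0f, 32.0f, 1.0f);

// Sinusoidal embedding of those timesteps: shape [128, 32] with max_period 10000.
struct ggml_tensor * emb = ggml_timestep_embedding(ctx, t, /*dim=*/128, /*max_period=*/10000);
```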
@@ -5866,7 +6002,7 @@ struct ggml_tensor * ggml_top_k(
         int   k) {
     GGML_ASSERT(a->ne[0] >= k);
 
-    struct ggml_tensor * result = ggml_argsort(ctx, a,
+    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
 
     result = ggml_view_4d(ctx, result,
                 k, result->ne[1], result->ne[2], result->ne[3],
@@ -6660,7 +6796,7 @@ static void ggml_compute_forward_dup_same_cont(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -6692,7 +6828,7 @@ static void ggml_compute_forward_dup_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -6965,7 +7101,7 @@ static void ggml_compute_forward_dup_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7218,7 +7354,7 @@ static void ggml_compute_forward_dup_bytes(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7398,7 +7534,7 @@ static void ggml_compute_forward_add_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7406,7 +7542,7 @@ static void ggml_compute_forward_add_f32(
-    if (src1->backend ==
+    if (src1->backend == GGML_BACKEND_TYPE_GPU) {
@@ -7488,7 +7624,7 @@ static void ggml_compute_forward_add_f16_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7567,7 +7703,7 @@ static void ggml_compute_forward_add_f16_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7623,7 +7759,7 @@ static void ggml_compute_forward_add_q_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7738,6 +7874,9 @@ static void ggml_compute_forward_add(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -7760,7 +7899,7 @@ static void ggml_compute_forward_add1_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7814,7 +7953,7 @@ static void ggml_compute_forward_add1_f16_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7866,7 +8005,7 @@ static void ggml_compute_forward_add1_f16_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -7918,7 +8057,7 @@ static void ggml_compute_forward_add1_q_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8017,6 +8156,9 @@ static void ggml_compute_forward_add1(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -8047,7 +8189,7 @@ static void ggml_compute_forward_acc_f32(
-    if (!inplace && (params->type ==
+    if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
@@ -8059,7 +8201,7 @@ static void ggml_compute_forward_acc_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8141,6 +8283,9 @@ static void ggml_compute_forward_acc(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -8160,7 +8305,7 @@ static void ggml_compute_forward_sub_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8241,14 +8386,14 @@ static void ggml_compute_forward_mul_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
-    if (src1->backend ==
+    if (src1->backend == GGML_BACKEND_TYPE_GPU) {
@@ -8349,7 +8494,7 @@ static void ggml_compute_forward_div_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8444,7 +8589,7 @@ static void ggml_compute_forward_sqr_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8490,7 +8635,7 @@ static void ggml_compute_forward_sqrt_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8536,7 +8681,7 @@ static void ggml_compute_forward_log_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8582,7 +8727,7 @@ static void ggml_compute_forward_sum_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8617,7 +8762,7 @@ static void ggml_compute_forward_sum_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8674,7 +8819,7 @@ static void ggml_compute_forward_sum_rows_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8729,7 +8874,7 @@ static void ggml_compute_forward_mean_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8788,7 +8933,7 @@ static void ggml_compute_forward_argmax_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8839,7 +8984,7 @@ static void ggml_compute_forward_repeat_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8884,7 +9029,7 @@ static void ggml_compute_forward_repeat_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -8958,7 +9103,7 @@ static void ggml_compute_forward_repeat_back_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9035,7 +9180,7 @@ static void ggml_compute_forward_concat_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9107,7 +9252,7 @@ static void ggml_compute_forward_abs_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9153,7 +9298,7 @@ static void ggml_compute_forward_sgn_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9199,7 +9344,7 @@ static void ggml_compute_forward_neg_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9245,7 +9390,7 @@ static void ggml_compute_forward_step_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9291,7 +9436,7 @@ static void ggml_compute_forward_tanh_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9337,7 +9482,7 @@ static void ggml_compute_forward_elu_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9383,7 +9528,7 @@ static void ggml_compute_forward_relu_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9430,7 +9575,7 @@ static void ggml_compute_forward_gelu_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9493,7 +9638,7 @@ static void ggml_compute_forward_gelu_quick_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9556,7 +9701,7 @@ static void ggml_compute_forward_silu_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9617,7 +9762,7 @@ static void ggml_compute_forward_leaky_relu_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9670,7 +9815,7 @@ static void ggml_compute_forward_silu_back_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9732,7 +9877,7 @@ static void ggml_compute_forward_hardswish_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9775,7 +9920,7 @@ static void ggml_compute_forward_hardsigmoid_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9821,7 +9966,7 @@ static void ggml_compute_forward_norm_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9896,7 +10041,7 @@ static void ggml_compute_forward_rms_norm_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -9967,7 +10112,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -10145,7 +10290,7 @@ static void ggml_compute_forward_group_norm_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -10163,7 +10308,7 @@ static void ggml_compute_forward_group_norm_f32(
-    for (int i = ith; i < n_groups; i+=nth) {
+    for (int i = ith; i < n_groups; i += nth) {
@@ -10177,28 +10322,32 @@ static void ggml_compute_forward_group_norm_f32(
                 for (int64_t i01 = 0; i01 < ne01; i01++) {
                     const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
 
+                    ggml_float sumr = 0.0;
                     for (int64_t i00 = 0; i00 < ne00; i00++) {
-
+                        sumr += (ggml_float)x[i00];
                     }
+                    sum += sumr;
                 }
             }
-            float mean = sum / (ne00 * ne01 * step);
-            ggml_float sum2 = 0.0;
+            const float mean = sum / (ne00 * ne01 * step);
 
+            ggml_float sum2 = 0.0;
             for (int64_t i02 = start; i02 < end; i02++) {
                 for (int64_t i01 = 0; i01 < ne01; i01++) {
                     const float * x = (float *)((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03);
 
                     float * y = (float *)((char *) dst->data + i01 * nb1 + i02 * nb2 + i03 * nb3);
 
+                    ggml_float sumr = 0.0;
                     for (int64_t i00 = 0; i00 < ne00; i00++) {
                         float v = x[i00] - mean;
                         y[i00] = v;
-
+                        sumr += (ggml_float)(v * v);
                     }
+                    sum2 += sumr;
                 }
             }
-            float variance = sum2 / (ne00 * ne01 * step);
+            const float variance = sum2 / (ne00 * ne01 * step);
             const float scale = 1.0f / sqrtf(variance + eps);
 
             for (int64_t i02 = start; i02 < end; i02++) {
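The group-norm rewrite accumulates each row into a local `ggml_float sumr` before folding it into the running totals, which bounds round-off when `ne00 * ne01` is large. A hedged stand-alone sketch of the same two-level mean/variance accumulation:

```c
#include <stddef.h>

// Two-level accumulation: per-row partial sums in double precision, folded into
// the totals, mirroring the sumr/sum and sumr/sum2 pattern in the patched kernel.
static void mean_var_rows(const float * x, size_t rows, size_t cols, float * mean, float * var) {
    double sum = 0.0;
    for (size_t r = 0; r < rows; ++r) {
        double sumr = 0.0;
        for (size_t c = 0; c < cols; ++c) sumr += x[r*cols + c];
        sum += sumr;
    }
    const double m = sum / (double)(rows*cols);

    double sum2 = 0.0;
    for (size_t r = 0; r < rows; ++r) {
        double sumr = 0.0;
        for (size_t c = 0; c < cols; ++c) {
            const double v = x[r*cols + c] - m;
            sumr += v*v;
        }
        sum2 += sumr;
    }
    *mean = (float)m;
    *var  = (float)(sum2 / (double)(rows*cols));
}
```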
@@ -10312,7 +10461,7 @@ static void ggml_compute_forward_mul_mat(
-        if (params->ith == 0 && params->type ==
+        if (params->ith == 0 && params->type == GGML_TASK_TYPE_COMPUTE) {
@@ -10325,7 +10474,7 @@ static void ggml_compute_forward_mul_mat(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -10348,7 +10497,7 @@ static void ggml_compute_forward_mul_mat(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -10386,7 +10535,7 @@ static void ggml_compute_forward_mul_mat(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -10410,7 +10559,7 @@ static void ggml_compute_forward_mul_mat(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -10567,7 +10716,7 @@ static void ggml_compute_forward_mul_mat_id(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -10604,7 +10753,7 @@ static void ggml_compute_forward_mul_mat_id(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -10752,7 +10901,7 @@ static void ggml_compute_forward_out_prod_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -10765,7 +10914,7 @@ static void ggml_compute_forward_out_prod_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -10945,7 +11094,7 @@ static void ggml_compute_forward_out_prod_q_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -10953,7 +11102,7 @@ static void ggml_compute_forward_out_prod_q_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11039,6 +11188,9 @@ static void ggml_compute_forward_out_prod(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -11070,7 +11222,7 @@ static void ggml_compute_forward_scale_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11142,7 +11294,7 @@ static void ggml_compute_forward_set_f32(
-    if (!inplace && (params->type ==
+    if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
@@ -11154,7 +11306,7 @@ static void ggml_compute_forward_set_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11227,6 +11379,9 @@ static void ggml_compute_forward_set(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -11301,7 +11456,7 @@ static void ggml_compute_forward_get_rows_q(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11341,7 +11496,7 @@ static void ggml_compute_forward_get_rows_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11378,7 +11533,7 @@ static void ggml_compute_forward_get_rows_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11429,6 +11584,9 @@ static void ggml_compute_forward_get_rows(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -11480,14 +11638,14 @@ static void ggml_compute_forward_get_rows_back_f32_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11519,14 +11677,14 @@ static void ggml_compute_forward_get_rows_back_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11596,7 +11754,7 @@ static void ggml_compute_forward_diag_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11665,7 +11823,7 @@ static void ggml_compute_forward_diag_mask_f32(
-    if (!inplace && (params->type ==
+    if (!inplace && (params->type == GGML_TASK_TYPE_INIT)) {
@@ -11679,7 +11837,7 @@ static void ggml_compute_forward_diag_mask_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11753,7 +11911,7 @@ static void ggml_compute_forward_soft_max_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11891,7 +12049,7 @@ static void ggml_compute_forward_soft_max_back_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -11985,7 +12143,7 @@ static void ggml_compute_forward_alibi_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -12044,7 +12202,7 @@ static void ggml_compute_forward_alibi_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -12129,6 +12287,9 @@ static void ggml_compute_forward_alibi(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -12150,7 +12311,7 @@ static void ggml_compute_forward_clamp_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -12212,6 +12373,9 @@ static void ggml_compute_forward_clamp(
+        case GGML_TYPE_IQ4_XS:
+        case GGML_TYPE_IQ3_S:
+        case GGML_TYPE_IQ2_S:
@@ -12289,7 +12453,7 @@ static void ggml_compute_forward_rope_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -12467,7 +12631,7 @@ static void ggml_compute_forward_rope_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -12698,7 +12862,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -12738,7 +12902,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -12797,7 +12961,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -12837,7 +13001,7 @@ static void ggml_compute_forward_conv_transpose_1d_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -12941,11 +13105,11 @@ static void ggml_compute_forward_im2col_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -13029,11 +13193,11 @@ static void ggml_compute_forward_im2col_f16(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -13115,7 +13279,7 @@ static void ggml_compute_forward_conv_transpose_2d(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
@@ -13157,7 +13321,7 @@ static void ggml_compute_forward_conv_transpose_2d(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -13209,7 +13373,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -13278,7 +13442,7 @@ static void ggml_compute_forward_pool_2d(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -13351,7 +13515,7 @@ static void ggml_compute_forward_upscale_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -13411,7 +13575,7 @@ static void ggml_compute_forward_pad_f32(
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
@@ -13464,6 +13628,106 @@ static void ggml_compute_forward_pad(
     }
 }
 
+
+// ggml_compute_forward_arange
+
+static void ggml_compute_forward_arange_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
+        return;
+    }
+
+    GGML_ASSERT(dst->nb[0] == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const float start = ggml_get_op_params_f32(dst, 0);
+    const float stop  = ggml_get_op_params_f32(dst, 1);
+    const float step  = ggml_get_op_params_f32(dst, 2);
+
+    const int64_t steps = (int64_t) ceilf((stop - start) / step);
+
+    GGML_ASSERT(ggml_nelements(dst) == steps);
+
+    for (int64_t i = ith; i < steps; i+= nth) {
+        float value = start + step * i;
+        ((float *)dst->data)[i] = value;
+    }
+}
+
+static void ggml_compute_forward_arange(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+    switch (dst->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_arange_f32(params, dst);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
+static void ggml_compute_forward_timestep_embedding_f32(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
+        return;
+    }
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    GGML_ASSERT(src0->nb[0] == sizeof(float));
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    GGML_TENSOR_UNARY_OP_LOCALS
+
+    const int dim = ggml_get_op_params_i32(dst, 0);
+    const int max_period = ggml_get_op_params_i32(dst, 1);
+
+    int half = dim / 2;
+
+    for (int64_t i = 0; i < ne00; i++) {
+        float * embed_data = (float *)((char *)  dst->data +  i*nb1);
+        for (int64_t j = ith; j < half; j += nth) {
+            float timestep = ((float *)src0->data)[i];
+            float freq = (float)expf(-logf(max_period) * j / half);
+            float arg = timestep * freq;
+            embed_data[j] = cosf(arg);
+            embed_data[j + half] = sinf(arg);
+        }
+        if (dim % 2 != 0 && ith == 0) {
+            embed_data[dim] = 0.f;
+        }
+    }
+}
+
+static void ggml_compute_forward_timestep_embedding(
+        const struct ggml_compute_params * params,
+        struct ggml_tensor * dst) {
+
+    const struct ggml_tensor * src0 = dst->src[0];
+
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_timestep_embedding_f32(params, dst);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
 // ggml_compute_forward_argsort
 
 static void ggml_compute_forward_argsort_f32(
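For reference, the kernel above fills, for each timestep t and index j < dim/2, cos(t * exp(-ln(max_period) * j / half)) into slot j and the matching sine into slot j + half, zero-padding one trailing slot when dim is odd. A hedged single-timestep reference in plain C:

```c
#include <math.h>

// Hedged reference: sinusoidal embedding for one timestep, matching the
// cos/sin layout of ggml_compute_forward_timestep_embedding_f32 above.
// `out` must hold dim elements rounded up to an even count.
static void timestep_embed_ref(float timestep, int dim, int max_period, float * out) {
    const int half = dim / 2;
    for (int j = 0; j < half; ++j) {
        const float freq = expf(-logf((float) max_period) * (float) j / (float) half);
        out[j]        = cosf(timestep * freq);
        out[j + half] = sinf(timestep * freq);
    }
    if (dim % 2 != 0) {
        out[dim] = 0.0f; // odd dims get one padded slot, as in the kernel
    }
}
```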
@@ -13472,7 +13736,7 @@ static void ggml_compute_forward_argsort_f32(
|
|
13472
13736
|
|
13473
13737
|
const struct ggml_tensor * src0 = dst->src[0];
|
13474
13738
|
|
13475
|
-
if (params->type ==
|
13739
|
+
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
13476
13740
|
return;
|
13477
13741
|
}
|
13478
13742
|
|
@@ -13498,8 +13762,8 @@ static void ggml_compute_forward_argsort_f32(
     // C doesn't have a functional sort, so we do a bubble sort instead
     for (int64_t j = 0; j < ne0; j++) {
         for (int64_t k = j + 1; k < ne0; k++) {
-            if ((order ==
-                (order ==
+            if ((order == GGML_SORT_ORDER_ASC  && src_data[dst_data[j]] > src_data[dst_data[k]]) ||
+                (order == GGML_SORT_ORDER_DESC && src_data[dst_data[j]] < src_data[dst_data[k]])) {
                 int32_t tmp = dst_data[j];
                 dst_data[j] = dst_data[k];
                 dst_data[k] = tmp;
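This hunk only renames the sort-order enumerators to GGML_SORT_ORDER_ASC / GGML_SORT_ORDER_DESC; the context lines show the per-row index sort that argsort performs. Below is a minimal standalone sketch of that logic with a stand-in enum, useful as a reference for what the kernel computes.

```c
// Minimal sketch of the index sort that ggml_compute_forward_argsort_f32 performs
// per row: dst holds indices into src, ordered by the values they point at.
// The enum and data here are illustrative only.
#include <stdint.h>
#include <stdio.h>

enum sort_order { SORT_ORDER_ASC, SORT_ORDER_DESC }; // stand-in for GGML_SORT_ORDER_*

static void argsort_row(const float * src, int32_t * dst, int64_t ne0, enum sort_order order) {
    for (int64_t i = 0; i < ne0; ++i) dst[i] = (int32_t) i;
    // same O(n^2) swap loop as the kernel ("C doesn't have a functional sort")
    for (int64_t j = 0; j < ne0; ++j) {
        for (int64_t k = j + 1; k < ne0; ++k) {
            if ((order == SORT_ORDER_ASC  && src[dst[j]] > src[dst[k]]) ||
                (order == SORT_ORDER_DESC && src[dst[j]] < src[dst[k]])) {
                int32_t tmp = dst[j];
                dst[j] = dst[k];
                dst[k] = tmp;
            }
        }
    }
}

int main(void) {
    const float row[5] = { 0.3f, -1.0f, 2.5f, 0.0f, 1.0f };
    int32_t idx[5];
    argsort_row(row, idx, 5, SORT_ORDER_ASC);
    for (int i = 0; i < 5; ++i) printf("%d ", idx[i]);  // prints: 1 3 0 4 2
    printf("\n");
    return 0;
}
```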
@@ -13582,11 +13846,11 @@ static void ggml_compute_forward_flash_attn_f32(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -13774,11 +14038,11 @@ static void ggml_compute_forward_flash_attn_f16(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14033,11 +14297,11 @@ static void ggml_compute_forward_flash_ff_f16(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
         return;
     }
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14192,14 +14456,14 @@ static void ggml_compute_forward_flash_attn_back_f32(
     GGML_ASSERT(nb1 <= nb2);
     GGML_ASSERT(nb2 <= nb3);
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith == 0) {
             memset(dst->data, 0, nb0*ne0*ne1*ne2*ne3);
         }
         return;
     }
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14515,7 +14779,7 @@ static void ggml_compute_forward_win_part_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14581,7 +14845,7 @@ static void ggml_compute_forward_win_unpart_f32(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14709,7 +14973,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
 
     const struct ggml_tensor * src0 = dst->src[0];
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14761,14 +15025,14 @@ static void ggml_compute_forward_add_rel_pos_f32(
     const struct ggml_tensor * src2 = dst->src[2];
 
     const bool inplace = (bool) ((int32_t *) dst->op_params)[0];
-    if (!inplace && params->type ==
+    if (!inplace && params->type == GGML_TASK_TYPE_INIT) {
         if (params->ith != 0) {
             return;
         }
         memcpy((char *) dst->data, (char *) src0->data, ggml_nbytes(dst));
         return;
     }
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14850,7 +15114,7 @@ static void ggml_compute_forward_map_unary_f32(
 
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14899,7 +15163,7 @@ static void ggml_compute_forward_map_binary_f32(
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14948,7 +15212,7 @@ static void ggml_compute_forward_map_custom1_f32(
 
     assert(params->ith == 0);
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14967,7 +15231,7 @@ static void ggml_compute_forward_map_custom2_f32(
 
     assert(params->ith == 0);
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -14987,7 +15251,7 @@ static void ggml_compute_forward_map_custom3_f32(
 
     assert(params->ith == 0);
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -15002,13 +15266,14 @@ static void ggml_compute_forward_map_custom1(
 
     const struct ggml_tensor * a = dst->src[0];
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
-    struct ggml_map_custom1_op_params
+    struct ggml_map_custom1_op_params p;
+    memcpy(&p, dst->op_params, sizeof(p));
 
-    p
+    p.fun(dst, a, params->ith, params->nth, p.userdata);
 }
 
 // ggml_compute_forward_map_custom2
@@ -15020,13 +15285,14 @@ static void ggml_compute_forward_map_custom2(
     const struct ggml_tensor * a = dst->src[0];
     const struct ggml_tensor * b = dst->src[1];
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
-    struct ggml_map_custom2_op_params
+    struct ggml_map_custom2_op_params p;
+    memcpy(&p, dst->op_params, sizeof(p));
 
-    p
+    p.fun(dst, a, b, params->ith, params->nth, p.userdata);
 }
 
 // ggml_compute_forward_map_custom3
@@ -15039,13 +15305,14 @@ static void ggml_compute_forward_map_custom3(
     const struct ggml_tensor * b = dst->src[1];
     const struct ggml_tensor * c = dst->src[2];
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
-    struct ggml_map_custom3_op_params
+    struct ggml_map_custom3_op_params p;
+    memcpy(&p, dst->op_params, sizeof(p));
 
-    p
+    p.fun(dst, a, b, c, params->ith, params->nth, p.userdata);
 }
 
 // ggml_compute_forward_cross_entropy_loss
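The map_custom1/2/3 forward functions above now copy their parameter block out of dst->op_params with memcpy into a local struct rather than reading through a cast pointer. The standalone sketch below illustrates that store/load pattern; the struct name and fields are illustrative stand-ins, not the ggml definitions.

```c
// Sketch of the op-params pattern used above: a small struct is serialized into a
// raw int32 buffer and read back with memcpy instead of a pointer cast, which keeps
// the access well-defined regardless of the buffer's alignment.
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct custom_op_params {
    void (*fun)(int ith, int nth, void * userdata);
    int    n_tasks;
    void * userdata;
};

static void hello(int ith, int nth, void * userdata) {
    (void) userdata;
    printf("task %d of %d\n", ith, nth);
}

int main(void) {
    int32_t op_params[16] = {0};                  // plays the role of tensor->op_params
    struct custom_op_params in = { hello, 4, NULL };
    memcpy(op_params, &in, sizeof(in));           // store, as the graph-building side does

    struct custom_op_params p;
    memcpy(&p, op_params, sizeof(p));             // load, as the forward pass above does
    p.fun(0, p.n_tasks, p.userdata);
    return 0;
}
```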
@@ -15073,14 +15340,14 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
 
     GGML_ASSERT(params->wsize >= sizeof(float) * (nth + nth * nc));
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT) {
         if (ith == 0) {
             memset(sums, 0, sizeof(float) * (nth + nth * nc));
         }
         return;
     }
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_FINALIZE) {
         if (ith == 0) {
             float * dp = (float *) dst->data;
             ggml_vec_sum_f32(nth, dp, sums);
@@ -15195,7 +15462,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
     const int64_t ith = params->ith;
     const int64_t nth = params->nth;
 
-    if (params->type ==
+    if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
         return;
     }
 
@@ -15302,8 +15569,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
         if (skip_cpu) {
             return;
         }
-        GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
-        GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
+        GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
+        GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
 #elif defined(GGML_USE_VULKAN)
         const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
 #ifdef GGML_VULKAN_CHECK_RESULTS
@@ -15314,8 +15581,8 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
         if (skip_cpu) {
             return;
         }
-        GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend ==
-        GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend ==
+        GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
+        GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
 #endif // GGML_USE_CUBLAS
 
 #ifdef GGML_USE_SYCL
@@ -15529,6 +15796,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_pad(params, tensor);
             } break;
+        case GGML_OP_ARANGE:
+            {
+                ggml_compute_forward_arange(params, tensor);
+            } break;
+        case GGML_OP_TIMESTEP_EMBEDDING:
+            {
+                ggml_compute_forward_timestep_embedding(params, tensor);
+            } break;
         case GGML_OP_ARGSORT:
             {
                 ggml_compute_forward_argsort(params, tensor);
@@ -16531,6 +16806,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
+        case GGML_OP_ARANGE:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
+        case GGML_OP_TIMESTEP_EMBEDDING:
+            {
+                GGML_ASSERT(false); // TODO: not implemented
+            } break;
         case GGML_OP_ARGSORT:
             {
                 GGML_ASSERT(false); // TODO: not implemented
@@ -16861,7 +17144,7 @@ size_t ggml_graph_overhead(void) {
 
 struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
     const size_t obj_size = ggml_graph_nbytes(size, grads);
-    struct ggml_object * obj = ggml_new_object(ctx,
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
     struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
 
     struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
@@ -17131,6 +17414,7 @@ struct ggml_compute_state {
     ggml_thread_t thrd;
     int ith;
     struct ggml_compute_state_shared * shared;
+    enum ggml_status ec;
 };
 
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
@@ -17282,6 +17566,14 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             {
                 n_tasks = n_threads;
             } break;
+        case GGML_OP_ARANGE:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_TIMESTEP_EMBEDDING:
+            {
+                n_tasks = n_threads;
+            } break;
         case GGML_OP_ARGSORT:
             {
                 n_tasks = n_threads;
@@ -17311,29 +17603,32 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_MAP_CUSTOM1:
             {
-                struct ggml_map_custom1_op_params
-
+                struct ggml_map_custom1_op_params p;
+                memcpy(&p, node->op_params, sizeof(p));
+                if (p.n_tasks == GGML_N_TASKS_MAX) {
                     n_tasks = n_threads;
                 } else {
-                    n_tasks = MIN(p
+                    n_tasks = MIN(p.n_tasks, n_threads);
                 }
             } break;
         case GGML_OP_MAP_CUSTOM2:
             {
-                struct ggml_map_custom2_op_params
-
+                struct ggml_map_custom2_op_params p;
+                memcpy(&p, node->op_params, sizeof(p));
+                if (p.n_tasks == GGML_N_TASKS_MAX) {
                     n_tasks = n_threads;
                 } else {
-                    n_tasks = MIN(p
+                    n_tasks = MIN(p.n_tasks, n_threads);
                 }
             } break;
         case GGML_OP_MAP_CUSTOM3:
             {
-                struct ggml_map_custom3_op_params
-
+                struct ggml_map_custom3_op_params p;
+                memcpy(&p, node->op_params, sizeof(p));
+                if (p.n_tasks == GGML_N_TASKS_MAX) {
                     n_tasks = n_threads;
                 } else {
-                    n_tasks = MIN(p
+                    n_tasks = MIN(p.n_tasks, n_threads);
                 }
             } break;
         case GGML_OP_CROSS_ENTROPY_LOSS:
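For the custom map ops, ggml_get_n_tasks() now reads the parameter block the same way (memcpy into a local struct) and clamps the requested task count to the available threads, or uses every thread when the request is GGML_N_TASKS_MAX. A tiny standalone sketch of that selection logic, with a stand-in constant:

```c
// Sketch of the task-count selection above: a custom op can request a fixed number
// of tasks, or "as many as possible", and the scheduler clamps the request to the
// available threads. N_TASKS_MAX here stands in for GGML_N_TASKS_MAX.
#include <stdio.h>

#define N_TASKS_MAX (-1)
#define MIN(a, b) ((a) < (b) ? (a) : (b))

static int resolve_n_tasks(int requested, int n_threads) {
    if (requested == N_TASKS_MAX) {
        return n_threads;             // use every available thread
    }
    return MIN(requested, n_threads); // never schedule more tasks than threads
}

int main(void) {
    printf("%d\n", resolve_n_tasks(N_TASKS_MAX, 8)); // 8
    printf("%d\n", resolve_n_tasks(2, 8));           // 2
    printf("%d\n", resolve_n_tasks(16, 8));          // 8
    return 0;
}
```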
@@ -17408,19 +17703,20 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     set_numa_thread_affinity(state->ith);
 
     int node_n     = -1;
-    int task_phase =
+    int task_phase = GGML_TASK_TYPE_FINALIZE;
 
     while (true) {
         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
             state->shared->node_n += 1;
-
+            state->ec = GGML_STATUS_ABORTED;
+            return 0;
         }
 
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
             // all other threads are finished and spinning
             // do finalize and init here so we don't have synchronize again
             struct ggml_compute_params params = {
-                /*.type  =*/
+                /*.type  =*/ GGML_TASK_TYPE_FINALIZE,
                 /*.ith   =*/ 0,
                 /*.nth   =*/ 0,
                 /*.wsize =*/ cplan->work_size,
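A worker that observes the abort callback now records GGML_STATUS_ABORTED in its per-thread status field instead of signalling through a bare return value. The sketch below shows one possible callback of the shape the scheduler consults (a function taking the user-data pointer and returning true to request cancellation); the deadline logic is an assumption for illustration, not something in the diff.

```c
// Sketch of an abort callback matching the call shape used above:
// cplan->abort_callback(cplan->abort_callback_data). Returning true asks the
// workers to stop, after which the compute status reports the aborted run.
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct deadline {
    time_t end;                 // wall-clock time after which we want to abort
};

static bool abort_after_deadline(void * data) {
    const struct deadline * d = (const struct deadline *) data;
    return time(NULL) >= d->end;
}

int main(void) {
    struct deadline d = { time(NULL) + 5 };   // allow roughly five seconds
    // e.g.: cplan.abort_callback = abort_after_deadline; cplan.abort_callback_data = &d;
    printf("abort now? %s\n", abort_after_deadline(&d) ? "yes" : "no");
    return 0;
}
```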
@@ -17451,17 +17747,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             if (n_tasks == 1) {
                 /* INIT */
                 if (GGML_OP_HAS_INIT[node->op]) {
-                    params.type =
+                    params.type = GGML_TASK_TYPE_INIT;
                     ggml_compute_forward(&params, node);
                 }
 
                 // TODO: maybe push node_n to the atomic but if other threads see n_tasks is 1,
                 // they do something more efficient than spinning (?)
-                params.type =
+                params.type = GGML_TASK_TYPE_COMPUTE;
                 ggml_compute_forward(&params, node);
 
                 if (GGML_OP_HAS_FINALIZE[node->op]) {
-                    params.type =
+                    params.type = GGML_TASK_TYPE_FINALIZE;
                     ggml_compute_forward(&params, node);
                 }
 
@@ -17475,7 +17771,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             }
         }
 
-        task_phase =
+        task_phase = GGML_TASK_TYPE_INIT;
         atomic_store(&state->shared->n_active,  n_threads);
         atomic_store(&state->shared->node_n,    node_n);
         atomic_store(&state->shared->node_task, task_phase);
@@ -17492,7 +17788,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         const int n_tasks = ggml_get_n_tasks(node, n_threads);
 
         struct ggml_compute_params params = {
-            /*.type  =*/
+            /*.type  =*/ GGML_TASK_TYPE_INIT,
            /*.ith   =*/ state->ith,
            /*.nth   =*/ n_tasks,
            /*.wsize =*/ cplan->work_size,
@@ -17506,7 +17802,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }
 
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
-            task_phase =
+            task_phase = GGML_TASK_TYPE_COMPUTE;
             atomic_store(&state->shared->n_active,  n_threads);
             atomic_store(&state->shared->node_task, task_phase);
         }
@@ -17521,12 +17817,12 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }
 
         if (state->ith < n_tasks) {
-            params.type =
+            params.type = GGML_TASK_TYPE_COMPUTE;
             ggml_compute_forward(&params, node);
         }
 
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
-            task_phase =
+            task_phase = GGML_TASK_TYPE_FINALIZE;
             atomic_store(&state->shared->n_active,  n_threads);
             atomic_store(&state->shared->node_task, task_phase);
         }
@@ -17535,7 +17831,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }
     }
 
-    return
+    return 0;
 }
 
 struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads) {
@@ -17731,7 +18027,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     return cplan;
 }
 
-
+enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     {
         GGML_ASSERT(cplan);
         GGML_ASSERT(cplan->n_threads > 0);
@@ -17762,7 +18058,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         /*.n_threads =*/ n_threads,
         /*.n_active  =*/ n_threads,
         /*.node_n    =*/ -1,
-        /*.node_task =*/
+        /*.node_task =*/ GGML_TASK_TYPE_FINALIZE,
         /*.abort_callback      =*/ NULL,
         /*.abort_callback_data =*/ NULL,
     };
@@ -17775,6 +18071,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
                 .thrd = 0,
                 .ith = j,
                 .shared = &state_shared,
+                .ec = GGML_STATUS_SUCCESS,
             };
 
             const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
@@ -17785,12 +18082,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
 
     workers[0].ith = 0;
     workers[0].shared = &state_shared;
+    workers[0].ec = GGML_STATUS_SUCCESS;
 
     const int64_t perf_start_cycles  = ggml_perf_cycles();
     const int64_t perf_start_time_us = ggml_perf_time_us();
 
     // this is a work thread too
-
+    ggml_graph_compute_thread(&workers[0]);
+    enum ggml_status compute_status = workers[0].ec;
 
     // don't leave affinity set on the main thread
     clear_numa_thread_affinity();
@@ -17800,6 +18099,8 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         for (int j = 1; j < n_threads; j++) {
             const int rc = ggml_thread_join(workers[j].thrd, NULL);
             GGML_ASSERT(rc == 0);
+            if (workers[j].ec != GGML_STATUS_SUCCESS)
+                compute_status = workers[j].ec;
         }
     }
 
@@ -17827,14 +18128,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     return compute_status;
 }
 
-
+enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
 
-    struct ggml_object * obj = ggml_new_object(ctx,
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
 
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
-    ggml_graph_compute(cgraph, &cplan);
+    return ggml_graph_compute(cgraph, &cplan);
 }
 
 struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
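With these hunks, ggml_graph_compute() and ggml_graph_compute_with_ctx() return an enum ggml_status instead of a plain int, so callers can distinguish a successful run from an aborted or failed one. A hedged usage sketch follows; it assumes the usual ggml.h graph helpers (ggml_init, ggml_new_graph, ggml_build_forward_expand, ggml_get_f32_1d) and should be adjusted to the headers actually vendored with this gem.

```c
// Hedged usage sketch: check the new enum ggml_status return value of
// ggml_graph_compute_with_ctx() after evaluating a tiny graph (c = a + b).
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);

    struct ggml_tensor * c = ggml_add(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    enum ggml_status st = ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 2);
    if (st != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "graph computation did not succeed (status %d)\n", (int) st);
        ggml_free(ctx);
        return 1;
    }

    printf("c[0] = %f\n", ggml_get_f32_1d(c, 0)); // expected 3.0
    ggml_free(ctx);
    return 0;
}
```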
@@ -18638,7 +18939,7 @@ static enum ggml_opt_result ggml_opt_adam(
     float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values
 
     struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
-    struct ggml_object * obj = ggml_new_object(ctx,
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
     bool cancel = false;
@@ -18650,7 +18951,7 @@ static enum ggml_opt_result ggml_opt_adam(
             if (callback) {
                 callback(callback_data, accum_step, &sched, &cancel);
                 if (cancel) {
-                    return
+                    return GGML_OPT_RESULT_CANCEL;
                 }
             }
             // ggml_graph_reset  (gf);
@@ -18741,7 +19042,7 @@ static enum ggml_opt_result ggml_opt_adam(
             if (callback) {
                 callback(callback_data, accum_step, &sched, &cancel);
                 if (cancel) {
-                    return
+                    return GGML_OPT_RESULT_CANCEL;;
                 }
             }
             // ggml_graph_reset  (gf);
@@ -18758,7 +19059,7 @@ static enum ggml_opt_result ggml_opt_adam(
             if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
                 GGML_PRINT_DEBUG("converged\n");
 
-                return
+                return GGML_OPT_RESULT_OK;
             }
 
             // delta-based convergence test
@@ -18768,7 +19069,7 @@ static enum ggml_opt_result ggml_opt_adam(
                 const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;
 
                 if (fabsf(rate) < params.delta) {
-                    return
+                    return GGML_OPT_RESULT_OK;
                 }
             }
 
@@ -18784,7 +19085,7 @@ static enum ggml_opt_result ggml_opt_adam(
                 ++n_no_improvement[0];
 
                 if (n_no_improvement[0] >= params.max_no_improvement) {
-                    return
+                    return GGML_OPT_RESULT_OK;
                 }
             }
         }
@@ -18802,7 +19103,7 @@ static enum ggml_opt_result ggml_opt_adam(
         }
     }
 
-    return
+    return GGML_OPT_RESULT_DID_NOT_CONVERGE;
 }
 
 //
@@ -18883,7 +19184,7 @@ static enum ggml_opt_result linesearch_backtracking(
                 float sched = 0;
                 callback(callback_data, accum_step, &sched, cancel);
                 if (*cancel) {
-                    return
+                    return GGML_OPT_RESULT_CANCEL;
                 }
             }
             // ggml_graph_reset  (gf);
@@ -18956,7 +19257,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     if (params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_WOLFE ||
         params.lbfgs.linesearch == GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE) {
         if (params.lbfgs.wolfe <= params.lbfgs.ftol || 1.f <= params.lbfgs.wolfe) {
-            return
+            return GGML_OPT_RESULT_INVALID_WOLFE;
         }
     }
 
@@ -18985,7 +19286,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
     }
 
     struct ggml_cplan cplan = ggml_graph_plan(gb, params.n_threads);
-    struct ggml_object * obj = ggml_new_object(ctx,
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
     float * x  = opt->lbfgs.x->data;  // current parameters
@@ -19026,7 +19327,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
                 float sched = 0;
                 callback(callback_data, accum_step, &sched, &cancel);
                 if (cancel) {
-                    return
+                    return GGML_OPT_RESULT_CANCEL;
                 }
             }
             // ggml_graph_reset  (gf);
@@ -19054,7 +19355,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
 
     // already optimized
     if (gnorm/xnorm <= params.lbfgs.eps) {
-        return
+        return GGML_OPT_RESULT_OK;
    }
 
    if (opt->just_initialized) {
@@ -19099,7 +19400,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         //       way to test and don't want to break something with so many changes lined up
         ls = linesearch_backtracking(&params, nx, x, &fx, g, d, step, xp, f, gb, &cplan, np, ps, &cancel, callback, callback_data);
         if (cancel) {
-            return
+            return GGML_OPT_RESULT_CANCEL;
         }
 
         if (ls < 0) {
@@ -19122,7 +19423,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
         }
         if (gnorm/xnorm <= params.lbfgs.eps) {
             // converged
-            return
+            return GGML_OPT_RESULT_OK;
         }
 
         // delta-based convergence test
@@ -19132,7 +19433,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
             const float rate = (pf[k[0]%params.past] - fx)/fx;
 
             if (fabsf(rate) < params.delta) {
-                return
+                return GGML_OPT_RESULT_OK;
             }
         }
 
@@ -19148,14 +19449,14 @@ static enum ggml_opt_result ggml_opt_lbfgs(
                 n_no_improvement[0]++;
 
                 if (n_no_improvement[0] >= params.max_no_improvement) {
-                    return
+                    return GGML_OPT_RESULT_OK;
                 }
             }
         }
 
         if (params.lbfgs.n_iter != 0 && params.lbfgs.n_iter < it + 1) {
             // reached the maximum number of iterations
-            return
+            return GGML_OPT_RESULT_DID_NOT_CONVERGE;
         }
 
         // update vectors s and y:
@@ -19211,17 +19512,17 @@ static enum ggml_opt_result ggml_opt_lbfgs(
 
     GGML_ASSERT(false && "lbfgs failed");
 
-    return
+    return GGML_OPT_RESULT_DID_NOT_CONVERGE;
 }
 
 struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
     struct ggml_opt_params result;
 
     switch (type) {
-        case
+        case GGML_OPT_TYPE_ADAM:
             {
                 result = (struct ggml_opt_params) {
-                    .type =
+                    .type       = GGML_OPT_TYPE_ADAM,
                     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
                     .n_threads  = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
                     .past       = 0,
@@ -19249,10 +19550,10 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
                 },
             };
         } break;
-        case
+        case GGML_OPT_TYPE_LBFGS:
             {
                 result = (struct ggml_opt_params) {
-                    .type =
+                    .type       = GGML_OPT_TYPE_LBFGS,
                     .graph_size = GGML_DEFAULT_GRAPH_SIZE,
                     .n_threads  = 1,
                     .past       = 0,
@@ -19297,12 +19598,12 @@ GGML_API void ggml_opt_init(
     opt->just_initialized = true;
     if (opt->ctx == NULL) {
         struct ggml_init_params ctx_opt_params;
-        if (opt->params.type ==
+        if (opt->params.type == GGML_OPT_TYPE_ADAM) {
             ctx_opt_params.mem_size = GGML_MEM_ALIGN*3 + ggml_tensor_overhead()*3 + ggml_type_size(GGML_TYPE_F32)*nx*3;
             if (opt->params.past > 0) {
                 ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
             }
-        } else if (opt->params.type ==
+        } else if (opt->params.type == GGML_OPT_TYPE_LBFGS) {
             ctx_opt_params.mem_size = GGML_MEM_ALIGN*9 + ggml_tensor_overhead()*9 + ggml_type_size(GGML_TYPE_F32)*(nx*5 + opt->params.lbfgs.m*2 + nx*opt->params.lbfgs.m*2);
             if (opt->params.past > 0) {
                 ctx_opt_params.mem_size += GGML_MEM_ALIGN + ggml_tensor_overhead() + ggml_type_size(GGML_TYPE_F32)*opt->params.past;
@@ -19314,7 +19615,7 @@ GGML_API void ggml_opt_init(
         opt->ctx = ggml_init(ctx_opt_params);
     }
     switch (opt->params.type) {
-        case
+        case GGML_OPT_TYPE_ADAM:
             {
                 opt->adam.g  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
                 opt->adam.m  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
@@ -19328,7 +19629,7 @@ GGML_API void ggml_opt_init(
                     ggml_set_zero(opt->adam.pf);
                 }
             } break;
-        case
+        case GGML_OPT_TYPE_LBFGS:
             {
                 opt->lbfgs.x  = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
                 opt->lbfgs.xp = ggml_new_tensor_1d(opt->ctx, GGML_TYPE_F32, nx);
@@ -19372,13 +19673,13 @@ enum ggml_opt_result ggml_opt(
 
         ctx = ggml_init(params_ctx);
         if (ctx == NULL) {
-            return
+            return GGML_OPT_RESULT_NO_CONTEXT;
         }
 
         free_ctx = true;
     }
 
-    enum ggml_opt_result result =
+    enum ggml_opt_result result = GGML_OPT_RESULT_OK;
 
     struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
 
@@ -19417,14 +19718,14 @@ enum ggml_opt_result ggml_opt_resume_g(
         void * callback_data) {
 
     // build forward + backward compute graphs
-    enum ggml_opt_result result =
+    enum ggml_opt_result result = GGML_OPT_RESULT_OK;
 
     switch (opt->params.type) {
-        case
+        case GGML_OPT_TYPE_ADAM:
            {
                result = ggml_opt_adam(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
            } break;
-        case
+        case GGML_OPT_TYPE_LBFGS:
            {
                result = ggml_opt_lbfgs(ctx, opt, opt->params, f, gf, gb, callback, callback_data);
            } break;
@@ -19461,8 +19762,10 @@ void ggml_quantize_init(enum ggml_type type) {
     switch (type) {
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ1_S:   iq2xs_init_impl(type); break;
         case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
+        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
         default: // nothing
             break;
     }
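The ggml_quantize_chunk() hunks below add the new IQ3_S, IQ2_S, and IQ4_XS branches; each one converts the element offset `start` into a row index and a byte offset into the destination buffer. That arithmetic is summarized in the standalone sketch here (the row size is a made-up number; ggml derives it from ggml_row_size(type, n_per_row)).

```c
// Sketch of the chunk-offset arithmetic used in the ggml_quantize_chunk hunks below:
// a chunk starts at element index `start`, which must land on a row boundary, and the
// destination offset is start_row * row_size bytes.
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

int main(void) {
    const size_t n_per_row = 4096;          // elements per row (e.g. a hidden dimension)
    const size_t row_size  = 1424;          // bytes per quantized row (illustrative value)
    const size_t start     = 8 * n_per_row; // this chunk begins at row 8

    assert(start % n_per_row == 0);         // mirrors GGML_ASSERT(start % n_per_row == 0)

    size_t start_row  = start / n_per_row;
    size_t dst_offset = start_row * row_size;

    printf("start_row = %zu, dst byte offset = %zu\n", start_row, dst_offset);
    return 0;
}
```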
@@ -19737,6 +20040,24 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+        case GGML_TYPE_IQ3_S:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq3_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
+        case GGML_TYPE_IQ2_S:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq2_s(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
         case GGML_TYPE_IQ1_S:
             {
                 GGML_ASSERT(start % QK_K == 0);
@@ -19747,6 +20068,9 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 GGML_ASSERT(result == row_size * nrows);
             } break;
         case GGML_TYPE_IQ4_NL:
+#if QK_K == 64
+        case GGML_TYPE_IQ4_XS:
+#endif
             {
                 GGML_ASSERT(start % QK4_NL == 0);
                 GGML_ASSERT(start % n_per_row == 0);
@@ -19755,6 +20079,17 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 result = quantize_iq4_nl(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+#if QK_K != 64
+        case GGML_TYPE_IQ4_XS:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq4_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
+#endif
         case GGML_TYPE_F16:
             {
                 size_t elemsize = sizeof(ggml_fp16_t);