llama_cpp 0.9.2 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/ext/llama_cpp/src/ggml-alloc.c +378 -208
- data/ext/llama_cpp/src/ggml-alloc.h +68 -16
- data/ext/llama_cpp/src/ggml-backend-impl.h +87 -0
- data/ext/llama_cpp/src/ggml-backend.c +578 -13
- data/ext/llama_cpp/src/ggml-backend.h +70 -77
- data/ext/llama_cpp/src/ggml-cuda.cu +260 -46
- data/ext/llama_cpp/src/ggml-impl.h +13 -7
- data/ext/llama_cpp/src/ggml-metal.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.m +113 -32
- data/ext/llama_cpp/src/ggml-metal.metal +107 -1
- data/ext/llama_cpp/src/ggml-quants.c +174 -74
- data/ext/llama_cpp/src/ggml.c +881 -1459
- data/ext/llama_cpp/src/ggml.h +64 -45
- data/ext/llama_cpp/src/llama.cpp +555 -49
- data/ext/llama_cpp/src/llama.h +77 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +3 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -100,6 +100,49 @@ typedef void * thread_ret_t;
|
|
100
100
|
#include <hbwmalloc.h>
|
101
101
|
#endif
|
102
102
|
|
103
|
+
#if defined(__APPLE__)
|
104
|
+
#include <TargetConditionals.h>
|
105
|
+
#endif
|
106
|
+
|
107
|
+
#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && \
|
108
|
+
(!defined(TARGET_OS_TV) && !defined(TARGET_OS_WATCH))
|
109
|
+
|
110
|
+
#include <sys/wait.h>
|
111
|
+
|
112
|
+
void ggml_print_backtrace(void) {
|
113
|
+
/*
|
114
|
+
#include <execinfo.h>
|
115
|
+
#include <dlfcn.h>
|
116
|
+
|
117
|
+
void * trace[100];
|
118
|
+
|
119
|
+
int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
|
120
|
+
|
121
|
+
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
|
122
|
+
*/
|
123
|
+
|
124
|
+
// backtrace_symbols does not show line numbers, use gdb instead
|
125
|
+
char attach[32];
|
126
|
+
snprintf(attach, sizeof(attach), "attach %d", getpid());
|
127
|
+
int pid = fork();
|
128
|
+
if (pid == 0) {
|
129
|
+
execlp("gdb", "gdb", "--batch",
|
130
|
+
"-ex", "set style enabled on",
|
131
|
+
"-ex", attach,
|
132
|
+
"-ex", "bt -frame-info source-and-location",
|
133
|
+
"-ex", "detach",
|
134
|
+
"-ex", "quit",
|
135
|
+
NULL);
|
136
|
+
} else {
|
137
|
+
waitpid(pid, NULL, 0);
|
138
|
+
}
|
139
|
+
}
|
140
|
+
#else
|
141
|
+
void ggml_print_backtrace(void) {
|
142
|
+
// platform not supported
|
143
|
+
}
|
144
|
+
#endif
|
145
|
+
|
103
146
|
/*#define GGML_PERF*/
|
104
147
|
#define GGML_DEBUG 0
|
105
148
|
#define GGML_GELU_FP16
|
@@ -228,6 +271,12 @@ inline static void * ggml_aligned_malloc(size_t size) {
|
|
228
271
|
// floating point type used to accumulate sums
|
229
272
|
typedef double ggml_float;
|
230
273
|
|
274
|
+
#undef MIN
|
275
|
+
#undef MAX
|
276
|
+
|
277
|
+
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
278
|
+
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
279
|
+
|
231
280
|
//
|
232
281
|
// global data
|
233
282
|
//
|
@@ -561,6 +610,18 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|
561
610
|
// simd mappings
|
562
611
|
//
|
563
612
|
|
613
|
+
#if defined(__ARM_NEON)
|
614
|
+
#if !defined(__aarch64__)
|
615
|
+
|
616
|
+
// 64-bit compatibility
|
617
|
+
|
618
|
+
inline static float vaddvq_f32(float32x4_t v) {
|
619
|
+
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
|
620
|
+
}
|
621
|
+
|
622
|
+
#endif
|
623
|
+
#endif
|
624
|
+
|
564
625
|
// we define a common set of C macros which map to specific intrinsics based on the current architecture
|
565
626
|
// we then implement the fundamental computation operations below using only these macros
|
566
627
|
// adding support for new architectures requires to define the corresponding SIMD macros
|
@@ -1352,6 +1413,7 @@ inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) {
|
|
1352
1413
|
inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
|
1353
1414
|
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
|
1354
1415
|
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
|
1416
|
+
inline static void ggml_vec_leaky_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.1f*x[i]; }
|
1355
1417
|
|
1356
1418
|
static const float GELU_COEF_A = 0.044715f;
|
1357
1419
|
static const float GELU_QUICK_COEF = -1.702f;
|
@@ -1572,13 +1634,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
1572
1634
|
"ROPE_BACK",
|
1573
1635
|
"ALIBI",
|
1574
1636
|
"CLAMP",
|
1575
|
-
"CONV_1D",
|
1576
|
-
"CONV_1D_STAGE_0",
|
1577
|
-
"CONV_1D_STAGE_1",
|
1578
1637
|
"CONV_TRANSPOSE_1D",
|
1579
|
-
"CONV_2D",
|
1580
|
-
"CONV_2D_STAGE_0",
|
1581
|
-
"CONV_2D_STAGE_1",
|
1638
|
+
"IM2COL",
|
1582
1639
|
"CONV_TRANSPOSE_2D",
|
1583
1640
|
"POOL_1D",
|
1584
1641
|
"POOL_2D",
|
@@ -1609,7 +1666,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
1609
1666
|
"CROSS_ENTROPY_LOSS_BACK",
|
1610
1667
|
};
|
1611
1668
|
|
1612
|
-
static_assert(GGML_OP_COUNT == 73, "GGML_OP_COUNT != 73");
|
1669
|
+
static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
|
1613
1670
|
|
1614
1671
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
1615
1672
|
"none",
|
@@ -1659,13 +1716,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1659
1716
|
"rope_back(x)",
|
1660
1717
|
"alibi(x)",
|
1661
1718
|
"clamp(x)",
|
1662
|
-
"conv_1d(x)",
|
1663
|
-
"conv_1d_stage_0(x)",
|
1664
|
-
"conv_1d_stage_1(x)",
|
1665
1719
|
"conv_transpose_1d(x)",
|
1666
|
-
"conv_2d(x)",
|
1667
|
-
"conv_2d_stage_0(x)",
|
1668
|
-
"conv_2d_stage_1(x)",
|
1720
|
+
"im2col(x)",
|
1669
1721
|
"conv_transpose_2d(x)",
|
1670
1722
|
"pool_1d(x)",
|
1671
1723
|
"pool_2d(x)",
|
@@ -1696,7 +1748,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1696
1748
|
"cross_entropy_loss_back(x,y)",
|
1697
1749
|
};
|
1698
1750
|
|
1699
|
-
static_assert(GGML_OP_COUNT == 73, "GGML_OP_COUNT != 73");
|
1751
|
+
static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
|
1700
1752
|
|
1701
1753
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
1702
1754
|
|
@@ -1724,13 +1776,7 @@ static void ggml_setup_op_has_task_pass(void) {
|
|
1724
1776
|
p[GGML_OP_GET_ROWS_BACK ] = true;
|
1725
1777
|
p[GGML_OP_DIAG_MASK_INF ] = true;
|
1726
1778
|
p[GGML_OP_DIAG_MASK_ZERO ] = true;
|
1727
|
-
p[GGML_OP_CONV_1D ] = true;
|
1728
|
-
p[GGML_OP_CONV_1D_STAGE_0 ] = true;
|
1729
|
-
p[GGML_OP_CONV_1D_STAGE_1 ] = true;
|
1730
1779
|
p[GGML_OP_CONV_TRANSPOSE_1D ] = true;
|
1731
|
-
p[GGML_OP_CONV_2D ] = true;
|
1732
|
-
p[GGML_OP_CONV_2D_STAGE_0 ] = true;
|
1733
|
-
p[GGML_OP_CONV_2D_STAGE_1 ] = true;
|
1734
1780
|
p[GGML_OP_CONV_TRANSPOSE_2D ] = true;
|
1735
1781
|
p[GGML_OP_FLASH_ATTN_BACK ] = true;
|
1736
1782
|
p[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
|
@@ -3769,6 +3815,14 @@ struct ggml_tensor * ggml_relu_inplace(
|
|
3769
3815
|
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
|
3770
3816
|
}
|
3771
3817
|
|
3818
|
+
// ggml_leaky
|
3819
|
+
|
3820
|
+
struct ggml_tensor * ggml_leaky(
|
3821
|
+
struct ggml_context * ctx,
|
3822
|
+
struct ggml_tensor * a) {
|
3823
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_LEAKY);
|
3824
|
+
}
|
3825
|
+
|
3772
3826
|
// ggml_gelu
|
3773
3827
|
|
3774
3828
|
struct ggml_tensor * ggml_gelu(
|
@@ -5076,82 +5130,6 @@ static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p,
|
|
5076
5130
|
return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
|
5077
5131
|
}
|
5078
5132
|
|
5079
|
-
// im2col: [N, IC, IL] => [N, OL, IC*K]
|
5080
|
-
// a: [OC,IC, K]
|
5081
|
-
// b: [N, IC, IL]
|
5082
|
-
// result: [N, OL, IC*K]
|
5083
|
-
static struct ggml_tensor * ggml_conv_1d_stage_0(
|
5084
|
-
struct ggml_context * ctx,
|
5085
|
-
struct ggml_tensor * a,
|
5086
|
-
struct ggml_tensor * b,
|
5087
|
-
int s0,
|
5088
|
-
int p0,
|
5089
|
-
int d0) {
|
5090
|
-
GGML_ASSERT(a->ne[1] == b->ne[1]);
|
5091
|
-
bool is_node = false;
|
5092
|
-
|
5093
|
-
if (a->grad || b->grad) {
|
5094
|
-
GGML_ASSERT(false); // TODO: implement backward
|
5095
|
-
is_node = true;
|
5096
|
-
}
|
5097
|
-
|
5098
|
-
const int64_t OL = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
5099
|
-
|
5100
|
-
const int64_t ne[4] = {
|
5101
|
-
a->ne[1] * a->ne[0],
|
5102
|
-
OL,
|
5103
|
-
b->ne[2],
|
5104
|
-
1,
|
5105
|
-
};
|
5106
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
|
5107
|
-
|
5108
|
-
int32_t params[] = { s0, p0, d0 };
|
5109
|
-
ggml_set_op_params(result, params, sizeof(params));
|
5110
|
-
|
5111
|
-
result->op = GGML_OP_CONV_1D_STAGE_0;
|
5112
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5113
|
-
result->src[0] = a;
|
5114
|
-
result->src[1] = b;
|
5115
|
-
|
5116
|
-
return result;
|
5117
|
-
}
|
5118
|
-
|
5119
|
-
// ggml_conv_1d_stage_1
|
5120
|
-
|
5121
|
-
// gemm: [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
|
5122
|
-
// a: [OC, IC, K]
|
5123
|
-
// b: [N, OL, IC * K]
|
5124
|
-
// result: [N, OC, OL]
|
5125
|
-
static struct ggml_tensor * ggml_conv_1d_stage_1(
|
5126
|
-
struct ggml_context * ctx,
|
5127
|
-
struct ggml_tensor * a,
|
5128
|
-
struct ggml_tensor * b) {
|
5129
|
-
|
5130
|
-
bool is_node = false;
|
5131
|
-
|
5132
|
-
if (a->grad || b->grad) {
|
5133
|
-
GGML_ASSERT(false); // TODO: implement backward
|
5134
|
-
is_node = true;
|
5135
|
-
}
|
5136
|
-
|
5137
|
-
const int64_t ne[4] = {
|
5138
|
-
b->ne[1],
|
5139
|
-
a->ne[2],
|
5140
|
-
b->ne[2],
|
5141
|
-
1,
|
5142
|
-
};
|
5143
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
5144
|
-
|
5145
|
-
result->op = GGML_OP_CONV_1D_STAGE_1;
|
5146
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5147
|
-
result->src[0] = a;
|
5148
|
-
result->src[1] = b;
|
5149
|
-
|
5150
|
-
return result;
|
5151
|
-
}
|
5152
|
-
|
5153
|
-
// ggml_conv_1d
|
5154
|
-
|
5155
5133
|
GGML_API struct ggml_tensor * ggml_conv_1d(
|
5156
5134
|
struct ggml_context * ctx,
|
5157
5135
|
struct ggml_tensor * a,
|
@@ -5159,43 +5137,17 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|
5159
5137
|
int s0,
|
5160
5138
|
int p0,
|
5161
5139
|
int d0) {
|
5162
|
-
struct ggml_tensor * result = ggml_conv_1d_stage_0(ctx, a, b, s0, p0, d0);
|
5163
|
-
result = ggml_conv_1d_stage_1(ctx, a, result);
|
5164
|
-
return result;
|
5165
|
-
}
|
5166
|
-
|
5167
|
-
// GGML_API struct ggml_tensor * ggml_conv_1d(
|
5168
|
-
// struct ggml_context * ctx,
|
5169
|
-
// struct ggml_tensor * a,
|
5170
|
-
// struct ggml_tensor * b,
|
5171
|
-
// int s0,
|
5172
|
-
// int p0,
|
5173
|
-
// int d0) {
|
5174
|
-
// GGML_ASSERT(ggml_is_matrix(b));
|
5175
|
-
// GGML_ASSERT(a->ne[1] == b->ne[1]);
|
5176
|
-
// bool is_node = false;
|
5177
|
-
|
5178
|
-
// if (a->grad || b->grad) {
|
5179
|
-
// GGML_ASSERT(false); // TODO: implement backward
|
5180
|
-
// is_node = true;
|
5181
|
-
// }
|
5140
|
+
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false); // [N, OL, IC * K]
|
5182
5141
|
|
5183
|
-
|
5184
|
-
|
5185
|
-
|
5186
|
-
//
|
5187
|
-
// struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
5142
|
+
struct ggml_tensor * result =
|
5143
|
+
ggml_mul_mat(ctx,
|
5144
|
+
ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
|
5145
|
+
ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2])); // [OC,IC, K] => [OC, IC * K]
|
5188
5146
|
|
5189
|
-
|
5190
|
-
// ggml_set_op_params(result, params, sizeof(params));
|
5147
|
+
result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
|
5191
5148
|
|
5192
|
-
|
5193
|
-
|
5194
|
-
// result->src[0] = a;
|
5195
|
-
// result->src[1] = b;
|
5196
|
-
|
5197
|
-
// return result;
|
5198
|
-
// }
|
5149
|
+
return result;
|
5150
|
+
}
|
5199
5151
|
|
5200
5152
|
// ggml_conv_1d_ph
|
5201
5153
|
|
@@ -5258,7 +5210,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
|
5258
5210
|
// a: [OC,IC, KH, KW]
|
5259
5211
|
// b: [N, IC, IH, IW]
|
5260
5212
|
// result: [N, OH, OW, IC*KH*KW]
|
5261
|
-
|
5213
|
+
struct ggml_tensor * ggml_im2col(
|
5262
5214
|
struct ggml_context * ctx,
|
5263
5215
|
struct ggml_tensor * a,
|
5264
5216
|
struct ggml_tensor * b,
|
@@ -5267,9 +5219,14 @@ static struct ggml_tensor * ggml_conv_2d_stage_0(
|
|
5267
5219
|
int p0,
|
5268
5220
|
int p1,
|
5269
5221
|
int d0,
|
5270
|
-
int d1
|
5222
|
+
int d1,
|
5223
|
+
bool is_2D) {
|
5271
5224
|
|
5272
|
-
|
5225
|
+
if(is_2D) {
|
5226
|
+
GGML_ASSERT(a->ne[2] == b->ne[2]);
|
5227
|
+
} else {
|
5228
|
+
GGML_ASSERT(a->ne[1] == b->ne[1]);
|
5229
|
+
}
|
5273
5230
|
bool is_node = false;
|
5274
5231
|
|
5275
5232
|
if (a->grad || b->grad) {
|
@@ -5277,81 +5234,51 @@ static struct ggml_tensor * ggml_conv_2d_stage_0(
|
|
5277
5234
|
is_node = true;
|
5278
5235
|
}
|
5279
5236
|
|
5280
|
-
const int64_t OH = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
|
5281
|
-
const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
5237
|
+
const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
|
5238
|
+
const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
5282
5239
|
|
5283
5240
|
const int64_t ne[4] = {
|
5284
|
-
a->ne[2] * a->ne[1] * a->ne[0],
|
5241
|
+
is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
|
5285
5242
|
OW,
|
5286
|
-
OH,
|
5287
|
-
b->ne[3],
|
5243
|
+
is_2D ? OH : b->ne[2],
|
5244
|
+
is_2D ? b->ne[3] : 1,
|
5288
5245
|
};
|
5289
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
|
5290
5246
|
|
5291
|
-
|
5247
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
|
5248
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
5292
5249
|
ggml_set_op_params(result, params, sizeof(params));
|
5293
5250
|
|
5294
|
-
result->op = GGML_OP_CONV_2D_STAGE_0;
|
5295
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5296
|
-
result->src[0] = a;
|
5297
|
-
result->src[1] = b;
|
5298
|
-
|
5299
|
-
return result;
|
5300
|
-
|
5301
|
-
}
|
5302
|
-
|
5303
|
-
// gemm: [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
|
5304
|
-
// a: [OC, IC, KH, KW]
|
5305
|
-
// b: [N, OH, OW, IC * KH * KW]
|
5306
|
-
// result: [N, OC, OH, OW]
|
5307
|
-
static struct ggml_tensor * ggml_conv_2d_stage_1(
|
5308
|
-
struct ggml_context * ctx,
|
5309
|
-
struct ggml_tensor * a,
|
5310
|
-
struct ggml_tensor * b) {
|
5311
|
-
|
5312
|
-
bool is_node = false;
|
5313
|
-
|
5314
|
-
if (a->grad || b->grad) {
|
5315
|
-
GGML_ASSERT(false); // TODO: implement backward
|
5316
|
-
is_node = true;
|
5317
|
-
}
|
5318
|
-
|
5319
|
-
const int64_t ne[4] = {
|
5320
|
-
b->ne[1],
|
5321
|
-
b->ne[2],
|
5322
|
-
a->ne[3],
|
5323
|
-
b->ne[3],
|
5324
|
-
};
|
5325
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
5326
|
-
|
5327
|
-
result->op = GGML_OP_CONV_2D_STAGE_1;
|
5251
|
+
result->op = GGML_OP_IM2COL;
|
5328
5252
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5329
5253
|
result->src[0] = a;
|
5330
5254
|
result->src[1] = b;
|
5331
5255
|
|
5332
5256
|
return result;
|
5333
|
-
|
5334
5257
|
}
|
5335
5258
|
|
5336
5259
|
// a: [OC,IC, KH, KW]
|
5337
5260
|
// b: [N, IC, IH, IW]
|
5338
5261
|
// result: [N, OC, OH, OW]
|
5339
5262
|
struct ggml_tensor * ggml_conv_2d(
|
5340
|
-
|
5341
|
-
|
5342
|
-
|
5343
|
-
|
5344
|
-
|
5345
|
-
|
5346
|
-
|
5347
|
-
|
5348
|
-
|
5263
|
+
struct ggml_context * ctx,
|
5264
|
+
struct ggml_tensor * a,
|
5265
|
+
struct ggml_tensor * b,
|
5266
|
+
int s0,
|
5267
|
+
int s1,
|
5268
|
+
int p0,
|
5269
|
+
int p1,
|
5270
|
+
int d0,
|
5271
|
+
int d1) {
|
5272
|
+
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true); // [N, OH, OW, IC * KH * KW]
|
5349
5273
|
|
5350
|
-
struct ggml_tensor * result =
|
5351
|
-
|
5274
|
+
struct ggml_tensor * result =
|
5275
|
+
ggml_mul_mat(ctx,
|
5276
|
+
ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
|
5277
|
+
ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
|
5352
5278
|
|
5353
|
-
|
5279
|
+
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], a->ne[3], im2col->ne[3]); // [N, OC, OH, OW]
|
5354
5280
|
|
5281
|
+
return result;
|
5355
5282
|
}
|
5356
5283
|
|
5357
5284
|
// ggml_conv_2d_sk_p0
|
@@ -5411,7 +5338,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
|
|
5411
5338
|
|
5412
5339
|
// ggml_pool_*
|
5413
5340
|
|
5414
|
-
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
5341
|
+
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
|
5415
5342
|
return (ins + 2 * p - ks) / s + 1;
|
5416
5343
|
}
|
5417
5344
|
|
@@ -5458,8 +5385,8 @@ struct ggml_tensor * ggml_pool_2d(
|
|
5458
5385
|
int k1,
|
5459
5386
|
int s0,
|
5460
5387
|
int s1,
|
5461
|
-
|
5462
|
-
|
5388
|
+
float p0,
|
5389
|
+
float p1) {
|
5463
5390
|
|
5464
5391
|
bool is_node = false;
|
5465
5392
|
|
@@ -8921,6 +8848,48 @@ static void ggml_compute_forward_silu(
|
|
8921
8848
|
}
|
8922
8849
|
}
|
8923
8850
|
|
8851
|
+
// ggml_compute_forward_leaky
|
8852
|
+
|
8853
|
+
static void ggml_compute_forward_leaky_f32(
|
8854
|
+
const struct ggml_compute_params * params,
|
8855
|
+
const struct ggml_tensor * src0,
|
8856
|
+
struct ggml_tensor * dst) {
|
8857
|
+
assert(params->ith == 0);
|
8858
|
+
assert(ggml_are_same_shape(src0, dst));
|
8859
|
+
|
8860
|
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
8861
|
+
return;
|
8862
|
+
}
|
8863
|
+
|
8864
|
+
const int n = ggml_nrows(src0);
|
8865
|
+
const int nc = src0->ne[0];
|
8866
|
+
|
8867
|
+
assert(dst->nb[0] == sizeof(float));
|
8868
|
+
assert(src0->nb[0] == sizeof(float));
|
8869
|
+
|
8870
|
+
for (int i = 0; i < n; i++) {
|
8871
|
+
ggml_vec_leaky_f32(nc,
|
8872
|
+
(float *) ((char *) dst->data + i*( dst->nb[1])),
|
8873
|
+
(float *) ((char *) src0->data + i*(src0->nb[1])));
|
8874
|
+
}
|
8875
|
+
}
|
8876
|
+
|
8877
|
+
static void ggml_compute_forward_leaky(
|
8878
|
+
const struct ggml_compute_params * params,
|
8879
|
+
const struct ggml_tensor * src0,
|
8880
|
+
struct ggml_tensor * dst) {
|
8881
|
+
switch (src0->type) {
|
8882
|
+
case GGML_TYPE_F32:
|
8883
|
+
{
|
8884
|
+
ggml_compute_forward_leaky_f32(params, src0, dst);
|
8885
|
+
} break;
|
8886
|
+
default:
|
8887
|
+
{
|
8888
|
+
GGML_ASSERT(false);
|
8889
|
+
} break;
|
8890
|
+
}
|
8891
|
+
}
|
8892
|
+
|
8924
8893
|
// ggml_compute_forward_silu_back
|
8925
8894
|
|
8926
8895
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -9404,6 +9373,8 @@ static bool ggml_compute_forward_mul_mat_use_blas(
|
|
9404
9373
|
// TODO: find the optimal values for these
|
9405
9374
|
if (ggml_is_contiguous(src0) &&
|
9406
9375
|
ggml_is_contiguous(src1) &&
|
9376
|
+
src0->type == GGML_TYPE_F32 &&
|
9377
|
+
src1->type == GGML_TYPE_F32 &&
|
9407
9378
|
(ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
|
9408
9379
|
|
9409
9380
|
/*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
|
@@ -9442,7 +9413,7 @@ static void ggml_compute_forward_mul_mat(
|
|
9442
9413
|
|
9443
9414
|
// we don't support permuted src0 or src1
|
9444
9415
|
GGML_ASSERT(nb00 == ggml_type_size(type));
|
9445
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
9416
|
+
GGML_ASSERT(nb10 == ggml_type_size(src1->type));
|
9446
9417
|
|
9447
9418
|
// dst cannot be transposed or permuted
|
9448
9419
|
GGML_ASSERT(nb0 == sizeof(float));
|
@@ -9640,10 +9611,12 @@ static void ggml_compute_forward_out_prod_f32(
|
|
9640
9611
|
const int ith = params->ith;
|
9641
9612
|
const int nth = params->nth;
|
9642
9613
|
|
9614
|
+
GGML_ASSERT(ne0 == ne00);
|
9615
|
+
GGML_ASSERT(ne1 == ne10);
|
9616
|
+
GGML_ASSERT(ne2 == ne02);
|
9643
9617
|
GGML_ASSERT(ne02 == ne12);
|
9644
|
-
GGML_ASSERT(ne03 == ne13);
|
9645
|
-
GGML_ASSERT(ne2 == ne12);
|
9646
9618
|
GGML_ASSERT(ne3 == ne13);
|
9619
|
+
GGML_ASSERT(ne03 == ne13);
|
9647
9620
|
|
9648
9621
|
// we don't support permuted src0 or src1
|
9649
9622
|
GGML_ASSERT(nb00 == sizeof(float));
|
@@ -9654,18 +9627,25 @@ static void ggml_compute_forward_out_prod_f32(
|
|
9654
9627
|
// GGML_ASSERT(nb1 <= nb2);
|
9655
9628
|
// GGML_ASSERT(nb2 <= nb3);
|
9656
9629
|
|
9657
|
-
GGML_ASSERT(ne0 == ne00);
|
9658
|
-
GGML_ASSERT(ne1 == ne10);
|
9659
|
-
GGML_ASSERT(ne2 == ne02);
|
9660
|
-
GGML_ASSERT(ne3 == ne03);
|
9661
|
-
|
9662
9630
|
// nb01 >= nb00 - src0 is not transposed
|
9663
9631
|
// compute by src0 rows
|
9664
9632
|
|
9665
9633
|
// TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
|
9666
|
-
// TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
|
9634
|
+
// TODO: #if defined(GGML_USE_CLBLAST)
|
9635
|
+
|
9636
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9637
|
+
bool use_blas = ggml_is_matrix(src0) &&
|
9638
|
+
ggml_is_matrix(src1) &&
|
9639
|
+
ggml_is_contiguous(src0) &&
|
9640
|
+
(ggml_is_contiguous(src1) || ggml_is_transposed(src1));
|
9641
|
+
#endif
|
9667
9642
|
|
9668
9643
|
if (params->type == GGML_TASK_INIT) {
|
9644
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
|
9645
|
+
if (use_blas) {
|
9646
|
+
return;
|
9647
|
+
}
|
9648
|
+
#endif
|
9669
9649
|
ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
|
9670
9650
|
return;
|
9671
9651
|
}
|
@@ -9674,6 +9654,50 @@ static void ggml_compute_forward_out_prod_f32(
|
|
9674
9654
|
return;
|
9675
9655
|
}
|
9676
9656
|
|
9657
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
9658
|
+
if (use_blas) {
|
9659
|
+
if (params->ith != 0) { // All threads other than the first do no work.
|
9660
|
+
return;
|
9661
|
+
}
|
9662
|
+
// Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
|
9663
|
+
// src0: (k,n)
|
9664
|
+
// src1: (k,m)
|
9665
|
+
// dst: (m,n)
|
9666
|
+
//
|
9667
|
+
// Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
|
9668
|
+
// Also expressed as (major,minor)
|
9669
|
+
// a: (m,k): so src1 transposed
|
9670
|
+
// b: (k,n): so src0
|
9671
|
+
// c: (m,n)
|
9672
|
+
//
|
9673
|
+
// However, if ggml_is_transposed(src1) is true, then
|
9674
|
+
// src1->data already contains a transposed version, so sgemm mustn't
|
9675
|
+
// transpose it further.
|
9676
|
+
|
9677
|
+
int n = src0->ne[0];
|
9678
|
+
int k = src0->ne[1];
|
9679
|
+
int m = src1->ne[0];
|
9680
|
+
|
9681
|
+
int transposeA, lda;
|
9682
|
+
|
9683
|
+
if (!ggml_is_transposed(src1)) {
|
9684
|
+
transposeA = CblasTrans;
|
9685
|
+
lda = m;
|
9686
|
+
} else {
|
9687
|
+
transposeA = CblasNoTrans;
|
9688
|
+
lda = k;
|
9689
|
+
}
|
9690
|
+
|
9691
|
+
float * a = (float *) ((char *) src1->data);
|
9692
|
+
float * b = (float *) ((char *) src0->data);
|
9693
|
+
float * c = (float *) ((char *) dst->data);
|
9694
|
+
|
9695
|
+
cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
|
9696
|
+
|
9697
|
+
return;
|
9698
|
+
}
|
9699
|
+
#endif
|
9700
|
+
|
9677
9701
|
// dst[:,:,:,:] = 0
|
9678
9702
|
// for i2,i3:
|
9679
9703
|
// for i1:
|
@@ -11340,9 +11364,9 @@ static void ggml_compute_forward_rope_back(
|
|
11340
11364
|
}
|
11341
11365
|
}
|
11342
11366
|
|
11343
|
-
// ggml_compute_forward_conv_1d
|
11367
|
+
// ggml_compute_forward_conv_transpose_1d
|
11344
11368
|
|
11345
|
-
static void ggml_compute_forward_conv_1d_f16_f32(
|
11369
|
+
static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
11346
11370
|
const struct ggml_compute_params * params,
|
11347
11371
|
const struct ggml_tensor * src0,
|
11348
11372
|
const struct ggml_tensor * src1,
|
@@ -11359,14 +11383,7 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11359
11383
|
const int ith = params->ith;
|
11360
11384
|
const int nth = params->nth;
|
11361
11385
|
|
11362
|
-
const int nk = ne00;
|
11363
|
-
|
11364
|
-
// size of the convolution row - the kernel size unrolled across all input channels
|
11365
|
-
const int ew0 = nk*ne01;
|
11366
|
-
|
11367
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11368
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
11369
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
11386
|
+
const int nk = ne00*ne01*ne02;
|
11370
11387
|
|
11371
11388
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11372
11389
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -11374,23 +11391,37 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11374
11391
|
if (params->type == GGML_TASK_INIT) {
|
11375
11392
|
memset(params->wdata, 0, params->wsize);
|
11376
11393
|
|
11377
|
-
|
11394
|
+
// permute kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
|
11395
|
+
{
|
11396
|
+
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11397
|
+
|
11398
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
11399
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
11400
|
+
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
|
11401
|
+
ggml_fp16_t * dst_data = wdata + i01*ne00*ne02;
|
11402
|
+
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
11403
|
+
dst_data[i00*ne02 + i02] = src[i00];
|
11404
|
+
}
|
11405
|
+
}
|
11406
|
+
}
|
11407
|
+
}
|
11378
11408
|
|
11379
|
-
|
11380
|
-
|
11409
|
+
// permute source data (src1) from (L x Cin) to (Cin x L)
|
11410
|
+
{
|
11411
|
+
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + nk;
|
11381
11412
|
ggml_fp16_t * dst_data = wdata;
|
11382
11413
|
|
11383
|
-
for (int64_t i0 = 0; i0 < ne0; i0++) {
|
11384
|
-
|
11385
|
-
|
11386
|
-
|
11387
|
-
if(!(idx0 < 0 || idx0 >= ne10)) {
|
11388
|
-
dst_data[i0*ew0 + i11*nk + ik] = GGML_FP32_TO_FP16(src[idx0]);
|
11389
|
-
}
|
11414
|
+
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
11415
|
+
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
11416
|
+
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
11417
|
+
dst_data[i10*ne11 + i11] = GGML_FP32_TO_FP16(src[i10]);
|
11390
11418
|
}
|
11391
11419
|
}
|
11392
11420
|
}
|
11393
11421
|
|
11422
|
+
// need to zero dst since we are accumulating into it
|
11423
|
+
memset(dst->data, 0, ggml_nbytes(dst));
|
11424
|
+
|
11394
11425
|
return;
|
11395
11426
|
}
|
11396
11427
|
|
@@ -11398,8 +11429,10 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11398
11429
|
return;
|
11399
11430
|
}
|
11400
11431
|
|
11432
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11433
|
+
|
11401
11434
|
// total rows in dst
|
11402
|
-
const int nr = ne02;
|
11435
|
+
const int nr = ne1;
|
11403
11436
|
|
11404
11437
|
// rows per thread
|
11405
11438
|
const int dr = (nr + nth - 1)/nth;
|
@@ -11408,22 +11441,26 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11408
11441
|
const int ir0 = dr*ith;
|
11409
11442
|
const int ir1 = MIN(ir0 + dr, nr);
|
11410
11443
|
|
11411
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11412
|
-
|
11413
|
-
for (int i2 = 0; i2 < ne2; i2++) {
|
11414
|
-
for (int i1 = ir0; i1 < ir1; i1++) {
|
11415
|
-
float * dst_data = (float *)((char *) dst->data + i2*nb2 + i1*nb1);
|
11444
|
+
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11445
|
+
ggml_fp16_t * const wdata_src = wdata + nk;
|
11416
11446
|
|
11417
|
-
|
11418
|
-
|
11419
|
-
|
11420
|
-
|
11447
|
+
for (int i1 = ir0; i1 < ir1; i1++) {
|
11448
|
+
float * dst_data = (float *)((char *) dst->data + i1*nb1);
|
11449
|
+
ggml_fp16_t * wdata_kernel = wdata + i1*ne02*ne00;
|
11450
|
+
for (int i10 = 0; i10 < ne10; i10++) {
|
11451
|
+
const int i1n = i10*ne11;
|
11452
|
+
for (int i00 = 0; i00 < ne00; i00++) {
|
11453
|
+
float v = 0;
|
11454
|
+
ggml_vec_dot_f16(ne02, &v,
|
11455
|
+
(ggml_fp16_t *) wdata_src + i1n,
|
11456
|
+
(ggml_fp16_t *) wdata_kernel + i00*ne02);
|
11457
|
+
dst_data[i10*s0 + i00] += v;
|
11421
11458
|
}
|
11422
11459
|
}
|
11423
11460
|
}
|
11424
11461
|
}
|
11425
11462
|
|
11426
|
-
static void ggml_compute_forward_conv_1d_f32(
|
11463
|
+
static void ggml_compute_forward_conv_transpose_1d_f32(
|
11427
11464
|
const struct ggml_compute_params * params,
|
11428
11465
|
const struct ggml_tensor * src0,
|
11429
11466
|
const struct ggml_tensor * src1,
|
@@ -11440,430 +11477,7 @@ static void ggml_compute_forward_conv_1d_f32(
|
|
11440
11477
|
const int ith = params->ith;
|
11441
11478
|
const int nth = params->nth;
|
11442
11479
|
|
11443
|
-
const int nk = ne00;
|
11444
|
-
|
11445
|
-
const int ew0 = nk*ne01;
|
11446
|
-
|
11447
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11448
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
11449
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
11450
|
-
|
11451
|
-
GGML_ASSERT(nb00 == sizeof(float));
|
11452
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
11453
|
-
|
11454
|
-
if (params->type == GGML_TASK_INIT) {
|
11455
|
-
memset(params->wdata, 0, params->wsize);
|
11456
|
-
|
11457
|
-
float * const wdata = (float *) params->wdata + 0;
|
11458
|
-
|
11459
|
-
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
11460
|
-
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
11461
|
-
float * dst_data = wdata;
|
11462
|
-
|
11463
|
-
for (int64_t i0 = 0; i0 < ne0; i0++) {
|
11464
|
-
for (int64_t ik = 0; ik < nk; ik++) {
|
11465
|
-
const int idx0 = i0*s0 + ik*d0 - p0;
|
11466
|
-
|
11467
|
-
if(!(idx0 < 0 || idx0 >= ne10)) {
|
11468
|
-
dst_data[i0*ew0 + i11*nk + ik] = src[idx0];
|
11469
|
-
}
|
11470
|
-
}
|
11471
|
-
}
|
11472
|
-
}
|
11473
|
-
|
11474
|
-
return;
|
11475
|
-
}
|
11476
|
-
|
11477
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11478
|
-
return;
|
11479
|
-
}
|
11480
|
-
|
11481
|
-
// total rows in dst
|
11482
|
-
const int nr = ne02;
|
11483
|
-
|
11484
|
-
// rows per thread
|
11485
|
-
const int dr = (nr + nth - 1)/nth;
|
11486
|
-
|
11487
|
-
// row range for this thread
|
11488
|
-
const int ir0 = dr*ith;
|
11489
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11490
|
-
|
11491
|
-
float * const wdata = (float *) params->wdata + 0;
|
11492
|
-
|
11493
|
-
for (int i2 = 0; i2 < ne2; i2++) {
|
11494
|
-
for (int i1 = ir0; i1 < ir1; i1++) {
|
11495
|
-
float * dst_data = (float *)((char *) dst->data + i2*nb2 + i1*nb1);
|
11496
|
-
|
11497
|
-
for (int i0 = 0; i0 < ne0; i0++) {
|
11498
|
-
ggml_vec_dot_f32(ew0, dst_data + i0,
|
11499
|
-
(float *) ((char *) src0->data + i1*nb02),
|
11500
|
-
(float *) wdata + i2*nb2 + i0*ew0);
|
11501
|
-
}
|
11502
|
-
}
|
11503
|
-
}
|
11504
|
-
}
|
11505
|
-
|
11506
|
-
// TODO: reuse ggml_mul_mat or implement ggml_im2col and remove stage_0 and stage_1
|
11507
|
-
static void gemm_f16_out_f32(int64_t m, int64_t n, int64_t k,
|
11508
|
-
ggml_fp16_t * A,
|
11509
|
-
ggml_fp16_t * B,
|
11510
|
-
float * C,
|
11511
|
-
const int ith, const int nth) {
|
11512
|
-
// does not seem to make a difference
|
11513
|
-
int64_t m0, m1, n0, n1;
|
11514
|
-
// patches per thread
|
11515
|
-
if (m > n) {
|
11516
|
-
n0 = 0;
|
11517
|
-
n1 = n;
|
11518
|
-
|
11519
|
-
// total patches in dst
|
11520
|
-
const int np = m;
|
11521
|
-
|
11522
|
-
// patches per thread
|
11523
|
-
const int dp = (np + nth - 1)/nth;
|
11524
|
-
|
11525
|
-
// patch range for this thread
|
11526
|
-
m0 = dp*ith;
|
11527
|
-
m1 = MIN(m0 + dp, np);
|
11528
|
-
} else {
|
11529
|
-
m0 = 0;
|
11530
|
-
m1 = m;
|
11531
|
-
|
11532
|
-
// total patches in dst
|
11533
|
-
const int np = n;
|
11534
|
-
|
11535
|
-
// patches per thread
|
11536
|
-
const int dp = (np + nth - 1)/nth;
|
11537
|
-
|
11538
|
-
// patch range for this thread
|
11539
|
-
n0 = dp*ith;
|
11540
|
-
n1 = MIN(n0 + dp, np);
|
11541
|
-
}
|
11542
|
-
|
11543
|
-
// block-tiling attempt
|
11544
|
-
int64_t blck_n = 16;
|
11545
|
-
int64_t blck_m = 16;
|
11546
|
-
|
11547
|
-
// int64_t CACHE_SIZE = 2 * 1024 * 1024; // 2MB
|
11548
|
-
// int64_t blck_size = CACHE_SIZE / (sizeof(float) + 2 * sizeof(ggml_fp16_t) * K);
|
11549
|
-
// if (blck_size > 0) {
|
11550
|
-
// blck_0 = 4;
|
11551
|
-
// blck_1 = blck_size / blck_0;
|
11552
|
-
// if (blck_1 < 0) {
|
11553
|
-
// blck_1 = 1;
|
11554
|
-
// }
|
11555
|
-
// // blck_0 = (int64_t)sqrt(blck_size);
|
11556
|
-
// // blck_1 = blck_0;
|
11557
|
-
// }
|
11558
|
-
// // printf("%zd %zd %zd %zd\n", blck_size, K, blck_0, blck_1);
|
11559
|
-
|
11560
|
-
for (int j = n0; j < n1; j+=blck_n) {
|
11561
|
-
for (int i = m0; i < m1; i+=blck_m) {
|
11562
|
-
// printf("i j k => %d %d %d\n", i, j, K);
|
11563
|
-
for (int ii = i; ii < i + blck_m && ii < m1; ii++) {
|
11564
|
-
for (int jj = j; jj < j + blck_n && jj < n1; jj++) {
|
11565
|
-
ggml_vec_dot_f16(k,
|
11566
|
-
C + ii*n + jj,
|
11567
|
-
A + ii * k,
|
11568
|
-
B + jj * k);
|
11569
|
-
}
|
11570
|
-
}
|
11571
|
-
}
|
11572
|
-
}
|
11573
|
-
}
|
11574
|
-
|
11575
|
-
// src0: kernel [OC, IC, K]
|
11576
|
-
// src1: signal [N, IC, IL]
|
11577
|
-
// dst: result [N, OL, IC*K]
|
11578
|
-
static void ggml_compute_forward_conv_1d_stage_0_f32(
|
11579
|
-
const struct ggml_compute_params * params,
|
11580
|
-
const struct ggml_tensor * src0,
|
11581
|
-
const struct ggml_tensor * src1,
|
11582
|
-
struct ggml_tensor * dst) {
|
11583
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
11584
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11585
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
11586
|
-
|
11587
|
-
int64_t t0 = ggml_perf_time_us();
|
11588
|
-
UNUSED(t0);
|
11589
|
-
|
11590
|
-
GGML_TENSOR_BINARY_OP_LOCALS;
|
11591
|
-
|
11592
|
-
const int64_t N = ne12;
|
11593
|
-
const int64_t IC = ne11;
|
11594
|
-
const int64_t IL = ne10;
|
11595
|
-
|
11596
|
-
const int64_t K = ne00;
|
11597
|
-
|
11598
|
-
const int64_t OL = ne1;
|
11599
|
-
|
11600
|
-
const int ith = params->ith;
|
11601
|
-
const int nth = params->nth;
|
11602
|
-
|
11603
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11604
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
11605
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
11606
|
-
|
11607
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11608
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
11609
|
-
|
11610
|
-
if (params->type == GGML_TASK_INIT) {
|
11611
|
-
memset(dst->data, 0, ggml_nbytes(dst));
|
11612
|
-
return;
|
11613
|
-
}
|
11614
|
-
|
11615
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11616
|
-
return;
|
11617
|
-
}
|
11618
|
-
|
11619
|
-
// im2col: [N, IC, IL] => [N, OL, IC*K]
|
11620
|
-
{
|
11621
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) dst->data;
|
11622
|
-
|
11623
|
-
for (int64_t in = 0; in < N; in++) {
|
11624
|
-
for (int64_t iol = 0; iol < OL; iol++) {
|
11625
|
-
for (int64_t iic = ith; iic < IC; iic+=nth) {
|
11626
|
-
|
11627
|
-
// micro kernel
|
11628
|
-
ggml_fp16_t * dst_data = wdata + (in*OL + iol)*(IC*K); // [IC, K]
|
11629
|
-
const float * const src_data = (float *)((char *) src1->data + in*nb12 + iic*nb11); // [IL]
|
11630
|
-
|
11631
|
-
for (int64_t ik = 0; ik < K; ik++) {
|
11632
|
-
const int64_t iil = iol*s0 + ik*d0 - p0;
|
11633
|
-
|
11634
|
-
if (!(iil < 0 || iil >= IL)) {
|
11635
|
-
dst_data[iic*K + ik] = GGML_FP32_TO_FP16(src_data[iil]);
|
11636
|
-
}
|
11637
|
-
}
|
11638
|
-
}
|
11639
|
-
}
|
11640
|
-
}
|
11641
|
-
}
|
11642
|
-
}
|
11643
|
-
|
11644
|
-
// gemm: [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
|
11645
|
-
// src0: [OC, IC, K]
|
11646
|
-
// src1: [N, OL, IC * K]
|
11647
|
-
// result: [N, OC, OL]
|
11648
|
-
static void ggml_compute_forward_conv_1d_stage_1_f16(
|
11649
|
-
const struct ggml_compute_params * params,
|
11650
|
-
const struct ggml_tensor * src0,
|
11651
|
-
const struct ggml_tensor * src1,
|
11652
|
-
struct ggml_tensor * dst) {
|
11653
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
11654
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
11655
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
11656
|
-
|
11657
|
-
int64_t t0 = ggml_perf_time_us();
|
11658
|
-
UNUSED(t0);
|
11659
|
-
|
11660
|
-
if (params->type == GGML_TASK_INIT) {
|
11661
|
-
return;
|
11662
|
-
}
|
11663
|
-
|
11664
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11665
|
-
return;
|
11666
|
-
}
|
11667
|
-
|
11668
|
-
GGML_TENSOR_BINARY_OP_LOCALS;
|
11669
|
-
|
11670
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11671
|
-
GGML_ASSERT(nb10 == sizeof(ggml_fp16_t));
|
11672
|
-
GGML_ASSERT(nb0 == sizeof(float));
|
11673
|
-
|
11674
|
-
const int N = ne12;
|
11675
|
-
const int OL = ne11;
|
11676
|
-
|
11677
|
-
const int OC = ne02;
|
11678
|
-
const int IC = ne01;
|
11679
|
-
const int K = ne00;
|
11680
|
-
|
11681
|
-
const int ith = params->ith;
|
11682
|
-
const int nth = params->nth;
|
11683
|
-
|
11684
|
-
int64_t m = OC;
|
11685
|
-
int64_t n = OL;
|
11686
|
-
int64_t k = IC * K;
|
11687
|
-
|
11688
|
-
// [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
|
11689
|
-
for (int i = 0; i < N; i++) {
|
11690
|
-
ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
|
11691
|
-
ggml_fp16_t * B = (ggml_fp16_t *)src1->data + i * m * k; // [n, k]
|
11692
|
-
float * C = (float *)dst->data + i * m * n; // [m, n]
|
11693
|
-
|
11694
|
-
gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
|
11695
|
-
}
|
11696
|
-
}
|
11697
|
-
|
11698
|
-
static void ggml_compute_forward_conv_1d(
|
11699
|
-
const struct ggml_compute_params * params,
|
11700
|
-
const struct ggml_tensor * src0,
|
11701
|
-
const struct ggml_tensor * src1,
|
11702
|
-
struct ggml_tensor * dst) {
|
11703
|
-
switch(src0->type) {
|
11704
|
-
case GGML_TYPE_F16:
|
11705
|
-
{
|
11706
|
-
ggml_compute_forward_conv_1d_f16_f32(params, src0, src1, dst);
|
11707
|
-
} break;
|
11708
|
-
case GGML_TYPE_F32:
|
11709
|
-
{
|
11710
|
-
ggml_compute_forward_conv_1d_f32(params, src0, src1, dst);
|
11711
|
-
} break;
|
11712
|
-
default:
|
11713
|
-
{
|
11714
|
-
GGML_ASSERT(false);
|
11715
|
-
} break;
|
11716
|
-
}
|
11717
|
-
}
|
11718
|
-
|
11719
|
-
static void ggml_compute_forward_conv_1d_stage_0(
|
11720
|
-
const struct ggml_compute_params * params,
|
11721
|
-
const struct ggml_tensor * src0,
|
11722
|
-
const struct ggml_tensor * src1,
|
11723
|
-
struct ggml_tensor * dst) {
|
11724
|
-
switch(src0->type) {
|
11725
|
-
case GGML_TYPE_F16:
|
11726
|
-
{
|
11727
|
-
ggml_compute_forward_conv_1d_stage_0_f32(params, src0, src1, dst);
|
11728
|
-
} break;
|
11729
|
-
default:
|
11730
|
-
{
|
11731
|
-
GGML_ASSERT(false);
|
11732
|
-
} break;
|
11733
|
-
}
|
11734
|
-
}
|
11735
|
-
|
11736
|
-
static void ggml_compute_forward_conv_1d_stage_1(
|
11737
|
-
const struct ggml_compute_params * params,
|
11738
|
-
const struct ggml_tensor * src0,
|
11739
|
-
const struct ggml_tensor * src1,
|
11740
|
-
struct ggml_tensor * dst) {
|
11741
|
-
switch(src0->type) {
|
11742
|
-
case GGML_TYPE_F16:
|
11743
|
-
{
|
11744
|
-
ggml_compute_forward_conv_1d_stage_1_f16(params, src0, src1, dst);
|
11745
|
-
} break;
|
11746
|
-
default:
|
11747
|
-
{
|
11748
|
-
GGML_ASSERT(false);
|
11749
|
-
} break;
|
11750
|
-
}
|
11751
|
-
}
|
11752
|
-
|
11753
|
-
// ggml_compute_forward_conv_transpose_1d
|
11754
|
-
|
11755
|
-
static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
11756
|
-
const struct ggml_compute_params * params,
|
11757
|
-
const struct ggml_tensor * src0,
|
11758
|
-
const struct ggml_tensor * src1,
|
11759
|
-
struct ggml_tensor * dst) {
|
11760
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
11761
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11762
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
11763
|
-
|
11764
|
-
int64_t t0 = ggml_perf_time_us();
|
11765
|
-
UNUSED(t0);
|
11766
|
-
|
11767
|
-
GGML_TENSOR_BINARY_OP_LOCALS
|
11768
|
-
|
11769
|
-
const int ith = params->ith;
|
11770
|
-
const int nth = params->nth;
|
11771
|
-
|
11772
|
-
const int nk = ne00*ne01*ne02;
|
11773
|
-
|
11774
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11775
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
11776
|
-
|
11777
|
-
if (params->type == GGML_TASK_INIT) {
|
11778
|
-
memset(params->wdata, 0, params->wsize);
|
11779
|
-
|
11780
|
-
// permute kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
|
11781
|
-
{
|
11782
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11783
|
-
|
11784
|
-
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
11785
|
-
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
11786
|
-
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
|
11787
|
-
ggml_fp16_t * dst_data = wdata + i01*ne00*ne02;
|
11788
|
-
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
11789
|
-
dst_data[i00*ne02 + i02] = src[i00];
|
11790
|
-
}
|
11791
|
-
}
|
11792
|
-
}
|
11793
|
-
}
|
11794
|
-
|
11795
|
-
// permute source data (src1) from (L x Cin) to (Cin x L)
|
11796
|
-
{
|
11797
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + nk;
|
11798
|
-
ggml_fp16_t * dst_data = wdata;
|
11799
|
-
|
11800
|
-
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
11801
|
-
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
11802
|
-
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
11803
|
-
dst_data[i10*ne11 + i11] = GGML_FP32_TO_FP16(src[i10]);
|
11804
|
-
}
|
11805
|
-
}
|
11806
|
-
}
|
11807
|
-
|
11808
|
-
// need to zero dst since we are accumulating into it
|
11809
|
-
memset(dst->data, 0, ggml_nbytes(dst));
|
11810
|
-
|
11811
|
-
return;
|
11812
|
-
}
|
11813
|
-
|
11814
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11815
|
-
return;
|
11816
|
-
}
|
11817
|
-
|
11818
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11819
|
-
|
11820
|
-
// total rows in dst
|
11821
|
-
const int nr = ne1;
|
11822
|
-
|
11823
|
-
// rows per thread
|
11824
|
-
const int dr = (nr + nth - 1)/nth;
|
11825
|
-
|
11826
|
-
// row range for this thread
|
11827
|
-
const int ir0 = dr*ith;
|
11828
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11829
|
-
|
11830
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11831
|
-
ggml_fp16_t * const wdata_src = wdata + nk;
|
11832
|
-
|
11833
|
-
for (int i1 = ir0; i1 < ir1; i1++) {
|
11834
|
-
float * dst_data = (float *)((char *) dst->data + i1*nb1);
|
11835
|
-
ggml_fp16_t * wdata_kernel = wdata + i1*ne02*ne00;
|
11836
|
-
for (int i10 = 0; i10 < ne10; i10++) {
|
11837
|
-
const int i1n = i10*ne11;
|
11838
|
-
for (int i00 = 0; i00 < ne00; i00++) {
|
11839
|
-
float v = 0;
|
11840
|
-
ggml_vec_dot_f16(ne02, &v,
|
11841
|
-
(ggml_fp16_t *) wdata_src + i1n,
|
11842
|
-
(ggml_fp16_t *) wdata_kernel + i00*ne02);
|
11843
|
-
dst_data[i10*s0 + i00] += v;
|
11844
|
-
}
|
11845
|
-
}
|
11846
|
-
}
|
11847
|
-
}
|
11848
|
-
|
11849
|
-
static void ggml_compute_forward_conv_transpose_1d_f32(
|
11850
|
-
const struct ggml_compute_params * params,
|
11851
|
-
const struct ggml_tensor * src0,
|
11852
|
-
const struct ggml_tensor * src1,
|
11853
|
-
struct ggml_tensor * dst) {
|
11854
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
11855
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11856
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
11857
|
-
|
11858
|
-
int64_t t0 = ggml_perf_time_us();
|
11859
|
-
UNUSED(t0);
|
11860
|
-
|
11861
|
-
GGML_TENSOR_BINARY_OP_LOCALS
|
11862
|
-
|
11863
|
-
const int ith = params->ith;
|
11864
|
-
const int nth = params->nth;
|
11865
|
-
|
11866
|
-
const int nk = ne00*ne01*ne02;
|
11480
|
+
const int nk = ne00*ne01*ne02;
|
11867
11481
|
|
11868
11482
|
GGML_ASSERT(nb00 == sizeof(float));
|
11869
11483
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -11961,12 +11575,10 @@ static void ggml_compute_forward_conv_transpose_1d(
|
|
11961
11575
|
}
|
11962
11576
|
}
|
11963
11577
|
|
11964
|
-
// ggml_compute_forward_conv_2d
|
11965
|
-
|
11966
11578
|
// src0: kernel [OC, IC, KH, KW]
|
11967
11579
|
// src1: image [N, IC, IH, IW]
|
11968
11580
|
// dst: result [N, OH, OW, IC*KH*KW]
|
11969
|
-
static void ggml_compute_forward_conv_2d_stage_0_f32(
|
11581
|
+
static void ggml_compute_forward_im2col_f16(
|
11970
11582
|
const struct ggml_compute_params * params,
|
11971
11583
|
const struct ggml_tensor * src0,
|
11972
11584
|
const struct ggml_tensor * src1,
|
@@ -11980,34 +11592,35 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
|
|
11980
11592
|
|
11981
11593
|
GGML_TENSOR_BINARY_OP_LOCALS;
|
11982
11594
|
|
11983
|
-
const int64_t N = ne13;
|
11984
|
-
const int64_t IC = ne12;
|
11985
|
-
const int64_t IH = ne11;
|
11595
|
+
const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
|
11596
|
+
const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
|
11597
|
+
const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
|
11598
|
+
const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
|
11599
|
+
const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
|
11600
|
+
const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
|
11601
|
+
const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
|
11602
|
+
|
11603
|
+
const int ith = params->ith;
|
11604
|
+
const int nth = params->nth;
|
11605
|
+
|
11606
|
+
const int64_t N = is_2D ? ne13 : ne12;
|
11607
|
+
const int64_t IC = is_2D ? ne12 : ne11;
|
11608
|
+
const int64_t IH = is_2D ? ne11 : 1;
|
11986
11609
|
const int64_t IW = ne10;
|
11987
11610
|
|
11988
|
-
|
11989
|
-
// const int64_t IC = ne02;
|
11990
|
-
const int64_t KH = ne01;
|
11611
|
+
const int64_t KH = is_2D ? ne01 : 1;
|
11991
11612
|
const int64_t KW = ne00;
|
11992
11613
|
|
11993
|
-
const int64_t OH = ne2;
|
11614
|
+
const int64_t OH = is_2D ? ne2 : 1;
|
11994
11615
|
const int64_t OW = ne1;
|
11995
11616
|
|
11996
|
-
|
11997
|
-
|
11998
|
-
|
11999
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12000
|
-
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12001
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12002
|
-
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12003
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12004
|
-
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
11617
|
+
int ofs0 = is_2D ? nb13 : nb12;
|
11618
|
+
int ofs1 = is_2D ? nb12 : nb11;
|
12005
11619
|
|
12006
11620
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12007
11621
|
GGML_ASSERT(nb10 == sizeof(float));
|
12008
11622
|
|
12009
11623
|
if (params->type == GGML_TASK_INIT) {
|
12010
|
-
memset(dst->data, 0, ggml_nbytes(dst));
|
12011
11624
|
return;
|
12012
11625
|
}
|
12013
11626
|
|
@@ -12020,20 +11633,22 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
|
|
12020
11633
|
ggml_fp16_t * const wdata = (ggml_fp16_t *) dst->data;
|
12021
11634
|
|
12022
11635
|
for (int64_t in = 0; in < N; in++) {
|
12023
|
-
for (int64_t ioh = 0; ioh < OH; ioh++) {
|
11636
|
+
for (int64_t ioh = 0; ioh < OH; ioh++) { // 1
|
12024
11637
|
for (int64_t iow = 0; iow < OW; iow++) {
|
12025
|
-
for (int64_t iic = ith; iic < IC; iic+=nth) {
|
11638
|
+
for (int64_t iic = ith; iic < IC; iic += nth) {
|
12026
11639
|
|
12027
11640
|
// micro kernel
|
12028
11641
|
ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
|
12029
|
-
const float * const src_data = (float *)((char *) src1->data + in*nb13 + iic*nb12); // [IH, IW]
|
11642
|
+
const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
|
12030
11643
|
|
12031
|
-
for (int64_t ikh = 0; ikh < KH; ikh++) {
|
11644
|
+
for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
|
12032
11645
|
for (int64_t ikw = 0; ikw < KW; ikw++) {
|
12033
11646
|
const int64_t iiw = iow*s0 + ikw*d0 - p0;
|
12034
11647
|
const int64_t iih = ioh*s1 + ikh*d1 - p1;
|
12035
11648
|
|
12036
|
-
if (!(iih < 0 || iih >= IH || iiw < 0 || iiw >= IW)) {
|
11649
|
+
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
11650
|
+
dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
|
11651
|
+
} else {
|
12037
11652
|
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_FP32_TO_FP16(src_data[iih*IW + iiw]);
|
12038
11653
|
}
|
12039
11654
|
}
|
@@ -12045,180 +11660,7 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
|
|
12045
11660
|
}
|
12046
11661
|
}
|
12047
11662
|
|
12048
|
-
|
12049
|
-
// src0: [OC, IC, KH, KW]
|
12050
|
-
// src1: [N, OH, OW, IC * KH * KW]
|
12051
|
-
// result: [N, OC, OH, OW]
|
12052
|
-
static void ggml_compute_forward_conv_2d_stage_1_f16(
|
12053
|
-
const struct ggml_compute_params * params,
|
12054
|
-
const struct ggml_tensor * src0,
|
12055
|
-
const struct ggml_tensor * src1,
|
12056
|
-
struct ggml_tensor * dst) {
|
12057
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12058
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
12059
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
12060
|
-
|
12061
|
-
int64_t t0 = ggml_perf_time_us();
|
12062
|
-
UNUSED(t0);
|
12063
|
-
|
12064
|
-
if (params->type == GGML_TASK_INIT) {
|
12065
|
-
return;
|
12066
|
-
}
|
12067
|
-
|
12068
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
12069
|
-
return;
|
12070
|
-
}
|
12071
|
-
|
12072
|
-
GGML_TENSOR_BINARY_OP_LOCALS;
|
12073
|
-
|
12074
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12075
|
-
GGML_ASSERT(nb10 == sizeof(ggml_fp16_t));
|
12076
|
-
GGML_ASSERT(nb0 == sizeof(float));
|
12077
|
-
|
12078
|
-
const int N = ne13;
|
12079
|
-
const int OH = ne12;
|
12080
|
-
const int OW = ne11;
|
12081
|
-
|
12082
|
-
const int OC = ne03;
|
12083
|
-
const int IC = ne02;
|
12084
|
-
const int KH = ne01;
|
12085
|
-
const int KW = ne00;
|
12086
|
-
|
12087
|
-
const int ith = params->ith;
|
12088
|
-
const int nth = params->nth;
|
12089
|
-
|
12090
|
-
int64_t m = OC;
|
12091
|
-
int64_t n = OH * OW;
|
12092
|
-
int64_t k = IC * KH * KW;
|
12093
|
-
|
12094
|
-
// [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
|
12095
|
-
for (int i = 0; i < N; i++) {
|
12096
|
-
ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
|
12097
|
-
ggml_fp16_t * B = (ggml_fp16_t *)src1->data + i * m * k; // [n, k]
|
12098
|
-
float * C = (float *)dst->data + i * m * n; // [m, n]
|
12099
|
-
|
12100
|
-
gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
|
12101
|
-
}
|
12102
|
-
}
|
12103
|
-
|
12104
|
-
static void ggml_compute_forward_conv_2d_f16_f32(
|
12105
|
-
const struct ggml_compute_params * params,
|
12106
|
-
const struct ggml_tensor * src0,
|
12107
|
-
const struct ggml_tensor * src1,
|
12108
|
-
struct ggml_tensor * dst) {
|
12109
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12110
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12111
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
12112
|
-
|
12113
|
-
int64_t t0 = ggml_perf_time_us();
|
12114
|
-
UNUSED(t0);
|
12115
|
-
|
12116
|
-
GGML_TENSOR_BINARY_OP_LOCALS
|
12117
|
-
|
12118
|
-
// src1: image [N, IC, IH, IW]
|
12119
|
-
// src0: kernel [OC, IC, KH, KW]
|
12120
|
-
// dst: result [N, OC, OH, OW]
|
12121
|
-
// ne12: IC
|
12122
|
-
// ne0: OW
|
12123
|
-
// ne1: OH
|
12124
|
-
// nk0: KW
|
12125
|
-
// nk1: KH
|
12126
|
-
// ne13: N
|
12127
|
-
|
12128
|
-
const int N = ne13;
|
12129
|
-
const int IC = ne12;
|
12130
|
-
const int IH = ne11;
|
12131
|
-
const int IW = ne10;
|
12132
|
-
|
12133
|
-
const int OC = ne03;
|
12134
|
-
// const int IC = ne02;
|
12135
|
-
const int KH = ne01;
|
12136
|
-
const int KW = ne00;
|
12137
|
-
|
12138
|
-
const int OH = ne1;
|
12139
|
-
const int OW = ne0;
|
12140
|
-
|
12141
|
-
const int ith = params->ith;
|
12142
|
-
const int nth = params->nth;
|
12143
|
-
|
12144
|
-
// const int nk0 = ne00;
|
12145
|
-
// const int nk1 = ne01;
|
12146
|
-
|
12147
|
-
// size of the convolution row - the kernel size unrolled across all channels
|
12148
|
-
// const int ew0 = nk0*nk1*ne02;
|
12149
|
-
// ew0: IC*KH*KW
|
12150
|
-
|
12151
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12152
|
-
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12153
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12154
|
-
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12155
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12156
|
-
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
12157
|
-
|
12158
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12159
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
12160
|
-
|
12161
|
-
if (params->type == GGML_TASK_INIT) {
|
12162
|
-
memset(params->wdata, 0, params->wsize);
|
12163
|
-
|
12164
|
-
// prepare source data (src1)
|
12165
|
-
// im2col: [N, IC, IH, IW] => [N*OH*OW, IC*KH*KW]
|
12166
|
-
|
12167
|
-
{
|
12168
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
12169
|
-
|
12170
|
-
for (int in = 0; in < N; in++) {
|
12171
|
-
for (int iic = 0; iic < IC; iic++) {
|
12172
|
-
for (int ioh = 0; ioh < OH; ioh++) {
|
12173
|
-
for (int iow = 0; iow < OW; iow++) {
|
12174
|
-
|
12175
|
-
// micro kernel
|
12176
|
-
ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
|
12177
|
-
const float * const src_data = (float *)((char *) src1->data + in*nb13 + iic*nb12); // [IH, IW]
|
12178
|
-
|
12179
|
-
for (int ikh = 0; ikh < KH; ikh++) {
|
12180
|
-
for (int ikw = 0; ikw < KW; ikw++) {
|
12181
|
-
const int iiw = iow*s0 + ikw*d0 - p0;
|
12182
|
-
const int iih = ioh*s1 + ikh*d1 - p1;
|
12183
|
-
|
12184
|
-
if (!(iih < 0 || iih >= IH || iiw < 0 || iiw >= IW)) {
|
12185
|
-
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_FP32_TO_FP16(src_data[iih*IW + iiw]);
|
12186
|
-
}
|
12187
|
-
}
|
12188
|
-
}
|
12189
|
-
}
|
12190
|
-
}
|
12191
|
-
}
|
12192
|
-
}
|
12193
|
-
}
|
12194
|
-
|
12195
|
-
return;
|
12196
|
-
}
|
12197
|
-
|
12198
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
12199
|
-
return;
|
12200
|
-
}
|
12201
|
-
|
12202
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
12203
|
-
// wdata: [N*OH*OW, IC*KH*KW]
|
12204
|
-
// dst: result [N, OC, OH, OW]
|
12205
|
-
// src0: kernel [OC, IC, KH, KW]
|
12206
|
-
|
12207
|
-
int64_t m = OC;
|
12208
|
-
int64_t n = OH * OW;
|
12209
|
-
int64_t k = IC * KH * KW;
|
12210
|
-
|
12211
|
-
// [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
|
12212
|
-
for (int i = 0; i < N; i++) {
|
12213
|
-
ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
|
12214
|
-
ggml_fp16_t * B = (ggml_fp16_t *)wdata + i * m * k; // [n, k]
|
12215
|
-
float * C = (float *)dst->data + i * m * n; // [m * k]
|
12216
|
-
|
12217
|
-
gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
|
12218
|
-
}
|
12219
|
-
}
|
12220
|
-
|
12221
|
-
static void ggml_compute_forward_conv_2d(
|
11663
|
+
static void ggml_compute_forward_im2col(
|
12222
11664
|
const struct ggml_compute_params * params,
|
12223
11665
|
const struct ggml_tensor * src0,
|
12224
11666
|
const struct ggml_tensor * src1,
|
@@ -12226,50 +11668,7 @@ static void ggml_compute_forward_conv_2d(
|
|
12226
11668
|
switch (src0->type) {
|
12227
11669
|
case GGML_TYPE_F16:
|
12228
11670
|
{
|
12229
|
-
|
12230
|
-
} break;
|
12231
|
-
case GGML_TYPE_F32:
|
12232
|
-
{
|
12233
|
-
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
12234
|
-
GGML_ASSERT(false);
|
12235
|
-
} break;
|
12236
|
-
default:
|
12237
|
-
{
|
12238
|
-
GGML_ASSERT(false);
|
12239
|
-
} break;
|
12240
|
-
}
|
12241
|
-
}
|
12242
|
-
|
12243
|
-
static void ggml_compute_forward_conv_2d_stage_0(
|
12244
|
-
const struct ggml_compute_params * params,
|
12245
|
-
const struct ggml_tensor * src0,
|
12246
|
-
const struct ggml_tensor * src1,
|
12247
|
-
struct ggml_tensor * dst) {
|
12248
|
-
switch (src0->type) {
|
12249
|
-
case GGML_TYPE_F16:
|
12250
|
-
{
|
12251
|
-
ggml_compute_forward_conv_2d_stage_0_f32(params, src0, src1, dst);
|
12252
|
-
} break;
|
12253
|
-
case GGML_TYPE_F32:
|
12254
|
-
{
|
12255
|
-
GGML_ASSERT(false);
|
12256
|
-
} break;
|
12257
|
-
default:
|
12258
|
-
{
|
12259
|
-
GGML_ASSERT(false);
|
12260
|
-
} break;
|
12261
|
-
}
|
12262
|
-
}
|
12263
|
-
|
12264
|
-
static void ggml_compute_forward_conv_2d_stage_1(
|
12265
|
-
const struct ggml_compute_params * params,
|
12266
|
-
const struct ggml_tensor * src0,
|
12267
|
-
const struct ggml_tensor * src1,
|
12268
|
-
struct ggml_tensor * dst) {
|
12269
|
-
switch (src0->type) {
|
12270
|
-
case GGML_TYPE_F16:
|
12271
|
-
{
|
12272
|
-
ggml_compute_forward_conv_2d_stage_1_f16(params, src0, src1, dst);
|
11671
|
+
ggml_compute_forward_im2col_f16(params, src0, src1, dst);
|
12273
11672
|
} break;
|
12274
11673
|
case GGML_TYPE_F32:
|
12275
11674
|
{
|
@@ -12454,14 +11853,11 @@ static void ggml_compute_forward_pool_1d(
|
|
12454
11853
|
ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
|
12455
11854
|
}
|
12456
11855
|
|
12457
|
-
//
|
11856
|
+
// ggml_compute_forward_pool_2d
|
12458
11857
|
|
12459
|
-
static void ggml_compute_forward_pool_2d_sk_p0(
|
11858
|
+
static void ggml_compute_forward_pool_2d(
|
12460
11859
|
const struct ggml_compute_params * params,
|
12461
|
-
const enum ggml_op_pool op,
|
12462
11860
|
const struct ggml_tensor * src,
|
12463
|
-
const int k0,
|
12464
|
-
const int k1,
|
12465
11861
|
struct ggml_tensor * dst) {
|
12466
11862
|
assert(src->type == GGML_TYPE_F32);
|
12467
11863
|
assert(params->ith == 0);
|
@@ -12470,6 +11866,14 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12470
11866
|
return;
|
12471
11867
|
}
|
12472
11868
|
|
11869
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
11870
|
+
enum ggml_op_pool op = opts[0];
|
11871
|
+
const int k0 = opts[1];
|
11872
|
+
const int k1 = opts[2];
|
11873
|
+
const int s0 = opts[3];
|
11874
|
+
const int s1 = opts[4];
|
11875
|
+
const int p0 = opts[5];
|
11876
|
+
const int p1 = opts[6];
|
12473
11877
|
const char * cdata = (const char*)src->data;
|
12474
11878
|
const char * const data_end = cdata + ggml_nbytes(src);
|
12475
11879
|
|
@@ -12480,6 +11884,8 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12480
11884
|
float * dplane = (float *)dst->data;
|
12481
11885
|
|
12482
11886
|
const int ka = k0 * k1;
|
11887
|
+
const int offset0 = -p0;
|
11888
|
+
const int offset1 = -p1;
|
12483
11889
|
|
12484
11890
|
while (cdata < data_end) {
|
12485
11891
|
for (int oy = 0; oy < py; ++oy) {
|
@@ -12492,13 +11898,15 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12492
11898
|
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
12493
11899
|
}
|
12494
11900
|
|
12495
|
-
const int ix = ox * k0;
|
12496
|
-
const int iy = oy * k1;
|
11901
|
+
const int ix = offset0 + ox * s0;
|
11902
|
+
const int iy = offset1 + oy * s1;
|
12497
11903
|
|
12498
11904
|
for (int ky = 0; ky < k1; ++ky) {
|
11905
|
+
if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
|
12499
11906
|
const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
|
12500
11907
|
for (int kx = 0; kx < k0; ++kx) {
|
12501
11908
|
int j = ix + kx;
|
11909
|
+
if (j < 0 || j >= src->ne[0]) continue;
|
12502
11910
|
switch (op) {
|
12503
11911
|
case GGML_OP_POOL_AVG: *out += srow[j]; break;
|
12504
11912
|
case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break;
|
@@ -12519,29 +11927,6 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12519
11927
|
}
|
12520
11928
|
}
|
12521
11929
|
|
12522
|
-
// ggml_compute_forward_pool_2d
|
12523
|
-
|
12524
|
-
static void ggml_compute_forward_pool_2d(
|
12525
|
-
const struct ggml_compute_params * params,
|
12526
|
-
const struct ggml_tensor * src0,
|
12527
|
-
struct ggml_tensor * dst) {
|
12528
|
-
|
12529
|
-
const int32_t * opts = (const int32_t *)dst->op_params;
|
12530
|
-
enum ggml_op_pool op = opts[0];
|
12531
|
-
const int k0 = opts[1];
|
12532
|
-
const int k1 = opts[2];
|
12533
|
-
const int s0 = opts[3];
|
12534
|
-
const int s1 = opts[4];
|
12535
|
-
const int p0 = opts[5];
|
12536
|
-
const int p1 = opts[6];
|
12537
|
-
GGML_ASSERT(p0 == 0);
|
12538
|
-
GGML_ASSERT(p1 == 0); // padding not supported
|
12539
|
-
GGML_ASSERT(k0 == s0);
|
12540
|
-
GGML_ASSERT(k1 == s1); // only s = k supported
|
12541
|
-
|
12542
|
-
ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst);
|
12543
|
-
}
|
12544
|
-
|
12545
11930
|
// ggml_compute_forward_upscale
|
12546
11931
|
|
12547
11932
|
static void ggml_compute_forward_upscale_f32(
|
@@ -13743,6 +13128,10 @@ static void ggml_compute_forward_unary(
|
|
13743
13128
|
{
|
13744
13129
|
ggml_compute_forward_silu(params, src0, dst);
|
13745
13130
|
} break;
|
13131
|
+
case GGML_UNARY_OP_LEAKY:
|
13132
|
+
{
|
13133
|
+
ggml_compute_forward_leaky(params, src0, dst);
|
13134
|
+
} break;
|
13746
13135
|
default:
|
13747
13136
|
{
|
13748
13137
|
GGML_ASSERT(false);
|
@@ -14496,33 +13885,13 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14496
13885
|
{
|
14497
13886
|
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
14498
13887
|
} break;
|
14499
|
-
case GGML_OP_CONV_1D:
|
14500
|
-
{
|
14501
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
14502
|
-
} break;
|
14503
|
-
case GGML_OP_CONV_1D_STAGE_0:
|
14504
|
-
{
|
14505
|
-
ggml_compute_forward_conv_1d_stage_0(params, tensor->src[0], tensor->src[1], tensor);
|
14506
|
-
} break;
|
14507
|
-
case GGML_OP_CONV_1D_STAGE_1:
|
14508
|
-
{
|
14509
|
-
ggml_compute_forward_conv_1d_stage_1(params, tensor->src[0], tensor->src[1], tensor);
|
14510
|
-
} break;
|
14511
13888
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
14512
13889
|
{
|
14513
13890
|
ggml_compute_forward_conv_transpose_1d(params, tensor->src[0], tensor->src[1], tensor);
|
14514
13891
|
} break;
|
14515
|
-
case GGML_OP_CONV_2D:
|
13892
|
+
case GGML_OP_IM2COL:
|
14516
13893
|
{
|
14517
|
-
|
14518
|
-
} break;
|
14519
|
-
case GGML_OP_CONV_2D_STAGE_0:
|
14520
|
-
{
|
14521
|
-
ggml_compute_forward_conv_2d_stage_0(params, tensor->src[0], tensor->src[1], tensor);
|
14522
|
-
} break;
|
14523
|
-
case GGML_OP_CONV_2D_STAGE_1:
|
14524
|
-
{
|
14525
|
-
ggml_compute_forward_conv_2d_stage_1(params, tensor->src[0], tensor->src[1], tensor);
|
13894
|
+
ggml_compute_forward_im2col(params, tensor->src[0], tensor->src[1], tensor);
|
14526
13895
|
} break;
|
14527
13896
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
14528
13897
|
{
|
@@ -14651,62 +14020,109 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14651
14020
|
|
14652
14021
|
////////////////////////////////////////////////////////////////////////////////
|
14653
14022
|
|
14654
|
-
|
14023
|
+
static size_t ggml_hash_size(size_t min_sz) {
|
14024
|
+
// next primes after powers of two
|
14025
|
+
static const size_t primes[] = {
|
14026
|
+
2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
|
14027
|
+
2053, 4099, 8209, 16411, 32771, 65537, 131101,
|
14028
|
+
262147, 524309, 1048583, 2097169, 4194319, 8388617,
|
14029
|
+
16777259, 33554467, 67108879, 134217757, 268435459,
|
14030
|
+
536870923, 1073741827, 2147483659
|
14031
|
+
};
|
14032
|
+
static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
|
14033
|
+
|
14034
|
+
// find the smallest prime that is larger or equal to min_sz
|
14035
|
+
size_t l = 0;
|
14036
|
+
size_t r = n_primes;
|
14037
|
+
while (l < r) {
|
14038
|
+
size_t m = (l + r)/2;
|
14039
|
+
if (primes[m] < min_sz) {
|
14040
|
+
l = m + 1;
|
14041
|
+
} else {
|
14042
|
+
r = m;
|
14043
|
+
}
|
14044
|
+
}
|
14045
|
+
size_t sz = l < n_primes ? primes[l] : min_sz | 1;
|
14046
|
+
return sz;
|
14047
|
+
}
|
14655
14048
|
|
14656
|
-
static size_t
|
14657
|
-
return (size_t)p
|
14049
|
+
static size_t ggml_hash(const void * p) {
|
14050
|
+
return (size_t)p;
|
14658
14051
|
}
|
14659
14052
|
|
14660
|
-
|
14661
|
-
size_t h =
|
14053
|
+
size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14054
|
+
size_t h = ggml_hash(key) % hash_set.size;
|
14662
14055
|
|
14663
14056
|
// linear probing
|
14664
14057
|
size_t i = h;
|
14665
|
-
while (
|
14666
|
-
i = (i + 1) %
|
14058
|
+
while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) {
|
14059
|
+
i = (i + 1) % hash_set.size;
|
14667
14060
|
if (i == h) {
|
14668
14061
|
// visited all hash table entries -> not found
|
14669
|
-
return
|
14062
|
+
return GGML_HASHTABLE_FULL;
|
14670
14063
|
}
|
14671
14064
|
}
|
14672
14065
|
return i;
|
14673
14066
|
}
|
14674
14067
|
|
14675
|
-
|
14676
|
-
size_t i =
|
14068
|
+
bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14069
|
+
size_t i = ggml_hash_find(hash_set, key);
|
14070
|
+
return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key;
|
14071
|
+
}
|
14072
|
+
|
14073
|
+
size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14074
|
+
size_t i = ggml_hash_find(hash_set, key);
|
14677
14075
|
|
14678
|
-
GGML_ASSERT(i
|
14076
|
+
GGML_ASSERT(i != GGML_HASHTABLE_FULL);
|
14679
14077
|
|
14680
|
-
if (
|
14681
|
-
return
|
14078
|
+
if (hash_set.keys[i] == key) {
|
14079
|
+
return GGML_HASHTABLE_ALREADY_EXISTS;
|
14682
14080
|
}
|
14683
14081
|
|
14684
14082
|
// insert
|
14685
|
-
GGML_ASSERT(
|
14686
|
-
|
14687
|
-
return
|
14083
|
+
GGML_ASSERT(hash_set.keys[i] == NULL);
|
14084
|
+
hash_set.keys[i] = key;
|
14085
|
+
return i;
|
14086
|
+
}
|
14087
|
+
|
14088
|
+
size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14089
|
+
size_t i = ggml_hash_find(hash_set, key);
|
14090
|
+
|
14091
|
+
GGML_ASSERT(i != GGML_HASHTABLE_FULL);
|
14092
|
+
|
14093
|
+
hash_set.keys[i] = key;
|
14094
|
+
return i;
|
14095
|
+
}
|
14096
|
+
|
14097
|
+
static struct ggml_hash_set ggml_hash_set_new(size_t size) {
|
14098
|
+
size = ggml_hash_size(size);
|
14099
|
+
struct ggml_hash_set result;
|
14100
|
+
result.size = size;
|
14101
|
+
result.keys = malloc(sizeof(struct ggml_tensor *) * size);
|
14102
|
+
memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
|
14103
|
+
return result;
|
14688
14104
|
}
|
14689
14105
|
|
14690
|
-
static
|
14691
|
-
|
14692
|
-
return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
|
14106
|
+
// Release the key array of a hash set.
// The set is passed by value, so the caller's copy still holds the stale pointer afterwards.
static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
    struct ggml_tensor ** keys = hash_set.keys;
    free(keys);
}
|
14694
14109
|
|
14695
14110
|
struct hash_map {
|
14696
|
-
|
14697
|
-
|
14111
|
+
struct ggml_hash_set set;
|
14112
|
+
struct ggml_tensor ** vals;
|
14698
14113
|
};
|
14699
14114
|
|
14700
|
-
static struct hash_map *
|
14115
|
+
static struct hash_map * ggml_new_hash_map(size_t size) {
|
14701
14116
|
struct hash_map * result = malloc(sizeof(struct hash_map));
|
14702
|
-
|
14703
|
-
|
14704
|
-
|
14705
|
-
}
|
14117
|
+
result->set = ggml_hash_set_new(size);
|
14118
|
+
result->vals = malloc(sizeof(struct ggml_tensor *) * result->set.size);
|
14119
|
+
memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
|
14706
14120
|
return result;
|
14707
14121
|
}
|
14708
14122
|
|
14709
|
-
static void
|
14123
|
+
static void ggml_hash_map_free(struct hash_map * map) {
|
14124
|
+
ggml_hash_set_free(map->set);
|
14125
|
+
free(map->vals);
|
14710
14126
|
free(map);
|
14711
14127
|
}
|
14712
14128
|
|
@@ -14726,7 +14142,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
|
14726
14142
|
return node;
|
14727
14143
|
}
|
14728
14144
|
|
14729
|
-
if (!
|
14145
|
+
if (!ggml_hash_contains(graph->visited_hash_table, node)) {
|
14730
14146
|
return node;
|
14731
14147
|
}
|
14732
14148
|
|
@@ -14741,17 +14157,17 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
|
14741
14157
|
return node;
|
14742
14158
|
}
|
14743
14159
|
|
14744
|
-
size_t i =
|
14745
|
-
GGML_ASSERT(i
|
14746
|
-
if (replacements->keys[i] == node) {
|
14747
|
-
return
|
14160
|
+
size_t i = ggml_hash_find(replacements->set, node);
|
14161
|
+
GGML_ASSERT(i != GGML_HASHTABLE_FULL); // assert that not full
|
14162
|
+
if (replacements->set.keys[i] == node) {
|
14163
|
+
return replacements->vals[i];
|
14748
14164
|
}
|
14749
14165
|
|
14750
14166
|
struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, node->n_dims, node->ne);
|
14751
14167
|
|
14752
14168
|
// insert clone into replacements
|
14753
|
-
GGML_ASSERT(replacements->keys[i] == NULL); // assert that we don't overwrite
|
14754
|
-
replacements->keys[i] = node;
|
14169
|
+
GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
|
14170
|
+
replacements->set.keys[i] = node;
|
14755
14171
|
replacements->vals[i] = clone;
|
14756
14172
|
|
14757
14173
|
clone->op = node->op;
|
@@ -14788,26 +14204,26 @@ void ggml_build_backward_gradient_checkpointing(
|
|
14788
14204
|
struct ggml_cgraph * gb_tmp,
|
14789
14205
|
struct ggml_tensor * * checkpoints,
|
14790
14206
|
int n_checkpoints) {
|
14791
|
-
|
14207
|
+
ggml_graph_cpy(gf, gb_tmp);
|
14792
14208
|
ggml_build_backward_expand(ctx, gf, gb_tmp, true);
|
14793
14209
|
|
14794
14210
|
if (n_checkpoints <= 0) {
|
14795
|
-
|
14211
|
+
ggml_graph_cpy(gb_tmp, gb);
|
14796
14212
|
return;
|
14797
14213
|
}
|
14798
14214
|
|
14799
|
-
struct hash_map * replacements =
|
14215
|
+
struct hash_map * replacements = ggml_new_hash_map(gf->n_nodes + gf->n_leafs + n_checkpoints);
|
14800
14216
|
|
14801
14217
|
// insert checkpoints in replacements
|
14802
14218
|
for (int i = 0; i < n_checkpoints; ++i) {
|
14803
|
-
size_t k =
|
14804
|
-
GGML_ASSERT(k
|
14805
|
-
GGML_ASSERT(replacements->keys[k] == NULL); // assert that we don't overwrite
|
14806
|
-
replacements->keys[k] = checkpoints[i];
|
14807
|
-
replacements->vals[k]
|
14219
|
+
size_t k = ggml_hash_find(replacements->set, checkpoints[i]);
|
14220
|
+
GGML_ASSERT(k != GGML_HASHTABLE_FULL); // assert that not full
|
14221
|
+
GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
|
14222
|
+
replacements->set.keys[k] = checkpoints[i];
|
14223
|
+
replacements->vals[k] = checkpoints[i];
|
14808
14224
|
}
|
14809
14225
|
|
14810
|
-
|
14226
|
+
ggml_graph_cpy(gf, gb);
|
14811
14227
|
// rewrite gb_tmp->nodes[gf->n_nodes:gb_tmp->n_nodes],
|
14812
14228
|
// replacing references to gb_tmp->nodes[0:gf->n_nodes] ( == gf->nodes[0:gf->n_nodes]),
|
14813
14229
|
// by recomputing them from checkpoints
|
@@ -14824,21 +14240,21 @@ void ggml_build_backward_gradient_checkpointing(
|
|
14824
14240
|
ggml_build_forward_expand(gb, node);
|
14825
14241
|
}
|
14826
14242
|
|
14827
|
-
|
14243
|
+
ggml_hash_map_free(replacements);
|
14828
14244
|
}
|
14829
14245
|
|
14830
14246
|
// functions to change gradients considering the case that input a might be initial gradient with zero value
|
14831
14247
|
|
14832
|
-
static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b,
|
14833
|
-
if (
|
14248
|
+
// Accumulate b into gradient a.
// If a is listed in zero_table the addition is skipped and b itself is returned;
// otherwise a new (non-inplace) a + b node is returned.
static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
    if (!ggml_hash_contains(zero_table, a)) {
        return ggml_add_impl(ctx, a, b, false);
    }
    return b;
}
|
14839
14255
|
|
14840
|
-
static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset,
|
14841
|
-
if (
|
14256
|
+
static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) {
|
14257
|
+
if (ggml_hash_contains(zero_table, a)) {
|
14842
14258
|
struct ggml_tensor * a_zero = ggml_scale(ctx, a, ggml_new_f32(ctx, 0));
|
14843
14259
|
return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
|
14844
14260
|
} else {
|
@@ -14846,23 +14262,23 @@ static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct gg
|
|
14846
14262
|
}
|
14847
14263
|
}
|
14848
14264
|
|
14849
|
-
static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b,
|
14850
|
-
if (
|
14265
|
+
// add1-style accumulation of b into gradient a.
// If a is listed in zero_table, b repeated to the shape of a is returned instead of a sum;
// otherwise a new (non-inplace) add1 node is returned.
static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
    if (!ggml_hash_contains(zero_table, a)) {
        return ggml_add1_impl(ctx, a, b, false);
    }
    return ggml_repeat(ctx, b, a);
}
|
14856
14272
|
|
14857
|
-
static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b,
|
14858
|
-
if (
|
14273
|
+
// Subtract b from gradient a.
// If a is listed in zero_table the result is just -b;
// otherwise a new (non-inplace) a - b node is returned.
static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
    if (!ggml_hash_contains(zero_table, a)) {
        return ggml_sub_impl(ctx, a, b, false);
    }
    return ggml_neg(ctx, b);
}
|
14864
14280
|
|
14865
|
-
static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor,
|
14281
|
+
static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) {
|
14866
14282
|
struct ggml_tensor * src0 = tensor->src[0];
|
14867
14283
|
struct ggml_tensor * src1 = tensor->src[1];
|
14868
14284
|
|
@@ -15457,31 +14873,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15457
14873
|
{
|
15458
14874
|
GGML_ASSERT(false); // TODO: not implemented
|
15459
14875
|
} break;
|
15460
|
-
case GGML_OP_CONV_1D:
|
15461
|
-
{
|
15462
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15463
|
-
} break;
|
15464
|
-
case GGML_OP_CONV_1D_STAGE_0:
|
15465
|
-
{
|
15466
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15467
|
-
} break;
|
15468
|
-
case GGML_OP_CONV_1D_STAGE_1:
|
15469
|
-
{
|
15470
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15471
|
-
} break;
|
15472
14876
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
15473
14877
|
{
|
15474
14878
|
GGML_ASSERT(false); // TODO: not implemented
|
15475
14879
|
} break;
|
15476
|
-
case
|
15477
|
-
{
|
15478
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15479
|
-
} break;
|
15480
|
-
case GGML_OP_CONV_2D_STAGE_0:
|
15481
|
-
{
|
15482
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15483
|
-
} break;
|
15484
|
-
case GGML_OP_CONV_2D_STAGE_1:
|
14880
|
+
case GGML_OP_IM2COL:
|
15485
14881
|
{
|
15486
14882
|
GGML_ASSERT(false); // TODO: not implemented
|
15487
14883
|
} break;
|
@@ -15695,7 +15091,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15695
15091
|
}
|
15696
15092
|
|
15697
15093
|
// check if already visited
|
15698
|
-
if (
|
15094
|
+
if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) {
|
15699
15095
|
return;
|
15700
15096
|
}
|
15701
15097
|
|
@@ -15711,7 +15107,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15711
15107
|
|
15712
15108
|
if (node->op == GGML_OP_NONE && node->grad == NULL) {
|
15713
15109
|
// reached a leaf node, not part of the gradient graph (e.g. a constant)
|
15714
|
-
GGML_ASSERT(cgraph->n_leafs <
|
15110
|
+
GGML_ASSERT(cgraph->n_leafs < cgraph->size);
|
15715
15111
|
|
15716
15112
|
if (strlen(node->name) == 0) {
|
15717
15113
|
ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
|
@@ -15720,22 +15116,24 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15720
15116
|
cgraph->leafs[cgraph->n_leafs] = node;
|
15721
15117
|
cgraph->n_leafs++;
|
15722
15118
|
} else {
|
15723
|
-
GGML_ASSERT(cgraph->n_nodes <
|
15119
|
+
GGML_ASSERT(cgraph->n_nodes < cgraph->size);
|
15724
15120
|
|
15725
15121
|
if (strlen(node->name) == 0) {
|
15726
15122
|
ggml_format_name(node, "node_%d", cgraph->n_nodes);
|
15727
15123
|
}
|
15728
15124
|
|
15729
15125
|
cgraph->nodes[cgraph->n_nodes] = node;
|
15730
|
-
cgraph->grads
|
15126
|
+
if (cgraph->grads) {
|
15127
|
+
cgraph->grads[cgraph->n_nodes] = node->grad;
|
15128
|
+
}
|
15731
15129
|
cgraph->n_nodes++;
|
15732
15130
|
}
|
15733
15131
|
}
|
15734
15132
|
|
15735
15133
|
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
|
15736
15134
|
if (!expand) {
|
15737
|
-
|
15738
|
-
cgraph
|
15135
|
+
// TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
|
15136
|
+
ggml_graph_clear(cgraph);
|
15739
15137
|
}
|
15740
15138
|
|
15741
15139
|
const int n0 = cgraph->n_nodes;
|
@@ -15756,25 +15154,6 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15756
15154
|
ggml_build_forward_impl(cgraph, tensor, true);
|
15757
15155
|
}
|
15758
15156
|
|
15759
|
-
struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
|
15760
|
-
struct ggml_cgraph result = {
|
15761
|
-
/*.n_nodes =*/ 0,
|
15762
|
-
/*.n_leafs =*/ 0,
|
15763
|
-
/*.nodes =*/ { NULL },
|
15764
|
-
/*.grads =*/ { NULL },
|
15765
|
-
/*.leafs =*/ { NULL },
|
15766
|
-
/*.hash_table =*/ { NULL },
|
15767
|
-
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
|
15768
|
-
/*.perf_runs =*/ 0,
|
15769
|
-
/*.perf_cycles =*/ 0,
|
15770
|
-
/*.perf_time_us =*/ 0,
|
15771
|
-
};
|
15772
|
-
|
15773
|
-
ggml_build_forward_impl(&result, tensor, false);
|
15774
|
-
|
15775
|
-
return result;
|
15776
|
-
}
|
15777
|
-
|
15778
15157
|
void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
|
15779
15158
|
GGML_ASSERT(gf->n_nodes > 0);
|
15780
15159
|
|
@@ -15791,11 +15170,10 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
15791
15170
|
}
|
15792
15171
|
|
15793
15172
|
// remember original gradients which start with zero values
|
15794
|
-
|
15795
|
-
memset(zero_table, 0, sizeof(void*) * GGML_GRAPH_HASHTABLE_SIZE);
|
15173
|
+
struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
|
15796
15174
|
for (int i = 0; i < gf->n_nodes; i++) {
|
15797
15175
|
if (gf->grads[i]) {
|
15798
|
-
|
15176
|
+
ggml_hash_insert(zero_table, gf->grads[i]);
|
15799
15177
|
}
|
15800
15178
|
}
|
15801
15179
|
|
@@ -15818,26 +15196,54 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
15818
15196
|
}
|
15819
15197
|
}
|
15820
15198
|
|
15821
|
-
|
15199
|
+
ggml_hash_set_free(zero_table);
|
15822
15200
|
}
|
15823
15201
|
|
15824
|
-
|
15825
|
-
|
15826
|
-
|
15827
|
-
|
15202
|
+
static size_t ggml_graph_nbytes(size_t size, bool grads) {
|
15203
|
+
size_t nbytes = sizeof(struct ggml_cgraph);
|
15204
|
+
nbytes += size * sizeof(struct ggml_tensor *) * 2; // leafs + nodes
|
15205
|
+
if (grads) {
|
15206
|
+
nbytes += size * sizeof(struct ggml_tensor *); // grads
|
15207
|
+
}
|
15208
|
+
nbytes += ggml_hash_size(size * 2) * sizeof(struct ggml_tensor *); // hash set
|
15209
|
+
return nbytes;
|
15828
15210
|
}
|
15829
15211
|
|
15830
|
-
|
15831
|
-
|
15212
|
+
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
|
15213
|
+
return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
|
15214
|
+
}
|
15215
|
+
|
15216
|
+
size_t ggml_graph_overhead(void) {
|
15217
|
+
return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
|
15218
|
+
}
|
15219
|
+
|
15220
|
+
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
|
15221
|
+
const size_t obj_size = ggml_graph_nbytes(size, grads);
|
15222
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
|
15832
15223
|
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15833
15224
|
|
15225
|
+
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
|
15226
|
+
|
15227
|
+
size_t hash_size = ggml_hash_size(size * 2);
|
15228
|
+
struct ggml_tensor ** nodes_ptr = data_start;
|
15229
|
+
struct ggml_tensor ** leafs_ptr = nodes_ptr + size;
|
15230
|
+
struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
|
15231
|
+
struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;
|
15232
|
+
|
15233
|
+
// check that we allocated the correct amount of memory
|
15234
|
+
assert(obj_size == (size_t) (
|
15235
|
+
(grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
|
15236
|
+
|
15237
|
+
memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
|
15238
|
+
|
15834
15239
|
*cgraph = (struct ggml_cgraph) {
|
15240
|
+
/*.size =*/ size,
|
15835
15241
|
/*.n_nodes =*/ 0,
|
15836
15242
|
/*.n_leafs =*/ 0,
|
15837
|
-
/*.nodes =*/
|
15838
|
-
/*.grads =*/
|
15839
|
-
/*.leafs =*/
|
15840
|
-
/*.hash_table =*/ {
|
15243
|
+
/*.nodes =*/ nodes_ptr,
|
15244
|
+
/*.grads =*/ grads_ptr,
|
15245
|
+
/*.leafs =*/ leafs_ptr,
|
15246
|
+
/*.hash_table =*/ { hash_size, hash_keys_ptr },
|
15841
15247
|
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
|
15842
15248
|
/*.perf_runs =*/ 0,
|
15843
15249
|
/*.perf_cycles =*/ 0,
|
@@ -15847,14 +15253,85 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
|
15847
15253
|
return cgraph;
|
15848
15254
|
}
|
15849
15255
|
|
15850
|
-
struct ggml_cgraph *
|
15851
|
-
|
15852
|
-
|
15256
|
+
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
15257
|
+
return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
|
15258
|
+
}
|
15259
|
+
|
15260
|
+
struct ggml_cgraph * ggml_graph_view(struct ggml_context * ctx, struct ggml_cgraph * cgraph0, int i0, int i1) {
|
15261
|
+
const size_t obj_size = sizeof(struct ggml_cgraph);
|
15262
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
|
15263
|
+
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15264
|
+
|
15265
|
+
*cgraph = (struct ggml_cgraph) {
|
15266
|
+
/*.size =*/ 0,
|
15267
|
+
/*.n_nodes =*/ i1 - i0,
|
15268
|
+
/*.n_leafs =*/ 0,
|
15269
|
+
/*.nodes =*/ cgraph0->nodes + i0,
|
15270
|
+
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
|
15271
|
+
/*.leafs =*/ NULL,
|
15272
|
+
/*.hash_table =*/ { 0, NULL },
|
15273
|
+
/*.order =*/ cgraph0->order,
|
15274
|
+
/*.perf_runs =*/ 0,
|
15275
|
+
/*.perf_cycles =*/ 0,
|
15276
|
+
/*.perf_time_us =*/ 0,
|
15277
|
+
};
|
15278
|
+
|
15853
15279
|
return cgraph;
|
15854
15280
|
}
|
15855
15281
|
|
15856
|
-
|
15857
|
-
|
15282
|
+
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
|
15283
|
+
GGML_ASSERT(dst->size >= src->n_leafs);
|
15284
|
+
GGML_ASSERT(dst->size >= src->n_nodes);
|
15285
|
+
GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size);
|
15286
|
+
|
15287
|
+
dst->n_leafs = src->n_leafs;
|
15288
|
+
dst->n_nodes = src->n_nodes;
|
15289
|
+
dst->order = src->order;
|
15290
|
+
|
15291
|
+
for (int i = 0; i < src->n_leafs; ++i) {
|
15292
|
+
dst->leafs[i] = src->leafs[i];
|
15293
|
+
}
|
15294
|
+
|
15295
|
+
for (int i = 0; i < src->n_nodes; ++i) {
|
15296
|
+
dst->nodes[i] = src->nodes[i];
|
15297
|
+
}
|
15298
|
+
|
15299
|
+
if (src->grads) {
|
15300
|
+
GGML_ASSERT(dst->grads != NULL);
|
15301
|
+
for (int i = 0; i < src->n_nodes; ++i) {
|
15302
|
+
dst->grads[i] = src->grads[i];
|
15303
|
+
}
|
15304
|
+
}
|
15305
|
+
|
15306
|
+
for (size_t i = 0; i < src->visited_hash_table.size; ++i) {
|
15307
|
+
if (src->visited_hash_table.keys[i]) {
|
15308
|
+
ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]);
|
15309
|
+
}
|
15310
|
+
}
|
15311
|
+
}
|
15312
|
+
|
15313
|
+
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
|
15314
|
+
struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
|
15315
|
+
ggml_graph_cpy(cgraph, result);
|
15316
|
+
return result;
|
15317
|
+
}
|
15318
|
+
|
15319
|
+
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
15320
|
+
GGML_ASSERT(cgraph->grads != NULL);
|
15321
|
+
|
15322
|
+
for (int i = 0; i < cgraph->n_nodes; i++) {
|
15323
|
+
struct ggml_tensor * grad = cgraph->grads[i];
|
15324
|
+
|
15325
|
+
if (grad) {
|
15326
|
+
ggml_set_zero(grad);
|
15327
|
+
}
|
15328
|
+
}
|
15329
|
+
}
|
15330
|
+
|
15331
|
+
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
|
15332
|
+
cgraph->n_leafs = 0;
|
15333
|
+
cgraph->n_nodes = 0;
|
15334
|
+
memset(cgraph->visited_hash_table.keys, 0, cgraph->visited_hash_table.size * sizeof(struct ggml_tensor *));
|
15858
15335
|
}
|
15859
15336
|
|
15860
15337
|
//
|
@@ -15966,45 +15443,266 @@ static void clear_numa_thread_affinity(void) {
|
|
15966
15443
|
strerror(rv));
|
15967
15444
|
}
|
15968
15445
|
|
15969
|
-
CPU_FREE(cpus);
|
15970
|
-
}
|
15971
|
-
#else
|
15972
|
-
// TODO: Windows etc.
|
15973
|
-
// (the linux implementation may also work on BSD, someone should test)
|
15974
|
-
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
15975
|
-
static void clear_numa_thread_affinity(void) {}
|
15976
|
-
#endif
|
15977
|
-
|
15978
|
-
struct ggml_compute_state_shared {
|
15979
|
-
const struct ggml_cgraph * cgraph;
|
15980
|
-
const struct ggml_cplan * cplan;
|
15981
|
-
|
15982
|
-
int64_t perf_node_start_cycles;
|
15983
|
-
int64_t perf_node_start_time_us;
|
15984
|
-
|
15985
|
-
const int n_threads;
|
15986
|
-
|
15987
|
-
// synchronization primitives
|
15988
|
-
atomic_int n_active; // num active threads
|
15989
|
-
atomic_int node_n; // active graph node
|
15990
|
-
|
15991
|
-
bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
|
15992
|
-
void * abort_callback_data;
|
15993
|
-
};
|
15994
|
-
|
15995
|
-
struct ggml_compute_state {
|
15996
|
-
ggml_thread_t thrd;
|
15997
|
-
int ith;
|
15998
|
-
struct ggml_compute_state_shared * shared;
|
15999
|
-
};
|
16000
|
-
|
16001
|
-
static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
|
16002
|
-
int64_t cycles_cur = ggml_perf_cycles() - st->perf_node_start_cycles;
|
16003
|
-
int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
|
15446
|
+
CPU_FREE(cpus);
|
15447
|
+
}
|
15448
|
+
#else
|
15449
|
+
// TODO: Windows etc.
|
15450
|
+
// (the linux implementation may also work on BSD, someone should test)
|
15451
|
+
// fallback used when the branch above is not compiled in: does nothing
static void set_numa_thread_affinity(int thread_n, int n_threads) {
    UNUSED(thread_n);
    UNUSED(n_threads);
}

// fallback counterpart of the function above: does nothing
static void clear_numa_thread_affinity(void) {
}
|
15453
|
+
#endif
|
15454
|
+
|
15455
|
+
// State shared by all threads that take part in one graph computation.
struct ggml_compute_state_shared {
    const struct ggml_cgraph * cgraph; // graph being computed
    const struct ggml_cplan  * cplan;  // plan for that computation

    // start stamps of the current node, read back by ggml_graph_compute_perf_stats_node
    int64_t perf_node_start_cycles;
    int64_t perf_node_start_time_us;

    const int n_threads;

    // synchronization primitives
    atomic_int n_active; // num active threads
    atomic_int node_n;   // active graph node

    // abort ggml_graph_compute when the callback returns true
    bool (*abort_callback)(void * data);
    void * abort_callback_data;
};
|
15471
|
+
|
15472
|
+
struct ggml_compute_state {
|
15473
|
+
ggml_thread_t thrd;
|
15474
|
+
int ith;
|
15475
|
+
struct ggml_compute_state_shared * shared;
|
15476
|
+
};
|
15477
|
+
|
15478
|
+
static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
|
15479
|
+
int64_t cycles_cur = ggml_perf_cycles() - st->perf_node_start_cycles;
|
15480
|
+
int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
|
15481
|
+
|
15482
|
+
node->perf_runs++;
|
15483
|
+
node->perf_cycles += cycles_cur;
|
15484
|
+
node->perf_time_us += time_us_cur;
|
15485
|
+
}
|
15486
|
+
|
15487
|
+
static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
15488
|
+
int n_tasks = 0;
|
15489
|
+
|
15490
|
+
switch (node->op) {
|
15491
|
+
case GGML_OP_CPY:
|
15492
|
+
case GGML_OP_DUP:
|
15493
|
+
case GGML_OP_ADD:
|
15494
|
+
case GGML_OP_ADD1:
|
15495
|
+
case GGML_OP_ACC:
|
15496
|
+
{
|
15497
|
+
n_tasks = n_threads;
|
15498
|
+
} break;
|
15499
|
+
case GGML_OP_SUB:
|
15500
|
+
case GGML_OP_DIV:
|
15501
|
+
case GGML_OP_SQR:
|
15502
|
+
case GGML_OP_SQRT:
|
15503
|
+
case GGML_OP_LOG:
|
15504
|
+
case GGML_OP_SUM:
|
15505
|
+
case GGML_OP_SUM_ROWS:
|
15506
|
+
case GGML_OP_MEAN:
|
15507
|
+
case GGML_OP_ARGMAX:
|
15508
|
+
case GGML_OP_REPEAT:
|
15509
|
+
case GGML_OP_REPEAT_BACK:
|
15510
|
+
{
|
15511
|
+
n_tasks = 1;
|
15512
|
+
} break;
|
15513
|
+
case GGML_OP_UNARY:
|
15514
|
+
switch (ggml_get_unary_op(node)) {
|
15515
|
+
case GGML_UNARY_OP_ABS:
|
15516
|
+
case GGML_UNARY_OP_SGN:
|
15517
|
+
case GGML_UNARY_OP_NEG:
|
15518
|
+
case GGML_UNARY_OP_STEP:
|
15519
|
+
case GGML_UNARY_OP_TANH:
|
15520
|
+
case GGML_UNARY_OP_ELU:
|
15521
|
+
case GGML_UNARY_OP_RELU:
|
15522
|
+
case GGML_UNARY_OP_LEAKY:
|
15523
|
+
{
|
15524
|
+
n_tasks = 1;
|
15525
|
+
} break;
|
15526
|
+
|
15527
|
+
case GGML_UNARY_OP_GELU:
|
15528
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
15529
|
+
case GGML_UNARY_OP_SILU:
|
15530
|
+
{
|
15531
|
+
n_tasks = n_threads;
|
15532
|
+
} break;
|
15533
|
+
}
|
15534
|
+
break;
|
15535
|
+
case GGML_OP_SILU_BACK:
|
15536
|
+
case GGML_OP_MUL:
|
15537
|
+
case GGML_OP_NORM:
|
15538
|
+
case GGML_OP_RMS_NORM:
|
15539
|
+
case GGML_OP_RMS_NORM_BACK:
|
15540
|
+
case GGML_OP_GROUP_NORM:
|
15541
|
+
case GGML_OP_CONCAT:
|
15542
|
+
{
|
15543
|
+
n_tasks = n_threads;
|
15544
|
+
} break;
|
15545
|
+
case GGML_OP_MUL_MAT:
|
15546
|
+
{
|
15547
|
+
n_tasks = n_threads;
|
15548
|
+
|
15549
|
+
// TODO: use different scheduling for different matrix sizes
|
15550
|
+
//const int nr0 = ggml_nrows(node->src[0]);
|
15551
|
+
//const int nr1 = ggml_nrows(node->src[1]);
|
15552
|
+
|
15553
|
+
//n_tasks = MIN(n_threads, MAX(1, nr0/128));
|
15554
|
+
//printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
|
15555
|
+
|
15556
|
+
#if defined(GGML_USE_CUBLAS)
|
15557
|
+
if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
|
15558
|
+
n_tasks = 1; // TODO: this actually is doing nothing
|
15559
|
+
// the threads are still spinning
|
15560
|
+
}
|
15561
|
+
#elif defined(GGML_USE_CLBLAST)
|
15562
|
+
if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
|
15563
|
+
n_tasks = 1; // TODO: this actually is doing nothing
|
15564
|
+
// the threads are still spinning
|
15565
|
+
}
|
15566
|
+
#endif
|
15567
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
15568
|
+
if (ggml_compute_forward_mul_mat_use_blas(node->src[0], node->src[1], node)) {
|
15569
|
+
n_tasks = 1; // TODO: this actually is doing nothing
|
15570
|
+
// the threads are still spinning
|
15571
|
+
}
|
15572
|
+
#endif
|
15573
|
+
} break;
|
15574
|
+
case GGML_OP_OUT_PROD:
|
15575
|
+
{
|
15576
|
+
n_tasks = n_threads;
|
15577
|
+
} break;
|
15578
|
+
case GGML_OP_SCALE:
|
15579
|
+
case GGML_OP_SET:
|
15580
|
+
case GGML_OP_CONT:
|
15581
|
+
case GGML_OP_RESHAPE:
|
15582
|
+
case GGML_OP_VIEW:
|
15583
|
+
case GGML_OP_PERMUTE:
|
15584
|
+
case GGML_OP_TRANSPOSE:
|
15585
|
+
case GGML_OP_GET_ROWS:
|
15586
|
+
case GGML_OP_GET_ROWS_BACK:
|
15587
|
+
case GGML_OP_DIAG:
|
15588
|
+
{
|
15589
|
+
n_tasks = 1;
|
15590
|
+
} break;
|
15591
|
+
case GGML_OP_DIAG_MASK_ZERO:
|
15592
|
+
case GGML_OP_DIAG_MASK_INF:
|
15593
|
+
case GGML_OP_SOFT_MAX:
|
15594
|
+
case GGML_OP_SOFT_MAX_BACK:
|
15595
|
+
case GGML_OP_ROPE:
|
15596
|
+
case GGML_OP_ROPE_BACK:
|
15597
|
+
case GGML_OP_ADD_REL_POS:
|
15598
|
+
{
|
15599
|
+
n_tasks = n_threads;
|
15600
|
+
} break;
|
15601
|
+
case GGML_OP_ALIBI:
|
15602
|
+
{
|
15603
|
+
n_tasks = 1; //TODO
|
15604
|
+
} break;
|
15605
|
+
case GGML_OP_CLAMP:
|
15606
|
+
{
|
15607
|
+
n_tasks = 1; //TODO
|
15608
|
+
} break;
|
15609
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
15610
|
+
{
|
15611
|
+
n_tasks = n_threads;
|
15612
|
+
} break;
|
15613
|
+
case GGML_OP_IM2COL:
|
15614
|
+
{
|
15615
|
+
n_tasks = n_threads;
|
15616
|
+
} break;
|
15617
|
+
case GGML_OP_CONV_TRANSPOSE_2D:
|
15618
|
+
{
|
15619
|
+
n_tasks = n_threads;
|
15620
|
+
} break;
|
15621
|
+
case GGML_OP_POOL_1D:
|
15622
|
+
case GGML_OP_POOL_2D:
|
15623
|
+
{
|
15624
|
+
n_tasks = 1;
|
15625
|
+
} break;
|
15626
|
+
case GGML_OP_UPSCALE:
|
15627
|
+
{
|
15628
|
+
n_tasks = n_threads;
|
15629
|
+
} break;
|
15630
|
+
case GGML_OP_FLASH_ATTN:
|
15631
|
+
{
|
15632
|
+
n_tasks = n_threads;
|
15633
|
+
} break;
|
15634
|
+
case GGML_OP_FLASH_FF:
|
15635
|
+
{
|
15636
|
+
n_tasks = n_threads;
|
15637
|
+
} break;
|
15638
|
+
case GGML_OP_FLASH_ATTN_BACK:
|
15639
|
+
{
|
15640
|
+
n_tasks = n_threads;
|
15641
|
+
} break;
|
15642
|
+
case GGML_OP_WIN_PART:
|
15643
|
+
case GGML_OP_WIN_UNPART:
|
15644
|
+
case GGML_OP_GET_REL_POS:
|
15645
|
+
case GGML_OP_MAP_UNARY:
|
15646
|
+
case GGML_OP_MAP_BINARY:
|
15647
|
+
case GGML_OP_MAP_CUSTOM1_F32:
|
15648
|
+
case GGML_OP_MAP_CUSTOM2_F32:
|
15649
|
+
case GGML_OP_MAP_CUSTOM3_F32:
|
15650
|
+
{
|
15651
|
+
n_tasks = 1;
|
15652
|
+
} break;
|
15653
|
+
case GGML_OP_MAP_CUSTOM1:
|
15654
|
+
{
|
15655
|
+
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
15656
|
+
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
15657
|
+
n_tasks = n_threads;
|
15658
|
+
} else {
|
15659
|
+
n_tasks = MIN(p->n_tasks, n_threads);
|
15660
|
+
}
|
15661
|
+
} break;
|
15662
|
+
case GGML_OP_MAP_CUSTOM2:
|
15663
|
+
{
|
15664
|
+
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
15665
|
+
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
15666
|
+
n_tasks = n_threads;
|
15667
|
+
} else {
|
15668
|
+
n_tasks = MIN(p->n_tasks, n_threads);
|
15669
|
+
}
|
15670
|
+
} break;
|
15671
|
+
case GGML_OP_MAP_CUSTOM3:
|
15672
|
+
{
|
15673
|
+
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
15674
|
+
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
15675
|
+
n_tasks = n_threads;
|
15676
|
+
} else {
|
15677
|
+
n_tasks = MIN(p->n_tasks, n_threads);
|
15678
|
+
}
|
15679
|
+
} break;
|
15680
|
+
case GGML_OP_CROSS_ENTROPY_LOSS:
|
15681
|
+
{
|
15682
|
+
n_tasks = n_threads;
|
15683
|
+
} break;
|
15684
|
+
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
15685
|
+
{
|
15686
|
+
n_tasks = n_threads;
|
15687
|
+
} break;
|
15688
|
+
case GGML_OP_NONE:
|
15689
|
+
{
|
15690
|
+
n_tasks = 1;
|
15691
|
+
} break;
|
15692
|
+
case GGML_OP_COUNT:
|
15693
|
+
{
|
15694
|
+
GGML_ASSERT(false);
|
15695
|
+
} break;
|
15696
|
+
default:
|
15697
|
+
{
|
15698
|
+
printf("%s: op %s not implemented\n", __func__, ggml_op_name(node->op));
|
15699
|
+
GGML_ASSERT(false);
|
15700
|
+
} break;
|
15701
|
+
}
|
15702
|
+
|
15703
|
+
assert(n_tasks > 0);
|
16004
15704
|
|
16005
|
-
|
16006
|
-
node->perf_cycles += cycles_cur;
|
16007
|
-
node->perf_time_us += time_us_cur;
|
15705
|
+
return n_tasks;
|
16008
15706
|
}
|
16009
15707
|
|
16010
15708
|
static thread_ret_t ggml_graph_compute_thread(void * data) {
|
@@ -16013,7 +15711,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16013
15711
|
const struct ggml_cgraph * cgraph = state->shared->cgraph;
|
16014
15712
|
const struct ggml_cplan * cplan = state->shared->cplan;
|
16015
15713
|
|
16016
|
-
const int * n_tasks_arr = cplan->n_tasks;
|
16017
15714
|
const int n_threads = state->shared->n_threads;
|
16018
15715
|
|
16019
15716
|
set_numa_thread_affinity(state->ith, n_threads);
|
@@ -16038,9 +15735,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16038
15735
|
|
16039
15736
|
if (node_n != -1) {
|
16040
15737
|
/* FINALIZE */
|
16041
|
-
struct ggml_tensor * node =
|
15738
|
+
struct ggml_tensor * node = cgraph->nodes[node_n];
|
16042
15739
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
16043
|
-
params.nth =
|
15740
|
+
params.nth = ggml_get_n_tasks(node, n_threads);
|
16044
15741
|
ggml_compute_forward(¶ms, node);
|
16045
15742
|
}
|
16046
15743
|
ggml_graph_compute_perf_stats_node(node, state->shared);
|
@@ -16051,7 +15748,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16051
15748
|
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
|
16052
15749
|
|
16053
15750
|
struct ggml_tensor * node = cgraph->nodes[node_n];
|
16054
|
-
const int n_tasks =
|
15751
|
+
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
16055
15752
|
|
16056
15753
|
state->shared->perf_node_start_cycles = ggml_perf_cycles();
|
16057
15754
|
state->shared->perf_node_start_time_us = ggml_perf_time_us();
|
@@ -16109,7 +15806,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16109
15806
|
|
16110
15807
|
/* COMPUTE */
|
16111
15808
|
struct ggml_tensor * node = cgraph->nodes[node_n];
|
16112
|
-
const int n_tasks =
|
15809
|
+
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
16113
15810
|
|
16114
15811
|
struct ggml_compute_params params = {
|
16115
15812
|
/*.type =*/ GGML_TASK_COMPUTE,
|
@@ -16143,121 +15840,46 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16143
15840
|
|
16144
15841
|
struct ggml_tensor * node = cgraph->nodes[i];
|
16145
15842
|
|
15843
|
+
size_t cur = 0;
|
15844
|
+
|
16146
15845
|
switch (node->op) {
|
16147
15846
|
case GGML_OP_CPY:
|
16148
15847
|
case GGML_OP_DUP:
|
16149
15848
|
{
|
16150
15849
|
n_tasks = n_threads;
|
16151
15850
|
|
16152
|
-
size_t cur = 0;
|
16153
15851
|
if (ggml_is_quantized(node->type)) {
|
16154
15852
|
cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
|
16155
15853
|
}
|
16156
|
-
|
16157
|
-
work_size = MAX(work_size, cur);
|
16158
15854
|
} break;
|
16159
15855
|
case GGML_OP_ADD:
|
16160
15856
|
case GGML_OP_ADD1:
|
16161
15857
|
{
|
16162
15858
|
n_tasks = n_threads;
|
16163
15859
|
|
16164
|
-
size_t cur = 0;
|
16165
|
-
|
16166
15860
|
if (ggml_is_quantized(node->src[0]->type)) {
|
16167
15861
|
cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
|
16168
15862
|
}
|
16169
|
-
|
16170
|
-
work_size = MAX(work_size, cur);
|
16171
15863
|
} break;
|
16172
15864
|
case GGML_OP_ACC:
|
16173
15865
|
{
|
16174
15866
|
n_tasks = n_threads;
|
16175
15867
|
|
16176
|
-
size_t cur = 0;
|
16177
|
-
|
16178
15868
|
if (ggml_is_quantized(node->src[0]->type)) {
|
16179
15869
|
cur = ggml_type_size(GGML_TYPE_F32) * node->src[1]->ne[0] * n_tasks;
|
16180
15870
|
}
|
16181
|
-
|
16182
|
-
work_size = MAX(work_size, cur);
|
16183
|
-
} break;
|
16184
|
-
case GGML_OP_SUB:
|
16185
|
-
case GGML_OP_DIV:
|
16186
|
-
case GGML_OP_SQR:
|
16187
|
-
case GGML_OP_SQRT:
|
16188
|
-
case GGML_OP_LOG:
|
16189
|
-
case GGML_OP_SUM:
|
16190
|
-
case GGML_OP_SUM_ROWS:
|
16191
|
-
case GGML_OP_MEAN:
|
16192
|
-
case GGML_OP_ARGMAX:
|
16193
|
-
case GGML_OP_REPEAT:
|
16194
|
-
case GGML_OP_REPEAT_BACK:
|
16195
|
-
{
|
16196
|
-
n_tasks = 1;
|
16197
|
-
} break;
|
16198
|
-
|
16199
|
-
case GGML_OP_UNARY:
|
16200
|
-
{
|
16201
|
-
switch (ggml_get_unary_op(node)) {
|
16202
|
-
case GGML_UNARY_OP_ABS:
|
16203
|
-
case GGML_UNARY_OP_SGN:
|
16204
|
-
case GGML_UNARY_OP_NEG:
|
16205
|
-
case GGML_UNARY_OP_STEP:
|
16206
|
-
case GGML_UNARY_OP_TANH:
|
16207
|
-
case GGML_UNARY_OP_ELU:
|
16208
|
-
case GGML_UNARY_OP_RELU:
|
16209
|
-
{
|
16210
|
-
n_tasks = 1;
|
16211
|
-
} break;
|
16212
|
-
|
16213
|
-
case GGML_UNARY_OP_GELU:
|
16214
|
-
case GGML_UNARY_OP_GELU_QUICK:
|
16215
|
-
case GGML_UNARY_OP_SILU:
|
16216
|
-
{
|
16217
|
-
n_tasks = n_threads;
|
16218
|
-
} break;
|
16219
|
-
}
|
16220
|
-
} break;
|
16221
|
-
case GGML_OP_SILU_BACK:
|
16222
|
-
case GGML_OP_MUL:
|
16223
|
-
case GGML_OP_NORM:
|
16224
|
-
case GGML_OP_RMS_NORM:
|
16225
|
-
case GGML_OP_RMS_NORM_BACK:
|
16226
|
-
case GGML_OP_GROUP_NORM:
|
16227
|
-
{
|
16228
|
-
n_tasks = n_threads;
|
16229
15871
|
} break;
|
16230
|
-
case GGML_OP_CONCAT:
|
16231
15872
|
case GGML_OP_MUL_MAT:
|
16232
15873
|
{
|
16233
|
-
n_tasks = n_threads;
|
16234
|
-
|
16235
|
-
// TODO: use different scheduling for different matrix sizes
|
16236
|
-
//const int nr0 = ggml_nrows(node->src[0]);
|
16237
|
-
//const int nr1 = ggml_nrows(node->src[1]);
|
16238
|
-
|
16239
|
-
//n_tasks = MIN(n_threads, MAX(1, nr0/128));
|
16240
|
-
//printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
|
16241
|
-
|
16242
|
-
size_t cur = 0;
|
16243
15874
|
const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
|
16244
15875
|
|
16245
|
-
#if defined(
|
16246
|
-
if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
|
16247
|
-
n_tasks = 1; // TODO: this actually is doing nothing
|
16248
|
-
// the threads are still spinning
|
16249
|
-
} else
|
16250
|
-
#elif defined(GGML_USE_CLBLAST)
|
15876
|
+
#if defined(GGML_USE_CLBLAST)
|
16251
15877
|
if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
|
16252
|
-
n_tasks = 1; // TODO: this actually is doing nothing
|
16253
|
-
// the threads are still spinning
|
16254
15878
|
cur = ggml_cl_mul_mat_get_wsize(node->src[0], node->src[1], node);
|
16255
15879
|
} else
|
16256
15880
|
#endif
|
16257
15881
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
16258
15882
|
if (ggml_compute_forward_mul_mat_use_blas(node->src[0], node->src[1], node)) {
|
16259
|
-
n_tasks = 1; // TODO: this actually is doing nothing
|
16260
|
-
// the threads are still spinning
|
16261
15883
|
if (node->src[0]->type != GGML_TYPE_F32) {
|
16262
15884
|
// here we need memory just for single 2D matrix from src0
|
16263
15885
|
cur = ggml_type_size(GGML_TYPE_F32)*(node->src[0]->ne[0]*node->src[0]->ne[1]);
|
@@ -16266,108 +15888,18 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16266
15888
|
#endif
|
16267
15889
|
if (node->src[1]->type != vec_dot_type) {
|
16268
15890
|
cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
|
16269
|
-
} else {
|
16270
|
-
cur = 0;
|
16271
15891
|
}
|
16272
|
-
|
16273
|
-
work_size = MAX(work_size, cur);
|
16274
15892
|
} break;
|
16275
15893
|
case GGML_OP_OUT_PROD:
|
16276
15894
|
{
|
16277
15895
|
n_tasks = n_threads;
|
16278
15896
|
|
16279
|
-
size_t cur = 0;
|
16280
|
-
|
16281
15897
|
if (ggml_is_quantized(node->src[0]->type)) {
|
16282
15898
|
cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
|
16283
15899
|
}
|
16284
|
-
|
16285
|
-
work_size = MAX(work_size, cur);
|
16286
|
-
} break;
|
16287
|
-
case GGML_OP_SCALE:
|
16288
|
-
{
|
16289
|
-
n_tasks = 1;
|
16290
|
-
} break;
|
16291
|
-
case GGML_OP_SET:
|
16292
|
-
case GGML_OP_CONT:
|
16293
|
-
case GGML_OP_RESHAPE:
|
16294
|
-
case GGML_OP_VIEW:
|
16295
|
-
case GGML_OP_PERMUTE:
|
16296
|
-
case GGML_OP_TRANSPOSE:
|
16297
|
-
case GGML_OP_GET_ROWS:
|
16298
|
-
case GGML_OP_GET_ROWS_BACK:
|
16299
|
-
case GGML_OP_DIAG:
|
16300
|
-
{
|
16301
|
-
n_tasks = 1;
|
16302
|
-
} break;
|
16303
|
-
case GGML_OP_DIAG_MASK_ZERO:
|
16304
|
-
case GGML_OP_DIAG_MASK_INF:
|
16305
|
-
case GGML_OP_SOFT_MAX:
|
16306
|
-
case GGML_OP_SOFT_MAX_BACK:
|
16307
|
-
case GGML_OP_ROPE:
|
16308
|
-
case GGML_OP_ROPE_BACK:
|
16309
|
-
case GGML_OP_ADD_REL_POS:
|
16310
|
-
{
|
16311
|
-
n_tasks = n_threads;
|
16312
|
-
} break;
|
16313
|
-
case GGML_OP_ALIBI:
|
16314
|
-
{
|
16315
|
-
n_tasks = 1; //TODO
|
16316
|
-
} break;
|
16317
|
-
case GGML_OP_CLAMP:
|
16318
|
-
{
|
16319
|
-
n_tasks = 1; //TODO
|
16320
|
-
} break;
|
16321
|
-
case GGML_OP_CONV_1D:
|
16322
|
-
{
|
16323
|
-
n_tasks = n_threads;
|
16324
|
-
|
16325
|
-
GGML_ASSERT(node->src[0]->ne[3] == 1);
|
16326
|
-
GGML_ASSERT(node->src[1]->ne[2] == 1);
|
16327
|
-
GGML_ASSERT(node->src[1]->ne[3] == 1);
|
16328
|
-
|
16329
|
-
const int64_t ne00 = node->src[0]->ne[0];
|
16330
|
-
const int64_t ne01 = node->src[0]->ne[1];
|
16331
|
-
const int64_t ne02 = node->src[0]->ne[2];
|
16332
|
-
|
16333
|
-
const int64_t ne10 = node->src[1]->ne[0];
|
16334
|
-
const int64_t ne11 = node->src[1]->ne[1];
|
16335
|
-
|
16336
|
-
const int64_t ne0 = node->ne[0];
|
16337
|
-
const int64_t ne1 = node->ne[1];
|
16338
|
-
const int64_t nk = ne00;
|
16339
|
-
const int64_t ew0 = nk * ne01;
|
16340
|
-
|
16341
|
-
UNUSED(ne02);
|
16342
|
-
UNUSED(ne10);
|
16343
|
-
UNUSED(ne11);
|
16344
|
-
|
16345
|
-
size_t cur = 0;
|
16346
|
-
|
16347
|
-
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16348
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16349
|
-
cur = sizeof(ggml_fp16_t)*(ne0*ne1*ew0);
|
16350
|
-
} else if (node->src[0]->type == GGML_TYPE_F32 &&
|
16351
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16352
|
-
cur = sizeof(float)*(ne0*ne1*ew0);
|
16353
|
-
} else {
|
16354
|
-
GGML_ASSERT(false);
|
16355
|
-
}
|
16356
|
-
|
16357
|
-
work_size = MAX(work_size, cur);
|
16358
|
-
} break;
|
16359
|
-
case GGML_OP_CONV_1D_STAGE_0:
|
16360
|
-
{
|
16361
|
-
n_tasks = n_threads;
|
16362
|
-
} break;
|
16363
|
-
case GGML_OP_CONV_1D_STAGE_1:
|
16364
|
-
{
|
16365
|
-
n_tasks = n_threads;
|
16366
15900
|
} break;
|
16367
15901
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
16368
15902
|
{
|
16369
|
-
n_tasks = n_threads;
|
16370
|
-
|
16371
15903
|
GGML_ASSERT(node->src[0]->ne[3] == 1);
|
16372
15904
|
GGML_ASSERT(node->src[1]->ne[2] == 1);
|
16373
15905
|
GGML_ASSERT(node->src[1]->ne[3] == 1);
|
@@ -16379,7 +15911,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16379
15911
|
const int64_t ne10 = node->src[1]->ne[0]; // L
|
16380
15912
|
const int64_t ne11 = node->src[1]->ne[1]; // Cin
|
16381
15913
|
|
16382
|
-
size_t cur = 0;
|
16383
15914
|
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16384
15915
|
node->src[1]->type == GGML_TYPE_F32) {
|
16385
15916
|
cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02;
|
@@ -16391,59 +15922,13 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16391
15922
|
} else {
|
16392
15923
|
GGML_ASSERT(false);
|
16393
15924
|
}
|
16394
|
-
|
16395
|
-
work_size = MAX(work_size, cur);
|
16396
|
-
} break;
|
16397
|
-
case GGML_OP_CONV_2D:
|
16398
|
-
{
|
16399
|
-
n_tasks = n_threads;
|
16400
|
-
|
16401
|
-
const int64_t ne00 = node->src[0]->ne[0]; // W
|
16402
|
-
const int64_t ne01 = node->src[0]->ne[1]; // H
|
16403
|
-
const int64_t ne02 = node->src[0]->ne[2]; // C
|
16404
|
-
const int64_t ne03 = node->src[0]->ne[3]; // N
|
16405
|
-
|
16406
|
-
const int64_t ne10 = node->src[1]->ne[0]; // W
|
16407
|
-
const int64_t ne11 = node->src[1]->ne[1]; // H
|
16408
|
-
const int64_t ne12 = node->src[1]->ne[2]; // C
|
16409
|
-
|
16410
|
-
const int64_t ne0 = node->ne[0];
|
16411
|
-
const int64_t ne1 = node->ne[1];
|
16412
|
-
const int64_t ne2 = node->ne[2];
|
16413
|
-
const int64_t ne3 = node->ne[3];
|
16414
|
-
const int64_t nk = ne00*ne01;
|
16415
|
-
const int64_t ew0 = nk * ne02;
|
16416
|
-
|
16417
|
-
UNUSED(ne03);
|
16418
|
-
UNUSED(ne2);
|
16419
|
-
|
16420
|
-
size_t cur = 0;
|
16421
|
-
|
16422
|
-
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16423
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16424
|
-
// im2col: [N*OH*OW, IC*KH*KW]
|
16425
|
-
cur = sizeof(ggml_fp16_t)*(ne3*ne0*ne1*ew0);
|
16426
|
-
} else if (node->src[0]->type == GGML_TYPE_F32 &&
|
16427
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16428
|
-
cur = sizeof(float)* (ne10*ne11*ne12);
|
16429
|
-
} else {
|
16430
|
-
GGML_ASSERT(false);
|
16431
|
-
}
|
16432
|
-
|
16433
|
-
work_size = MAX(work_size, cur);
|
16434
|
-
} break;
|
16435
|
-
case GGML_OP_CONV_2D_STAGE_0:
|
16436
|
-
{
|
16437
|
-
n_tasks = n_threads;
|
16438
15925
|
} break;
|
16439
|
-
case
|
15926
|
+
case GGML_OP_IM2COL:
|
16440
15927
|
{
|
16441
15928
|
n_tasks = n_threads;
|
16442
15929
|
} break;
|
16443
15930
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
16444
15931
|
{
|
16445
|
-
n_tasks = n_threads;
|
16446
|
-
|
16447
15932
|
const int64_t ne00 = node->src[0]->ne[0]; // W
|
16448
15933
|
const int64_t ne01 = node->src[0]->ne[1]; // H
|
16449
15934
|
const int64_t ne02 = node->src[0]->ne[2]; // Channels Out
|
@@ -16453,141 +15938,66 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16453
15938
|
const int64_t ne11 = node->src[1]->ne[1]; // H
|
16454
15939
|
const int64_t ne12 = node->src[1]->ne[2]; // Channels In
|
16455
15940
|
|
16456
|
-
size_t cur = 0;
|
16457
15941
|
cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02*ne03;
|
16458
15942
|
cur += sizeof(ggml_fp16_t)*ne10*ne11*ne12;
|
16459
|
-
|
16460
|
-
work_size = MAX(work_size, cur);
|
16461
|
-
} break;
|
16462
|
-
case GGML_OP_POOL_1D:
|
16463
|
-
case GGML_OP_POOL_2D:
|
16464
|
-
{
|
16465
|
-
n_tasks = 1;
|
16466
|
-
} break;
|
16467
|
-
case GGML_OP_UPSCALE:
|
16468
|
-
{
|
16469
|
-
n_tasks = n_threads;
|
16470
15943
|
} break;
|
16471
15944
|
case GGML_OP_FLASH_ATTN:
|
16472
15945
|
{
|
16473
15946
|
n_tasks = n_threads;
|
16474
15947
|
|
16475
|
-
size_t cur = 0;
|
16476
|
-
|
16477
15948
|
const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
|
16478
15949
|
|
16479
15950
|
if (node->src[1]->type == GGML_TYPE_F32) {
|
16480
15951
|
cur = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
|
16481
15952
|
cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
|
16482
|
-
}
|
16483
|
-
|
16484
|
-
if (node->src[1]->type == GGML_TYPE_F16) {
|
15953
|
+
} else if (node->src[1]->type == GGML_TYPE_F16) {
|
16485
15954
|
cur = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
|
16486
15955
|
cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
|
16487
15956
|
}
|
16488
|
-
|
16489
|
-
work_size = MAX(work_size, cur);
|
16490
15957
|
} break;
|
16491
15958
|
case GGML_OP_FLASH_FF:
|
16492
15959
|
{
|
16493
15960
|
n_tasks = n_threads;
|
16494
15961
|
|
16495
|
-
size_t cur = 0;
|
16496
|
-
|
16497
15962
|
if (node->src[1]->type == GGML_TYPE_F32) {
|
16498
15963
|
cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
|
16499
15964
|
cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
|
16500
|
-
}
|
16501
|
-
|
16502
|
-
if (node->src[1]->type == GGML_TYPE_F16) {
|
15965
|
+
} else if (node->src[1]->type == GGML_TYPE_F16) {
|
16503
15966
|
cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
|
16504
15967
|
cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
|
16505
15968
|
}
|
16506
|
-
|
16507
|
-
work_size = MAX(work_size, cur);
|
16508
15969
|
} break;
|
16509
15970
|
case GGML_OP_FLASH_ATTN_BACK:
|
16510
15971
|
{
|
16511
15972
|
n_tasks = n_threads;
|
16512
15973
|
|
16513
|
-
size_t cur = 0;
|
16514
|
-
|
16515
15974
|
const int64_t D = node->src[0]->ne[0];
|
16516
15975
|
const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
|
16517
15976
|
const int64_t mxDn = MAX(D, ne11) * 2; // *2 because of S and SM in ggml_compute_forward_flash_attn_back
|
16518
15977
|
if (node->src[1]->type == GGML_TYPE_F32) {
|
16519
15978
|
cur = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
|
16520
15979
|
cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
|
16521
|
-
}
|
16522
|
-
|
16523
|
-
if (node->src[1]->type == GGML_TYPE_F16) {
|
15980
|
+
} else if (node->src[1]->type == GGML_TYPE_F16) {
|
16524
15981
|
cur = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
|
16525
15982
|
cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
|
16526
15983
|
}
|
16527
|
-
|
16528
|
-
work_size = MAX(work_size, cur);
|
16529
|
-
} break;
|
16530
|
-
case GGML_OP_WIN_PART:
|
16531
|
-
case GGML_OP_WIN_UNPART:
|
16532
|
-
case GGML_OP_GET_REL_POS:
|
16533
|
-
case GGML_OP_MAP_UNARY:
|
16534
|
-
case GGML_OP_MAP_BINARY:
|
16535
|
-
case GGML_OP_MAP_CUSTOM1_F32:
|
16536
|
-
case GGML_OP_MAP_CUSTOM2_F32:
|
16537
|
-
case GGML_OP_MAP_CUSTOM3_F32:
|
16538
|
-
{
|
16539
|
-
n_tasks = 1;
|
16540
|
-
} break;
|
16541
|
-
case GGML_OP_MAP_CUSTOM1:
|
16542
|
-
{
|
16543
|
-
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
16544
|
-
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
16545
|
-
n_tasks = n_threads;
|
16546
|
-
} else {
|
16547
|
-
n_tasks = MIN(p->n_tasks, n_threads);
|
16548
|
-
}
|
16549
|
-
} break;
|
16550
|
-
case GGML_OP_MAP_CUSTOM2:
|
16551
|
-
{
|
16552
|
-
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
16553
|
-
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
16554
|
-
n_tasks = n_threads;
|
16555
|
-
} else {
|
16556
|
-
n_tasks = MIN(p->n_tasks, n_threads);
|
16557
|
-
}
|
16558
|
-
} break;
|
16559
|
-
case GGML_OP_MAP_CUSTOM3:
|
16560
|
-
{
|
16561
|
-
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
16562
|
-
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
16563
|
-
n_tasks = n_threads;
|
16564
|
-
} else {
|
16565
|
-
n_tasks = MIN(p->n_tasks, n_threads);
|
16566
|
-
}
|
16567
15984
|
} break;
|
15985
|
+
|
16568
15986
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
16569
15987
|
{
|
16570
15988
|
n_tasks = n_threads;
|
16571
15989
|
|
16572
|
-
|
16573
|
-
|
16574
|
-
work_size = MAX(work_size, cur);
|
16575
|
-
} break;
|
16576
|
-
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
16577
|
-
{
|
16578
|
-
n_tasks = n_threads;
|
16579
|
-
} break;
|
16580
|
-
case GGML_OP_NONE:
|
16581
|
-
{
|
16582
|
-
n_tasks = 1;
|
15990
|
+
cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks);
|
16583
15991
|
} break;
|
16584
15992
|
case GGML_OP_COUNT:
|
16585
15993
|
{
|
16586
15994
|
GGML_ASSERT(false);
|
16587
15995
|
} break;
|
15996
|
+
default:
|
15997
|
+
break;
|
16588
15998
|
}
|
16589
15999
|
|
16590
|
-
|
16000
|
+
work_size = MAX(work_size, cur);
|
16591
16001
|
}
|
16592
16002
|
|
16593
16003
|
if (work_size > 0) {
|
@@ -16609,12 +16019,6 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
16609
16019
|
if (cplan->work_size > 0) {
|
16610
16020
|
GGML_ASSERT(cplan->work_data);
|
16611
16021
|
}
|
16612
|
-
|
16613
|
-
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
16614
|
-
if (cgraph->nodes[i]->op != GGML_OP_NONE) {
|
16615
|
-
GGML_ASSERT(cplan->n_tasks[i] > 0);
|
16616
|
-
}
|
16617
|
-
}
|
16618
16022
|
}
|
16619
16023
|
|
16620
16024
|
const int n_threads = cplan->n_threads;
|
@@ -16687,16 +16091,6 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
16687
16091
|
return compute_status;
|
16688
16092
|
}
|
16689
16093
|
|
16690
|
-
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
16691
|
-
for (int i = 0; i < cgraph->n_nodes; i++) {
|
16692
|
-
struct ggml_tensor * grad = cgraph->grads[i];
|
16693
|
-
|
16694
|
-
if (grad) {
|
16695
|
-
ggml_set_zero(grad);
|
16696
|
-
}
|
16697
|
-
}
|
16698
|
-
}
|
16699
|
-
|
16700
16094
|
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
16701
16095
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
16702
16096
|
|
@@ -16823,12 +16217,12 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16823
16217
|
const uint32_t magic = GGML_FILE_MAGIC;
|
16824
16218
|
const uint32_t version = GGML_FILE_VERSION;
|
16825
16219
|
const uint32_t n_leafs = cgraph->n_leafs;
|
16826
|
-
const uint32_t
|
16220
|
+
const uint32_t n_nodes = cgraph->n_nodes;
|
16827
16221
|
|
16828
16222
|
fwrite(&magic, sizeof(uint32_t), 1, fout);
|
16829
16223
|
fwrite(&version, sizeof(uint32_t), 1, fout);
|
16830
16224
|
fwrite(&n_leafs, sizeof(uint32_t), 1, fout);
|
16831
|
-
fwrite(&
|
16225
|
+
fwrite(&n_nodes, sizeof(uint32_t), 1, fout);
|
16832
16226
|
fwrite(&size_eval, sizeof(uint64_t), 1, fout);
|
16833
16227
|
}
|
16834
16228
|
|
@@ -16916,7 +16310,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16916
16310
|
if (idx == -1) {
|
16917
16311
|
for (int k = 0; k < cgraph->n_nodes; ++k) {
|
16918
16312
|
if (args[j] == cgraph->nodes[k]) {
|
16919
|
-
idx =
|
16313
|
+
idx = cgraph->n_leafs + k;
|
16920
16314
|
break;
|
16921
16315
|
}
|
16922
16316
|
}
|
@@ -16943,11 +16337,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16943
16337
|
}
|
16944
16338
|
}
|
16945
16339
|
|
16946
|
-
struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
|
16340
|
+
struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
|
16947
16341
|
assert(*ctx_data == NULL);
|
16948
16342
|
assert(*ctx_eval == NULL);
|
16949
16343
|
|
16950
|
-
struct ggml_cgraph result =
|
16344
|
+
struct ggml_cgraph * result = NULL;
|
16951
16345
|
|
16952
16346
|
struct ggml_tensor * data = NULL;
|
16953
16347
|
|
@@ -17019,13 +16413,11 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17019
16413
|
const uint32_t n_leafs = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
|
17020
16414
|
const uint32_t n_nodes = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
|
17021
16415
|
const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
|
17022
|
-
|
17023
|
-
result.n_leafs = n_leafs;
|
17024
|
-
result.n_nodes = n_nodes;
|
16416
|
+
const int graph_size = MAX(n_leafs, n_nodes);
|
17025
16417
|
|
17026
16418
|
// create the data context
|
17027
16419
|
{
|
17028
|
-
const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
|
16420
|
+
const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false);
|
17029
16421
|
|
17030
16422
|
struct ggml_init_params params = {
|
17031
16423
|
.mem_size = size_eval + overhead,
|
@@ -17041,6 +16433,12 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17041
16433
|
}
|
17042
16434
|
}
|
17043
16435
|
|
16436
|
+
result = ggml_new_graph_custom(*ctx_eval, graph_size, false);
|
16437
|
+
|
16438
|
+
result->n_leafs = n_leafs;
|
16439
|
+
result->n_nodes = n_nodes;
|
16440
|
+
|
16441
|
+
|
17044
16442
|
// leafs
|
17045
16443
|
{
|
17046
16444
|
uint32_t type;
|
@@ -17079,7 +16477,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17079
16477
|
tensor->nb[j] = nb[j];
|
17080
16478
|
}
|
17081
16479
|
|
17082
|
-
result
|
16480
|
+
result->leafs[i] = tensor;
|
17083
16481
|
|
17084
16482
|
ptr += ggml_nbytes(tensor);
|
17085
16483
|
|
@@ -17131,10 +16529,10 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17131
16529
|
continue;
|
17132
16530
|
}
|
17133
16531
|
|
17134
|
-
if (arg_idx <
|
17135
|
-
args[j] = result
|
16532
|
+
if (arg_idx < result->n_leafs) {
|
16533
|
+
args[j] = result->leafs[arg_idx];
|
17136
16534
|
} else {
|
17137
|
-
args[j] = result
|
16535
|
+
args[j] = result->nodes[arg_idx - result->n_leafs];
|
17138
16536
|
}
|
17139
16537
|
}
|
17140
16538
|
|
@@ -17186,7 +16584,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17186
16584
|
tensor->src[j] = args[j];
|
17187
16585
|
}
|
17188
16586
|
|
17189
|
-
result
|
16587
|
+
result->nodes[i] = tensor;
|
17190
16588
|
|
17191
16589
|
fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
17192
16590
|
}
|
@@ -18091,10 +17489,11 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
18091
17489
|
case GGML_OPT_ADAM:
|
18092
17490
|
{
|
18093
17491
|
result = (struct ggml_opt_params) {
|
18094
|
-
.type
|
18095
|
-
.
|
18096
|
-
.
|
18097
|
-
.
|
17492
|
+
.type = GGML_OPT_ADAM,
|
17493
|
+
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
17494
|
+
.n_threads = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
|
17495
|
+
.past = 0,
|
17496
|
+
.delta = 1e-5f,
|
18098
17497
|
|
18099
17498
|
.max_no_improvement = 100,
|
18100
17499
|
|
@@ -18121,10 +17520,11 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
18121
17520
|
case GGML_OPT_LBFGS:
|
18122
17521
|
{
|
18123
17522
|
result = (struct ggml_opt_params) {
|
18124
|
-
.type
|
18125
|
-
.
|
18126
|
-
.
|
18127
|
-
.
|
17523
|
+
.type = GGML_OPT_LBFGS,
|
17524
|
+
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
17525
|
+
.n_threads = 1,
|
17526
|
+
.past = 0,
|
17527
|
+
.delta = 1e-5f,
|
18128
17528
|
|
18129
17529
|
.max_no_improvement = 0,
|
18130
17530
|
|
@@ -18266,14 +17666,11 @@ enum ggml_opt_result ggml_opt_resume(
|
|
18266
17666
|
struct ggml_tensor * f) {
|
18267
17667
|
|
18268
17668
|
// build forward + backward compute graphs
|
18269
|
-
struct
|
18270
|
-
|
18271
|
-
|
18272
|
-
struct ggml_cgraph * gf = (struct ggml_cgraph *) gfbuf->data;
|
18273
|
-
struct ggml_cgraph * gb = (struct ggml_cgraph *) gbbuf->data;
|
17669
|
+
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, true);
|
17670
|
+
ggml_build_forward_expand(gf, f);
|
18274
17671
|
|
18275
|
-
*
|
18276
|
-
|
17672
|
+
struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
|
17673
|
+
ggml_build_backward_expand(ctx, gf, gb, true);
|
18277
17674
|
|
18278
17675
|
return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
|
18279
17676
|
}
|
@@ -18729,7 +18126,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18729
18126
|
{
|
18730
18127
|
ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
|
18731
18128
|
|
18732
|
-
for (
|
18129
|
+
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
18733
18130
|
struct gguf_kv * kv = &ctx->kv[i];
|
18734
18131
|
|
18735
18132
|
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
@@ -18776,7 +18173,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18776
18173
|
case GGUF_TYPE_STRING:
|
18777
18174
|
{
|
18778
18175
|
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
|
18779
|
-
for (
|
18176
|
+
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
18780
18177
|
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
18781
18178
|
}
|
18782
18179
|
} break;
|
@@ -18804,7 +18201,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18804
18201
|
{
|
18805
18202
|
ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
|
18806
18203
|
|
18807
|
-
for (
|
18204
|
+
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
18808
18205
|
struct gguf_tensor_info * info = &ctx->infos[i];
|
18809
18206
|
|
18810
18207
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
@@ -18851,7 +18248,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18851
18248
|
// compute the total size of the data section, taking into account the alignment
|
18852
18249
|
{
|
18853
18250
|
ctx->size = 0;
|
18854
|
-
for (
|
18251
|
+
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
18855
18252
|
struct gguf_tensor_info * info = &ctx->infos[i];
|
18856
18253
|
|
18857
18254
|
const int64_t ne =
|
@@ -18920,7 +18317,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18920
18317
|
ggml_set_no_alloc(ctx_data, true);
|
18921
18318
|
|
18922
18319
|
// create the tensors
|
18923
|
-
for (
|
18320
|
+
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
18924
18321
|
const int64_t ne[GGML_MAX_DIMS] = {
|
18925
18322
|
ctx->infos[i].ne[0],
|
18926
18323
|
ctx->infos[i].ne[1],
|
@@ -19055,24 +18452,29 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
|
|
19055
18452
|
}
|
19056
18453
|
|
19057
18454
|
const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
|
18455
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19058
18456
|
return ctx->kv[key_id].key.data;
|
19059
18457
|
}
|
19060
18458
|
|
19061
18459
|
enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
|
18460
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19062
18461
|
return ctx->kv[key_id].type;
|
19063
18462
|
}
|
19064
18463
|
|
19065
18464
|
enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
|
18465
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19066
18466
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
19067
18467
|
return ctx->kv[key_id].value.arr.type;
|
19068
18468
|
}
|
19069
18469
|
|
19070
18470
|
const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
|
18471
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19071
18472
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
19072
18473
|
return ctx->kv[key_id].value.arr.data;
|
19073
18474
|
}
|
19074
18475
|
|
19075
18476
|
const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
|
18477
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19076
18478
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
19077
18479
|
struct gguf_kv * kv = &ctx->kv[key_id];
|
19078
18480
|
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
|
@@ -19080,70 +18482,90 @@ const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i
|
|
19080
18482
|
}
|
19081
18483
|
|
19082
18484
|
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
|
18485
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19083
18486
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
19084
18487
|
return ctx->kv[key_id].value.arr.n;
|
19085
18488
|
}
|
19086
18489
|
|
19087
18490
|
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
|
18491
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19088
18492
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
|
19089
18493
|
return ctx->kv[key_id].value.uint8;
|
19090
18494
|
}
|
19091
18495
|
|
19092
18496
|
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
|
18497
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19093
18498
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
|
19094
18499
|
return ctx->kv[key_id].value.int8;
|
19095
18500
|
}
|
19096
18501
|
|
19097
18502
|
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
|
18503
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19098
18504
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
|
19099
18505
|
return ctx->kv[key_id].value.uint16;
|
19100
18506
|
}
|
19101
18507
|
|
19102
18508
|
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
|
18509
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19103
18510
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
|
19104
18511
|
return ctx->kv[key_id].value.int16;
|
19105
18512
|
}
|
19106
18513
|
|
19107
18514
|
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
|
18515
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19108
18516
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
|
19109
18517
|
return ctx->kv[key_id].value.uint32;
|
19110
18518
|
}
|
19111
18519
|
|
19112
18520
|
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
|
18521
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19113
18522
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
|
19114
18523
|
return ctx->kv[key_id].value.int32;
|
19115
18524
|
}
|
19116
18525
|
|
19117
18526
|
float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
|
18527
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19118
18528
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
|
19119
18529
|
return ctx->kv[key_id].value.float32;
|
19120
18530
|
}
|
19121
18531
|
|
19122
18532
|
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
|
18533
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19123
18534
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
|
19124
18535
|
return ctx->kv[key_id].value.uint64;
|
19125
18536
|
}
|
19126
18537
|
|
19127
18538
|
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
|
18539
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19128
18540
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
|
19129
18541
|
return ctx->kv[key_id].value.int64;
|
19130
18542
|
}
|
19131
18543
|
|
19132
18544
|
double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
|
18545
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19133
18546
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
|
19134
18547
|
return ctx->kv[key_id].value.float64;
|
19135
18548
|
}
|
19136
18549
|
|
19137
18550
|
bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
|
18551
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19138
18552
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
|
19139
18553
|
return ctx->kv[key_id].value.bool_;
|
19140
18554
|
}
|
19141
18555
|
|
19142
18556
|
const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
|
18557
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19143
18558
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
|
19144
18559
|
return ctx->kv[key_id].value.str.data;
|
19145
18560
|
}
|
19146
18561
|
|
18562
|
+
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
|
18563
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
18564
|
+
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
|
18565
|
+
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
|
18566
|
+
return &ctx->kv[key_id].value;
|
18567
|
+
}
|
18568
|
+
|
19147
18569
|
int gguf_get_n_tensors(const struct gguf_context * ctx) {
|
19148
18570
|
return ctx->header.n_tensors;
|
19149
18571
|
}
|