llama_cpp 0.9.2 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -1
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/ext/llama_cpp/src/ggml-alloc.c +378 -208
- data/ext/llama_cpp/src/ggml-alloc.h +68 -16
- data/ext/llama_cpp/src/ggml-backend-impl.h +87 -0
- data/ext/llama_cpp/src/ggml-backend.c +578 -13
- data/ext/llama_cpp/src/ggml-backend.h +70 -77
- data/ext/llama_cpp/src/ggml-cuda.cu +194 -8
- data/ext/llama_cpp/src/ggml-impl.h +13 -7
- data/ext/llama_cpp/src/ggml-metal.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.m +113 -32
- data/ext/llama_cpp/src/ggml-metal.metal +107 -1
- data/ext/llama_cpp/src/ggml-quants.c +173 -73
- data/ext/llama_cpp/src/ggml.c +826 -1482
- data/ext/llama_cpp/src/ggml.h +63 -45
- data/ext/llama_cpp/src/llama.cpp +364 -38
- data/ext/llama_cpp/src/llama.h +6 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +3 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -100,6 +100,49 @@ typedef void * thread_ret_t;
|
|
100
100
|
#include <hbwmalloc.h>
|
101
101
|
#endif
|
102
102
|
|
103
|
+
#if defined(__APPLE__)
|
104
|
+
#include <TargetConditionals.h>
|
105
|
+
#endif
|
106
|
+
|
107
|
+
#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && \
|
108
|
+
(!defined(TARGET_OS_TV) && !defined(TARGET_OS_WATCH))
|
109
|
+
|
110
|
+
#include <sys/wait.h>
|
111
|
+
|
112
|
+
// Print a backtrace of the calling process by attaching gdb to it.
//
// A child process is forked which execs `gdb --batch`, attaches to the parent
// (this process, identified by getpid()), runs `bt -frame-info source-and-location`,
// then detaches and quits. The parent blocks in waitpid() until that child is done.
//
// Nothing is printed when fork() fails or when gdb cannot be executed.
void ggml_print_backtrace(void) {
    // backtrace_symbols_fd() from <execinfo.h> does not show line numbers, use gdb instead
    char attach[32];
    snprintf(attach, sizeof(attach), "attach %d", (int) getpid());

    const pid_t pid = fork();
    if (pid < 0) {
        // fork failed: there is no child, and waitpid(-1, ...) would wait for an unrelated one
        return;
    }

    if (pid == 0) {
        execlp("gdb", "gdb", "--batch",
            "-ex", "set style enabled on",
            "-ex", attach,
            "-ex", "bt -frame-info source-and-location",
            "-ex", "detach",
            "-ex", "quit",
            NULL);
        // execlp only returns on failure (e.g. gdb is not installed); terminate the
        // child here so it does not continue running as a duplicate of the parent
        _exit(127);
    }

    waitpid(pid, NULL, 0);
}
|
140
|
+
#else
|
141
|
+
// Fallback definition for builds where the platform check above is not satisfied:
// keeps the symbol available to callers but intentionally does nothing.
void ggml_print_backtrace(void) {
    // platform not supported
}
|
144
|
+
#endif
|
145
|
+
|
103
146
|
/*#define GGML_PERF*/
|
104
147
|
#define GGML_DEBUG 0
|
105
148
|
#define GGML_GELU_FP16
|
@@ -228,6 +271,12 @@ inline static void * ggml_aligned_malloc(size_t size) {
|
|
228
271
|
// floating point type used to accumulate sums
|
229
272
|
typedef double ggml_float;
|
230
273
|
|
274
|
+
#undef MIN
|
275
|
+
#undef MAX
|
276
|
+
|
277
|
+
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
278
|
+
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
279
|
+
|
231
280
|
//
|
232
281
|
// global data
|
233
282
|
//
|
@@ -561,6 +610,18 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
|
|
561
610
|
// simd mappings
|
562
611
|
//
|
563
612
|
|
613
|
+
#if defined(__ARM_NEON)
|
614
|
+
#if !defined(__aarch64__)
|
615
|
+
|
616
|
+
// 64-bit compatibility
|
617
|
+
|
618
|
+
// Sum of the four lanes of v, added in lane order 0..3.
// Only compiled when __ARM_NEON is defined and __aarch64__ is not (see the enclosing #if).
inline static float vaddvq_f32(float32x4_t v) {
    const float lane0 = vgetq_lane_f32(v, 0);
    const float lane1 = vgetq_lane_f32(v, 1);
    const float lane2 = vgetq_lane_f32(v, 2);
    const float lane3 = vgetq_lane_f32(v, 3);

    return lane0 + lane1 + lane2 + lane3;
}
|
621
|
+
|
622
|
+
#endif
|
623
|
+
#endif
|
624
|
+
|
564
625
|
// we define a common set of C macros which map to specific intrinsics based on the current architecture
|
565
626
|
// we then implement the fundamental computation operations below using only these macros
|
566
627
|
// adding support for new architectures requires to define the corresponding SIMD macros
|
@@ -1352,6 +1413,7 @@ inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) {
|
|
1352
1413
|
inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
|
1353
1414
|
inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
|
1354
1415
|
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
|
1416
|
+
// Leaky ReLU over n floats: y[k] = x[k] when x[k] > 0, otherwise 0.1f * x[k].
// Each element is read before it is written.
inline static void ggml_vec_leaky_f32 (const int n, float * y, const float * x) {
    for (int k = 0; k < n; ++k) {
        const float xv = x[k];
        if (xv > 0.f) {
            y[k] = xv;
        } else {
            y[k] = 0.1f*xv;
        }
    }
}
|
1355
1417
|
|
1356
1418
|
static const float GELU_COEF_A = 0.044715f;
|
1357
1419
|
static const float GELU_QUICK_COEF = -1.702f;
|
@@ -1572,13 +1634,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
1572
1634
|
"ROPE_BACK",
|
1573
1635
|
"ALIBI",
|
1574
1636
|
"CLAMP",
|
1575
|
-
"CONV_1D",
|
1576
|
-
"CONV_1D_STAGE_0",
|
1577
|
-
"CONV_1D_STAGE_1",
|
1578
1637
|
"CONV_TRANSPOSE_1D",
|
1579
|
-
"
|
1580
|
-
"CONV_2D_STAGE_0",
|
1581
|
-
"CONV_2D_STAGE_1",
|
1638
|
+
"IM2COL",
|
1582
1639
|
"CONV_TRANSPOSE_2D",
|
1583
1640
|
"POOL_1D",
|
1584
1641
|
"POOL_2D",
|
@@ -1609,7 +1666,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
1609
1666
|
"CROSS_ENTROPY_LOSS_BACK",
|
1610
1667
|
};
|
1611
1668
|
|
1612
|
-
static_assert(GGML_OP_COUNT ==
|
1669
|
+
static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
|
1613
1670
|
|
1614
1671
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
1615
1672
|
"none",
|
@@ -1659,13 +1716,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1659
1716
|
"rope_back(x)",
|
1660
1717
|
"alibi(x)",
|
1661
1718
|
"clamp(x)",
|
1662
|
-
"conv_1d(x)",
|
1663
|
-
"conv_1d_stage_0(x)",
|
1664
|
-
"conv_1d_stage_1(x)",
|
1665
1719
|
"conv_transpose_1d(x)",
|
1666
|
-
"
|
1667
|
-
"conv_2d_stage_0(x)",
|
1668
|
-
"conv_2d_stage_1(x)",
|
1720
|
+
"im2col(x)",
|
1669
1721
|
"conv_transpose_2d(x)",
|
1670
1722
|
"pool_1d(x)",
|
1671
1723
|
"pool_2d(x)",
|
@@ -1696,7 +1748,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
1696
1748
|
"cross_entropy_loss_back(x,y)",
|
1697
1749
|
};
|
1698
1750
|
|
1699
|
-
static_assert(GGML_OP_COUNT ==
|
1751
|
+
static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
|
1700
1752
|
|
1701
1753
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
1702
1754
|
|
@@ -1724,13 +1776,7 @@ static void ggml_setup_op_has_task_pass(void) {
|
|
1724
1776
|
p[GGML_OP_GET_ROWS_BACK ] = true;
|
1725
1777
|
p[GGML_OP_DIAG_MASK_INF ] = true;
|
1726
1778
|
p[GGML_OP_DIAG_MASK_ZERO ] = true;
|
1727
|
-
p[GGML_OP_CONV_1D ] = true;
|
1728
|
-
p[GGML_OP_CONV_1D_STAGE_0 ] = true;
|
1729
|
-
p[GGML_OP_CONV_1D_STAGE_1 ] = true;
|
1730
1779
|
p[GGML_OP_CONV_TRANSPOSE_1D ] = true;
|
1731
|
-
p[GGML_OP_CONV_2D ] = true;
|
1732
|
-
p[GGML_OP_CONV_2D_STAGE_0 ] = true;
|
1733
|
-
p[GGML_OP_CONV_2D_STAGE_1 ] = true;
|
1734
1780
|
p[GGML_OP_CONV_TRANSPOSE_2D ] = true;
|
1735
1781
|
p[GGML_OP_FLASH_ATTN_BACK ] = true;
|
1736
1782
|
p[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
|
@@ -3769,6 +3815,14 @@ struct ggml_tensor * ggml_relu_inplace(
|
|
3769
3815
|
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
|
3770
3816
|
}
|
3771
3817
|
|
3818
|
+
// ggml_leaky
|
3819
|
+
|
3820
|
+
struct ggml_tensor * ggml_leaky(
|
3821
|
+
struct ggml_context * ctx,
|
3822
|
+
struct ggml_tensor * a) {
|
3823
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_LEAKY);
|
3824
|
+
}
|
3825
|
+
|
3772
3826
|
// ggml_gelu
|
3773
3827
|
|
3774
3828
|
struct ggml_tensor * ggml_gelu(
|
@@ -5076,82 +5130,6 @@ static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p,
|
|
5076
5130
|
return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
|
5077
5131
|
}
|
5078
5132
|
|
5079
|
-
// im2col: [N, IC, IL] => [N, OL, IC*K]
|
5080
|
-
// a: [OC,IC, K]
|
5081
|
-
// b: [N, IC, IL]
|
5082
|
-
// result: [N, OL, IC*K]
|
5083
|
-
static struct ggml_tensor * ggml_conv_1d_stage_0(
|
5084
|
-
struct ggml_context * ctx,
|
5085
|
-
struct ggml_tensor * a,
|
5086
|
-
struct ggml_tensor * b,
|
5087
|
-
int s0,
|
5088
|
-
int p0,
|
5089
|
-
int d0) {
|
5090
|
-
GGML_ASSERT(a->ne[1] == b->ne[1]);
|
5091
|
-
bool is_node = false;
|
5092
|
-
|
5093
|
-
if (a->grad || b->grad) {
|
5094
|
-
GGML_ASSERT(false); // TODO: implement backward
|
5095
|
-
is_node = true;
|
5096
|
-
}
|
5097
|
-
|
5098
|
-
const int64_t OL = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
5099
|
-
|
5100
|
-
const int64_t ne[4] = {
|
5101
|
-
a->ne[1] * a->ne[0],
|
5102
|
-
OL,
|
5103
|
-
b->ne[2],
|
5104
|
-
1,
|
5105
|
-
};
|
5106
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
|
5107
|
-
|
5108
|
-
int32_t params[] = { s0, p0, d0 };
|
5109
|
-
ggml_set_op_params(result, params, sizeof(params));
|
5110
|
-
|
5111
|
-
result->op = GGML_OP_CONV_1D_STAGE_0;
|
5112
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5113
|
-
result->src[0] = a;
|
5114
|
-
result->src[1] = b;
|
5115
|
-
|
5116
|
-
return result;
|
5117
|
-
}
|
5118
|
-
|
5119
|
-
// ggml_conv_1d_stage_1
|
5120
|
-
|
5121
|
-
// gemm: [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
|
5122
|
-
// a: [OC, IC, K]
|
5123
|
-
// b: [N, OL, IC * K]
|
5124
|
-
// result: [N, OC, OL]
|
5125
|
-
static struct ggml_tensor * ggml_conv_1d_stage_1(
|
5126
|
-
struct ggml_context * ctx,
|
5127
|
-
struct ggml_tensor * a,
|
5128
|
-
struct ggml_tensor * b) {
|
5129
|
-
|
5130
|
-
bool is_node = false;
|
5131
|
-
|
5132
|
-
if (a->grad || b->grad) {
|
5133
|
-
GGML_ASSERT(false); // TODO: implement backward
|
5134
|
-
is_node = true;
|
5135
|
-
}
|
5136
|
-
|
5137
|
-
const int64_t ne[4] = {
|
5138
|
-
b->ne[1],
|
5139
|
-
a->ne[2],
|
5140
|
-
b->ne[2],
|
5141
|
-
1,
|
5142
|
-
};
|
5143
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
5144
|
-
|
5145
|
-
result->op = GGML_OP_CONV_1D_STAGE_1;
|
5146
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5147
|
-
result->src[0] = a;
|
5148
|
-
result->src[1] = b;
|
5149
|
-
|
5150
|
-
return result;
|
5151
|
-
}
|
5152
|
-
|
5153
|
-
// ggml_conv_1d
|
5154
|
-
|
5155
5133
|
GGML_API struct ggml_tensor * ggml_conv_1d(
|
5156
5134
|
struct ggml_context * ctx,
|
5157
5135
|
struct ggml_tensor * a,
|
@@ -5159,43 +5137,17 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|
5159
5137
|
int s0,
|
5160
5138
|
int p0,
|
5161
5139
|
int d0) {
|
5162
|
-
struct ggml_tensor *
|
5163
|
-
result = ggml_conv_1d_stage_1(ctx, a, result);
|
5164
|
-
return result;
|
5165
|
-
}
|
5166
|
-
|
5167
|
-
// GGML_API struct ggml_tensor * ggml_conv_1d(
|
5168
|
-
// struct ggml_context * ctx,
|
5169
|
-
// struct ggml_tensor * a,
|
5170
|
-
// struct ggml_tensor * b,
|
5171
|
-
// int s0,
|
5172
|
-
// int p0,
|
5173
|
-
// int d0) {
|
5174
|
-
// GGML_ASSERT(ggml_is_matrix(b));
|
5175
|
-
// GGML_ASSERT(a->ne[1] == b->ne[1]);
|
5176
|
-
// bool is_node = false;
|
5177
|
-
|
5178
|
-
// if (a->grad || b->grad) {
|
5179
|
-
// GGML_ASSERT(false); // TODO: implement backward
|
5180
|
-
// is_node = true;
|
5181
|
-
// }
|
5140
|
+
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false); // [N, OL, IC * K]
|
5182
5141
|
|
5183
|
-
|
5184
|
-
|
5185
|
-
|
5186
|
-
//
|
5187
|
-
// struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
5142
|
+
struct ggml_tensor * result =
|
5143
|
+
ggml_mul_mat(ctx,
|
5144
|
+
ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
|
5145
|
+
ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2])); // [OC,IC, K] => [OC, IC * K]
|
5188
5146
|
|
5189
|
-
|
5190
|
-
// ggml_set_op_params(result, params, sizeof(params));
|
5147
|
+
result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
|
5191
5148
|
|
5192
|
-
|
5193
|
-
|
5194
|
-
// result->src[0] = a;
|
5195
|
-
// result->src[1] = b;
|
5196
|
-
|
5197
|
-
// return result;
|
5198
|
-
// }
|
5149
|
+
return result;
|
5150
|
+
}
|
5199
5151
|
|
5200
5152
|
// ggml_conv_1d_ph
|
5201
5153
|
|
@@ -5258,7 +5210,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
|
5258
5210
|
// a: [OC,IC, KH, KW]
|
5259
5211
|
// b: [N, IC, IH, IW]
|
5260
5212
|
// result: [N, OH, OW, IC*KH*KW]
|
5261
|
-
|
5213
|
+
struct ggml_tensor * ggml_im2col(
|
5262
5214
|
struct ggml_context * ctx,
|
5263
5215
|
struct ggml_tensor * a,
|
5264
5216
|
struct ggml_tensor * b,
|
@@ -5267,9 +5219,14 @@ static struct ggml_tensor * ggml_conv_2d_stage_0(
|
|
5267
5219
|
int p0,
|
5268
5220
|
int p1,
|
5269
5221
|
int d0,
|
5270
|
-
int d1
|
5222
|
+
int d1,
|
5223
|
+
bool is_2D) {
|
5271
5224
|
|
5272
|
-
|
5225
|
+
if(is_2D) {
|
5226
|
+
GGML_ASSERT(a->ne[2] == b->ne[2]);
|
5227
|
+
} else {
|
5228
|
+
GGML_ASSERT(a->ne[1] == b->ne[1]);
|
5229
|
+
}
|
5273
5230
|
bool is_node = false;
|
5274
5231
|
|
5275
5232
|
if (a->grad || b->grad) {
|
@@ -5277,81 +5234,51 @@ static struct ggml_tensor * ggml_conv_2d_stage_0(
|
|
5277
5234
|
is_node = true;
|
5278
5235
|
}
|
5279
5236
|
|
5280
|
-
const int64_t OH = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
|
5281
|
-
const int64_t OW =
|
5237
|
+
const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
|
5238
|
+
const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
5282
5239
|
|
5283
5240
|
const int64_t ne[4] = {
|
5284
|
-
a->ne[2] * a->ne[1] * a->ne[0],
|
5241
|
+
is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
|
5285
5242
|
OW,
|
5286
|
-
OH,
|
5287
|
-
b->ne[3],
|
5243
|
+
is_2D ? OH : b->ne[2],
|
5244
|
+
is_2D ? b->ne[3] : 1,
|
5288
5245
|
};
|
5289
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
|
5290
5246
|
|
5291
|
-
|
5247
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
|
5248
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
5292
5249
|
ggml_set_op_params(result, params, sizeof(params));
|
5293
5250
|
|
5294
|
-
result->op =
|
5295
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5296
|
-
result->src[0] = a;
|
5297
|
-
result->src[1] = b;
|
5298
|
-
|
5299
|
-
return result;
|
5300
|
-
|
5301
|
-
}
|
5302
|
-
|
5303
|
-
// gemm: [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
|
5304
|
-
// a: [OC, IC, KH, KW]
|
5305
|
-
// b: [N, OH, OW, IC * KH * KW]
|
5306
|
-
// result: [N, OC, OH, OW]
|
5307
|
-
static struct ggml_tensor * ggml_conv_2d_stage_1(
|
5308
|
-
struct ggml_context * ctx,
|
5309
|
-
struct ggml_tensor * a,
|
5310
|
-
struct ggml_tensor * b) {
|
5311
|
-
|
5312
|
-
bool is_node = false;
|
5313
|
-
|
5314
|
-
if (a->grad || b->grad) {
|
5315
|
-
GGML_ASSERT(false); // TODO: implement backward
|
5316
|
-
is_node = true;
|
5317
|
-
}
|
5318
|
-
|
5319
|
-
const int64_t ne[4] = {
|
5320
|
-
b->ne[1],
|
5321
|
-
b->ne[2],
|
5322
|
-
a->ne[3],
|
5323
|
-
b->ne[3],
|
5324
|
-
};
|
5325
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
5326
|
-
|
5327
|
-
result->op = GGML_OP_CONV_2D_STAGE_1;
|
5251
|
+
result->op = GGML_OP_IM2COL;
|
5328
5252
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5329
5253
|
result->src[0] = a;
|
5330
5254
|
result->src[1] = b;
|
5331
5255
|
|
5332
5256
|
return result;
|
5333
|
-
|
5334
5257
|
}
|
5335
5258
|
|
5336
5259
|
// a: [OC,IC, KH, KW]
|
5337
5260
|
// b: [N, IC, IH, IW]
|
5338
5261
|
// result: [N, OC, OH, OW]
|
5339
5262
|
struct ggml_tensor * ggml_conv_2d(
|
5340
|
-
|
5341
|
-
|
5342
|
-
|
5343
|
-
|
5344
|
-
|
5345
|
-
|
5346
|
-
|
5347
|
-
|
5348
|
-
|
5263
|
+
struct ggml_context * ctx,
|
5264
|
+
struct ggml_tensor * a,
|
5265
|
+
struct ggml_tensor * b,
|
5266
|
+
int s0,
|
5267
|
+
int s1,
|
5268
|
+
int p0,
|
5269
|
+
int p1,
|
5270
|
+
int d0,
|
5271
|
+
int d1) {
|
5272
|
+
struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true); // [N, OH, OW, IC * KH * KW]
|
5349
5273
|
|
5350
|
-
struct ggml_tensor * result =
|
5351
|
-
|
5274
|
+
struct ggml_tensor * result =
|
5275
|
+
ggml_mul_mat(ctx,
|
5276
|
+
ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
|
5277
|
+
ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3])); // [OC,IC, KH, KW] => [OC, IC * KH * KW]
|
5352
5278
|
|
5353
|
-
|
5279
|
+
result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], a->ne[3], im2col->ne[3]); // [N, OC, OH, OW]
|
5354
5280
|
|
5281
|
+
return result;
|
5355
5282
|
}
|
5356
5283
|
|
5357
5284
|
// ggml_conv_2d_sk_p0
|
@@ -5411,7 +5338,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
|
|
5411
5338
|
|
5412
5339
|
// ggml_pool_*
|
5413
5340
|
|
5414
|
-
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s,
|
5341
|
+
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
|
5415
5342
|
return (ins + 2 * p - ks) / s + 1;
|
5416
5343
|
}
|
5417
5344
|
|
@@ -5458,8 +5385,8 @@ struct ggml_tensor * ggml_pool_2d(
|
|
5458
5385
|
int k1,
|
5459
5386
|
int s0,
|
5460
5387
|
int s1,
|
5461
|
-
|
5462
|
-
|
5388
|
+
float p0,
|
5389
|
+
float p1) {
|
5463
5390
|
|
5464
5391
|
bool is_node = false;
|
5465
5392
|
|
@@ -8921,6 +8848,48 @@ static void ggml_compute_forward_silu(
|
|
8921
8848
|
}
|
8922
8849
|
}
|
8923
8850
|
|
8851
|
+
// ggml_compute_forward_leaky
|
8852
|
+
|
8853
|
+
static void ggml_compute_forward_leaky_f32(
|
8854
|
+
const struct ggml_compute_params * params,
|
8855
|
+
const struct ggml_tensor * src0,
|
8856
|
+
struct ggml_tensor * dst) {
|
8857
|
+
assert(params->ith == 0);
|
8858
|
+
assert(ggml_are_same_shape(src0, dst));
|
8859
|
+
|
8860
|
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
8861
|
+
return;
|
8862
|
+
}
|
8863
|
+
|
8864
|
+
const int n = ggml_nrows(src0);
|
8865
|
+
const int nc = src0->ne[0];
|
8866
|
+
|
8867
|
+
assert(dst->nb[0] == sizeof(float));
|
8868
|
+
assert(src0->nb[0] == sizeof(float));
|
8869
|
+
|
8870
|
+
for (int i = 0; i < n; i++) {
|
8871
|
+
ggml_vec_leaky_f32(nc,
|
8872
|
+
(float *) ((char *) dst->data + i*( dst->nb[1])),
|
8873
|
+
(float *) ((char *) src0->data + i*(src0->nb[1])));
|
8874
|
+
}
|
8875
|
+
}
|
8876
|
+
|
8877
|
+
static void ggml_compute_forward_leaky(
|
8878
|
+
const struct ggml_compute_params * params,
|
8879
|
+
const struct ggml_tensor * src0,
|
8880
|
+
struct ggml_tensor * dst) {
|
8881
|
+
switch (src0->type) {
|
8882
|
+
case GGML_TYPE_F32:
|
8883
|
+
{
|
8884
|
+
ggml_compute_forward_leaky_f32(params, src0, dst);
|
8885
|
+
} break;
|
8886
|
+
default:
|
8887
|
+
{
|
8888
|
+
GGML_ASSERT(false);
|
8889
|
+
} break;
|
8890
|
+
}
|
8891
|
+
}
|
8892
|
+
|
8924
8893
|
// ggml_compute_forward_silu_back
|
8925
8894
|
|
8926
8895
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -9404,6 +9373,8 @@ static bool ggml_compute_forward_mul_mat_use_blas(
|
|
9404
9373
|
// TODO: find the optimal values for these
|
9405
9374
|
if (ggml_is_contiguous(src0) &&
|
9406
9375
|
ggml_is_contiguous(src1) &&
|
9376
|
+
src0->type == GGML_TYPE_F32 &&
|
9377
|
+
src1->type == GGML_TYPE_F32 &&
|
9407
9378
|
(ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
|
9408
9379
|
|
9409
9380
|
/*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
|
@@ -9442,7 +9413,7 @@ static void ggml_compute_forward_mul_mat(
|
|
9442
9413
|
|
9443
9414
|
// we don't support permuted src0 or src1
|
9444
9415
|
GGML_ASSERT(nb00 == ggml_type_size(type));
|
9445
|
-
GGML_ASSERT(nb10 ==
|
9416
|
+
GGML_ASSERT(nb10 == ggml_type_size(src1->type));
|
9446
9417
|
|
9447
9418
|
// dst cannot be transposed or permuted
|
9448
9419
|
GGML_ASSERT(nb0 == sizeof(float));
|
@@ -11340,9 +11311,9 @@ static void ggml_compute_forward_rope_back(
|
|
11340
11311
|
}
|
11341
11312
|
}
|
11342
11313
|
|
11343
|
-
//
|
11314
|
+
// ggml_compute_forward_conv_transpose_1d
|
11344
11315
|
|
11345
|
-
static void
|
11316
|
+
static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
11346
11317
|
const struct ggml_compute_params * params,
|
11347
11318
|
const struct ggml_tensor * src0,
|
11348
11319
|
const struct ggml_tensor * src1,
|
@@ -11359,14 +11330,7 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11359
11330
|
const int ith = params->ith;
|
11360
11331
|
const int nth = params->nth;
|
11361
11332
|
|
11362
|
-
const int nk = ne00;
|
11363
|
-
|
11364
|
-
// size of the convolution row - the kernel size unrolled across all input channels
|
11365
|
-
const int ew0 = nk*ne01;
|
11366
|
-
|
11367
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11368
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
11369
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
11333
|
+
const int nk = ne00*ne01*ne02;
|
11370
11334
|
|
11371
11335
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11372
11336
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -11374,23 +11338,37 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11374
11338
|
if (params->type == GGML_TASK_INIT) {
|
11375
11339
|
memset(params->wdata, 0, params->wsize);
|
11376
11340
|
|
11377
|
-
|
11341
|
+
// permute kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
|
11342
|
+
{
|
11343
|
+
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11344
|
+
|
11345
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
11346
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
11347
|
+
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
|
11348
|
+
ggml_fp16_t * dst_data = wdata + i01*ne00*ne02;
|
11349
|
+
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
11350
|
+
dst_data[i00*ne02 + i02] = src[i00];
|
11351
|
+
}
|
11352
|
+
}
|
11353
|
+
}
|
11354
|
+
}
|
11378
11355
|
|
11379
|
-
|
11380
|
-
|
11356
|
+
// permute source data (src1) from (L x Cin) to (Cin x L)
|
11357
|
+
{
|
11358
|
+
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + nk;
|
11381
11359
|
ggml_fp16_t * dst_data = wdata;
|
11382
11360
|
|
11383
|
-
for (int64_t
|
11384
|
-
|
11385
|
-
|
11386
|
-
|
11387
|
-
if(!(idx0 < 0 || idx0 >= ne10)) {
|
11388
|
-
dst_data[i0*ew0 + i11*nk + ik] = GGML_FP32_TO_FP16(src[idx0]);
|
11389
|
-
}
|
11361
|
+
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
11362
|
+
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
11363
|
+
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
11364
|
+
dst_data[i10*ne11 + i11] = GGML_FP32_TO_FP16(src[i10]);
|
11390
11365
|
}
|
11391
11366
|
}
|
11392
11367
|
}
|
11393
11368
|
|
11369
|
+
// need to zero dst since we are accumulating into it
|
11370
|
+
memset(dst->data, 0, ggml_nbytes(dst));
|
11371
|
+
|
11394
11372
|
return;
|
11395
11373
|
}
|
11396
11374
|
|
@@ -11398,8 +11376,10 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11398
11376
|
return;
|
11399
11377
|
}
|
11400
11378
|
|
11379
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11380
|
+
|
11401
11381
|
// total rows in dst
|
11402
|
-
const int nr =
|
11382
|
+
const int nr = ne1;
|
11403
11383
|
|
11404
11384
|
// rows per thread
|
11405
11385
|
const int dr = (nr + nth - 1)/nth;
|
@@ -11408,22 +11388,26 @@ static void ggml_compute_forward_conv_1d_f16_f32(
|
|
11408
11388
|
const int ir0 = dr*ith;
|
11409
11389
|
const int ir1 = MIN(ir0 + dr, nr);
|
11410
11390
|
|
11411
|
-
ggml_fp16_t * const wdata
|
11412
|
-
|
11413
|
-
for (int i2 = 0; i2 < ne2; i2++) {
|
11414
|
-
for (int i1 = ir0; i1 < ir1; i1++) {
|
11415
|
-
float * dst_data = (float *)((char *) dst->data + i2*nb2 + i1*nb1);
|
11391
|
+
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11392
|
+
ggml_fp16_t * const wdata_src = wdata + nk;
|
11416
11393
|
|
11417
|
-
|
11418
|
-
|
11419
|
-
|
11420
|
-
|
11394
|
+
for (int i1 = ir0; i1 < ir1; i1++) {
|
11395
|
+
float * dst_data = (float *)((char *) dst->data + i1*nb1);
|
11396
|
+
ggml_fp16_t * wdata_kernel = wdata + i1*ne02*ne00;
|
11397
|
+
for (int i10 = 0; i10 < ne10; i10++) {
|
11398
|
+
const int i1n = i10*ne11;
|
11399
|
+
for (int i00 = 0; i00 < ne00; i00++) {
|
11400
|
+
float v = 0;
|
11401
|
+
ggml_vec_dot_f16(ne02, &v,
|
11402
|
+
(ggml_fp16_t *) wdata_src + i1n,
|
11403
|
+
(ggml_fp16_t *) wdata_kernel + i00*ne02);
|
11404
|
+
dst_data[i10*s0 + i00] += v;
|
11421
11405
|
}
|
11422
11406
|
}
|
11423
11407
|
}
|
11424
11408
|
}
|
11425
11409
|
|
11426
|
-
static void
|
11410
|
+
static void ggml_compute_forward_conv_transpose_1d_f32(
|
11427
11411
|
const struct ggml_compute_params * params,
|
11428
11412
|
const struct ggml_tensor * src0,
|
11429
11413
|
const struct ggml_tensor * src1,
|
@@ -11440,13 +11424,7 @@ static void ggml_compute_forward_conv_1d_f32(
|
|
11440
11424
|
const int ith = params->ith;
|
11441
11425
|
const int nth = params->nth;
|
11442
11426
|
|
11443
|
-
const int nk = ne00;
|
11444
|
-
|
11445
|
-
const int ew0 = nk*ne01;
|
11446
|
-
|
11447
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11448
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
11449
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
11427
|
+
const int nk = ne00*ne01*ne02;
|
11450
11428
|
|
11451
11429
|
GGML_ASSERT(nb00 == sizeof(float));
|
11452
11430
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -11454,23 +11432,37 @@ static void ggml_compute_forward_conv_1d_f32(
|
|
11454
11432
|
if (params->type == GGML_TASK_INIT) {
|
11455
11433
|
memset(params->wdata, 0, params->wsize);
|
11456
11434
|
|
11457
|
-
|
11435
|
+
// prepare kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
|
11436
|
+
{
|
11437
|
+
float * const wdata = (float *) params->wdata + 0;
|
11438
|
+
|
11439
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
11440
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
11441
|
+
const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01);
|
11442
|
+
float * dst_data = wdata + i01*ne00*ne02;
|
11443
|
+
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
11444
|
+
dst_data[i00*ne02 + i02] = src[i00];
|
11445
|
+
}
|
11446
|
+
}
|
11447
|
+
}
|
11448
|
+
}
|
11458
11449
|
|
11459
|
-
|
11460
|
-
|
11450
|
+
// prepare source data (src1)
|
11451
|
+
{
|
11452
|
+
float * const wdata = (float *) params->wdata + nk;
|
11461
11453
|
float * dst_data = wdata;
|
11462
11454
|
|
11463
|
-
for (int64_t
|
11464
|
-
|
11465
|
-
|
11466
|
-
|
11467
|
-
if(!(idx0 < 0 || idx0 >= ne10)) {
|
11468
|
-
dst_data[i0*ew0 + i11*nk + ik] = src[idx0];
|
11469
|
-
}
|
11455
|
+
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
11456
|
+
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
11457
|
+
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
11458
|
+
dst_data[i10*ne11 + i11] = src[i10];
|
11470
11459
|
}
|
11471
11460
|
}
|
11472
11461
|
}
|
11473
11462
|
|
11463
|
+
// need to zero dst since we are accumulating into it
|
11464
|
+
memset(dst->data, 0, ggml_nbytes(dst));
|
11465
|
+
|
11474
11466
|
return;
|
11475
11467
|
}
|
11476
11468
|
|
@@ -11478,8 +11470,10 @@ static void ggml_compute_forward_conv_1d_f32(
|
|
11478
11470
|
return;
|
11479
11471
|
}
|
11480
11472
|
|
11473
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11474
|
+
|
11481
11475
|
// total rows in dst
|
11482
|
-
const int nr =
|
11476
|
+
const int nr = ne1;
|
11483
11477
|
|
11484
11478
|
// rows per thread
|
11485
11479
|
const int dr = (nr + nth - 1)/nth;
|
@@ -11488,441 +11482,8 @@ static void ggml_compute_forward_conv_1d_f32(
|
|
11488
11482
|
const int ir0 = dr*ith;
|
11489
11483
|
const int ir1 = MIN(ir0 + dr, nr);
|
11490
11484
|
|
11491
|
-
float * const wdata
|
11492
|
-
|
11493
|
-
for (int i2 = 0; i2 < ne2; i2++) {
|
11494
|
-
for (int i1 = ir0; i1 < ir1; i1++) {
|
11495
|
-
float * dst_data = (float *)((char *) dst->data + i2*nb2 + i1*nb1);
|
11496
|
-
|
11497
|
-
for (int i0 = 0; i0 < ne0; i0++) {
|
11498
|
-
ggml_vec_dot_f32(ew0, dst_data + i0,
|
11499
|
-
(float *) ((char *) src0->data + i1*nb02),
|
11500
|
-
(float *) wdata + i2*nb2 + i0*ew0);
|
11501
|
-
}
|
11502
|
-
}
|
11503
|
-
}
|
11504
|
-
}
|
11505
|
-
|
11506
|
-
// TODO: reuse ggml_mul_mat or implement ggml_im2col and remove stage_0 and stage_1
|
11507
|
-
static void gemm_f16_out_f32(int64_t m, int64_t n, int64_t k,
|
11508
|
-
ggml_fp16_t * A,
|
11509
|
-
ggml_fp16_t * B,
|
11510
|
-
float * C,
|
11511
|
-
const int ith, const int nth) {
|
11512
|
-
// does not seem to make a difference
|
11513
|
-
int64_t m0, m1, n0, n1;
|
11514
|
-
// patches per thread
|
11515
|
-
if (m > n) {
|
11516
|
-
n0 = 0;
|
11517
|
-
n1 = n;
|
11518
|
-
|
11519
|
-
// total patches in dst
|
11520
|
-
const int np = m;
|
11521
|
-
|
11522
|
-
// patches per thread
|
11523
|
-
const int dp = (np + nth - 1)/nth;
|
11524
|
-
|
11525
|
-
// patch range for this thread
|
11526
|
-
m0 = dp*ith;
|
11527
|
-
m1 = MIN(m0 + dp, np);
|
11528
|
-
} else {
|
11529
|
-
m0 = 0;
|
11530
|
-
m1 = m;
|
11531
|
-
|
11532
|
-
// total patches in dst
|
11533
|
-
const int np = n;
|
11534
|
-
|
11535
|
-
// patches per thread
|
11536
|
-
const int dp = (np + nth - 1)/nth;
|
11537
|
-
|
11538
|
-
// patch range for this thread
|
11539
|
-
n0 = dp*ith;
|
11540
|
-
n1 = MIN(n0 + dp, np);
|
11541
|
-
}
|
11542
|
-
|
11543
|
-
// block-tiling attempt
|
11544
|
-
int64_t blck_n = 16;
|
11545
|
-
int64_t blck_m = 16;
|
11546
|
-
|
11547
|
-
// int64_t CACHE_SIZE = 2 * 1024 * 1024; // 2MB
|
11548
|
-
// int64_t blck_size = CACHE_SIZE / (sizeof(float) + 2 * sizeof(ggml_fp16_t) * K);
|
11549
|
-
// if (blck_size > 0) {
|
11550
|
-
// blck_0 = 4;
|
11551
|
-
// blck_1 = blck_size / blck_0;
|
11552
|
-
// if (blck_1 < 0) {
|
11553
|
-
// blck_1 = 1;
|
11554
|
-
// }
|
11555
|
-
// // blck_0 = (int64_t)sqrt(blck_size);
|
11556
|
-
// // blck_1 = blck_0;
|
11557
|
-
// }
|
11558
|
-
// // printf("%zd %zd %zd %zd\n", blck_size, K, blck_0, blck_1);
|
11559
|
-
|
11560
|
-
for (int j = n0; j < n1; j+=blck_n) {
|
11561
|
-
for (int i = m0; i < m1; i+=blck_m) {
|
11562
|
-
// printf("i j k => %d %d %d\n", i, j, K);
|
11563
|
-
for (int ii = i; ii < i + blck_m && ii < m1; ii++) {
|
11564
|
-
for (int jj = j; jj < j + blck_n && jj < n1; jj++) {
|
11565
|
-
ggml_vec_dot_f16(k,
|
11566
|
-
C + ii*n + jj,
|
11567
|
-
A + ii * k,
|
11568
|
-
B + jj * k);
|
11569
|
-
}
|
11570
|
-
}
|
11571
|
-
}
|
11572
|
-
}
|
11573
|
-
}
|
11574
|
-
|
11575
|
-
// src0: kernel [OC, IC, K]
|
11576
|
-
// src1: signal [N, IC, IL]
|
11577
|
-
// dst: result [N, OL, IC*K]
|
11578
|
-
static void ggml_compute_forward_conv_1d_stage_0_f32(
|
11579
|
-
const struct ggml_compute_params * params,
|
11580
|
-
const struct ggml_tensor * src0,
|
11581
|
-
const struct ggml_tensor * src1,
|
11582
|
-
struct ggml_tensor * dst) {
|
11583
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
11584
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11585
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F16);
|
11586
|
-
|
11587
|
-
int64_t t0 = ggml_perf_time_us();
|
11588
|
-
UNUSED(t0);
|
11589
|
-
|
11590
|
-
GGML_TENSOR_BINARY_OP_LOCALS;
|
11591
|
-
|
11592
|
-
const int64_t N = ne12;
|
11593
|
-
const int64_t IC = ne11;
|
11594
|
-
const int64_t IL = ne10;
|
11595
|
-
|
11596
|
-
const int64_t K = ne00;
|
11597
|
-
|
11598
|
-
const int64_t OL = ne1;
|
11599
|
-
|
11600
|
-
const int ith = params->ith;
|
11601
|
-
const int nth = params->nth;
|
11602
|
-
|
11603
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11604
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
11605
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
11606
|
-
|
11607
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11608
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
11609
|
-
|
11610
|
-
if (params->type == GGML_TASK_INIT) {
|
11611
|
-
memset(dst->data, 0, ggml_nbytes(dst));
|
11612
|
-
return;
|
11613
|
-
}
|
11614
|
-
|
11615
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11616
|
-
return;
|
11617
|
-
}
|
11618
|
-
|
11619
|
-
// im2col: [N, IC, IL] => [N, OL, IC*K]
|
11620
|
-
{
|
11621
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) dst->data;
|
11622
|
-
|
11623
|
-
for (int64_t in = 0; in < N; in++) {
|
11624
|
-
for (int64_t iol = 0; iol < OL; iol++) {
|
11625
|
-
for (int64_t iic = ith; iic < IC; iic+=nth) {
|
11626
|
-
|
11627
|
-
// micro kernel
|
11628
|
-
ggml_fp16_t * dst_data = wdata + (in*OL + iol)*(IC*K); // [IC, K]
|
11629
|
-
const float * const src_data = (float *)((char *) src1->data + in*nb12 + iic*nb11); // [IL]
|
11630
|
-
|
11631
|
-
for (int64_t ik = 0; ik < K; ik++) {
|
11632
|
-
const int64_t iil = iol*s0 + ik*d0 - p0;
|
11633
|
-
|
11634
|
-
if (!(iil < 0 || iil >= IL)) {
|
11635
|
-
dst_data[iic*K + ik] = GGML_FP32_TO_FP16(src_data[iil]);
|
11636
|
-
}
|
11637
|
-
}
|
11638
|
-
}
|
11639
|
-
}
|
11640
|
-
}
|
11641
|
-
}
|
11642
|
-
}
|
11643
|
-
|
11644
|
-
// gemm: [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
|
11645
|
-
// src0: [OC, IC, K]
|
11646
|
-
// src1: [N, OL, IC * K]
|
11647
|
-
// result: [N, OC, OL]
|
11648
|
-
static void ggml_compute_forward_conv_1d_stage_1_f16(
|
11649
|
-
const struct ggml_compute_params * params,
|
11650
|
-
const struct ggml_tensor * src0,
|
11651
|
-
const struct ggml_tensor * src1,
|
11652
|
-
struct ggml_tensor * dst) {
|
11653
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
11654
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
11655
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
11656
|
-
|
11657
|
-
int64_t t0 = ggml_perf_time_us();
|
11658
|
-
UNUSED(t0);
|
11659
|
-
|
11660
|
-
if (params->type == GGML_TASK_INIT) {
|
11661
|
-
return;
|
11662
|
-
}
|
11663
|
-
|
11664
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11665
|
-
return;
|
11666
|
-
}
|
11667
|
-
|
11668
|
-
GGML_TENSOR_BINARY_OP_LOCALS;
|
11669
|
-
|
11670
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11671
|
-
GGML_ASSERT(nb10 == sizeof(ggml_fp16_t));
|
11672
|
-
GGML_ASSERT(nb0 == sizeof(float));
|
11673
|
-
|
11674
|
-
const int N = ne12;
|
11675
|
-
const int OL = ne11;
|
11676
|
-
|
11677
|
-
const int OC = ne02;
|
11678
|
-
const int IC = ne01;
|
11679
|
-
const int K = ne00;
|
11680
|
-
|
11681
|
-
const int ith = params->ith;
|
11682
|
-
const int nth = params->nth;
|
11683
|
-
|
11684
|
-
int64_t m = OC;
|
11685
|
-
int64_t n = OL;
|
11686
|
-
int64_t k = IC * K;
|
11687
|
-
|
11688
|
-
// [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
|
11689
|
-
for (int i = 0; i < N; i++) {
|
11690
|
-
ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
|
11691
|
-
ggml_fp16_t * B = (ggml_fp16_t *)src1->data + i * m * k; // [n, k]
|
11692
|
-
float * C = (float *)dst->data + i * m * n; // [m, n]
|
11693
|
-
|
11694
|
-
gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
|
11695
|
-
}
|
11696
|
-
}
|
11697
|
-
|
11698
|
-
static void ggml_compute_forward_conv_1d(
|
11699
|
-
const struct ggml_compute_params * params,
|
11700
|
-
const struct ggml_tensor * src0,
|
11701
|
-
const struct ggml_tensor * src1,
|
11702
|
-
struct ggml_tensor * dst) {
|
11703
|
-
switch(src0->type) {
|
11704
|
-
case GGML_TYPE_F16:
|
11705
|
-
{
|
11706
|
-
ggml_compute_forward_conv_1d_f16_f32(params, src0, src1, dst);
|
11707
|
-
} break;
|
11708
|
-
case GGML_TYPE_F32:
|
11709
|
-
{
|
11710
|
-
ggml_compute_forward_conv_1d_f32(params, src0, src1, dst);
|
11711
|
-
} break;
|
11712
|
-
default:
|
11713
|
-
{
|
11714
|
-
GGML_ASSERT(false);
|
11715
|
-
} break;
|
11716
|
-
}
|
11717
|
-
}
|
11718
|
-
|
11719
|
-
static void ggml_compute_forward_conv_1d_stage_0(
|
11720
|
-
const struct ggml_compute_params * params,
|
11721
|
-
const struct ggml_tensor * src0,
|
11722
|
-
const struct ggml_tensor * src1,
|
11723
|
-
struct ggml_tensor * dst) {
|
11724
|
-
switch(src0->type) {
|
11725
|
-
case GGML_TYPE_F16:
|
11726
|
-
{
|
11727
|
-
ggml_compute_forward_conv_1d_stage_0_f32(params, src0, src1, dst);
|
11728
|
-
} break;
|
11729
|
-
default:
|
11730
|
-
{
|
11731
|
-
GGML_ASSERT(false);
|
11732
|
-
} break;
|
11733
|
-
}
|
11734
|
-
}
|
11735
|
-
|
11736
|
-
static void ggml_compute_forward_conv_1d_stage_1(
|
11737
|
-
const struct ggml_compute_params * params,
|
11738
|
-
const struct ggml_tensor * src0,
|
11739
|
-
const struct ggml_tensor * src1,
|
11740
|
-
struct ggml_tensor * dst) {
|
11741
|
-
switch(src0->type) {
|
11742
|
-
case GGML_TYPE_F16:
|
11743
|
-
{
|
11744
|
-
ggml_compute_forward_conv_1d_stage_1_f16(params, src0, src1, dst);
|
11745
|
-
} break;
|
11746
|
-
default:
|
11747
|
-
{
|
11748
|
-
GGML_ASSERT(false);
|
11749
|
-
} break;
|
11750
|
-
}
|
11751
|
-
}
|
11752
|
-
|
11753
|
-
// ggml_compute_forward_conv_transpose_1d
|
11754
|
-
|
11755
|
-
static void ggml_compute_forward_conv_transpose_1d_f16_f32(
|
11756
|
-
const struct ggml_compute_params * params,
|
11757
|
-
const struct ggml_tensor * src0,
|
11758
|
-
const struct ggml_tensor * src1,
|
11759
|
-
struct ggml_tensor * dst) {
|
11760
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
11761
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11762
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
11763
|
-
|
11764
|
-
int64_t t0 = ggml_perf_time_us();
|
11765
|
-
UNUSED(t0);
|
11766
|
-
|
11767
|
-
GGML_TENSOR_BINARY_OP_LOCALS
|
11768
|
-
|
11769
|
-
const int ith = params->ith;
|
11770
|
-
const int nth = params->nth;
|
11771
|
-
|
11772
|
-
const int nk = ne00*ne01*ne02;
|
11773
|
-
|
11774
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
11775
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
11776
|
-
|
11777
|
-
if (params->type == GGML_TASK_INIT) {
|
11778
|
-
memset(params->wdata, 0, params->wsize);
|
11779
|
-
|
11780
|
-
// permute kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
|
11781
|
-
{
|
11782
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11783
|
-
|
11784
|
-
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
11785
|
-
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
11786
|
-
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
|
11787
|
-
ggml_fp16_t * dst_data = wdata + i01*ne00*ne02;
|
11788
|
-
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
11789
|
-
dst_data[i00*ne02 + i02] = src[i00];
|
11790
|
-
}
|
11791
|
-
}
|
11792
|
-
}
|
11793
|
-
}
|
11794
|
-
|
11795
|
-
// permute source data (src1) from (L x Cin) to (Cin x L)
|
11796
|
-
{
|
11797
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + nk;
|
11798
|
-
ggml_fp16_t * dst_data = wdata;
|
11799
|
-
|
11800
|
-
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
11801
|
-
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
11802
|
-
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
11803
|
-
dst_data[i10*ne11 + i11] = GGML_FP32_TO_FP16(src[i10]);
|
11804
|
-
}
|
11805
|
-
}
|
11806
|
-
}
|
11807
|
-
|
11808
|
-
// need to zero dst since we are accumulating into it
|
11809
|
-
memset(dst->data, 0, ggml_nbytes(dst));
|
11810
|
-
|
11811
|
-
return;
|
11812
|
-
}
|
11813
|
-
|
11814
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11815
|
-
return;
|
11816
|
-
}
|
11817
|
-
|
11818
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11819
|
-
|
11820
|
-
// total rows in dst
|
11821
|
-
const int nr = ne1;
|
11822
|
-
|
11823
|
-
// rows per thread
|
11824
|
-
const int dr = (nr + nth - 1)/nth;
|
11825
|
-
|
11826
|
-
// row range for this thread
|
11827
|
-
const int ir0 = dr*ith;
|
11828
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11829
|
-
|
11830
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
11831
|
-
ggml_fp16_t * const wdata_src = wdata + nk;
|
11832
|
-
|
11833
|
-
for (int i1 = ir0; i1 < ir1; i1++) {
|
11834
|
-
float * dst_data = (float *)((char *) dst->data + i1*nb1);
|
11835
|
-
ggml_fp16_t * wdata_kernel = wdata + i1*ne02*ne00;
|
11836
|
-
for (int i10 = 0; i10 < ne10; i10++) {
|
11837
|
-
const int i1n = i10*ne11;
|
11838
|
-
for (int i00 = 0; i00 < ne00; i00++) {
|
11839
|
-
float v = 0;
|
11840
|
-
ggml_vec_dot_f16(ne02, &v,
|
11841
|
-
(ggml_fp16_t *) wdata_src + i1n,
|
11842
|
-
(ggml_fp16_t *) wdata_kernel + i00*ne02);
|
11843
|
-
dst_data[i10*s0 + i00] += v;
|
11844
|
-
}
|
11845
|
-
}
|
11846
|
-
}
|
11847
|
-
}
|
11848
|
-
|
11849
|
-
static void ggml_compute_forward_conv_transpose_1d_f32(
|
11850
|
-
const struct ggml_compute_params * params,
|
11851
|
-
const struct ggml_tensor * src0,
|
11852
|
-
const struct ggml_tensor * src1,
|
11853
|
-
struct ggml_tensor * dst) {
|
11854
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
11855
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11856
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
11857
|
-
|
11858
|
-
int64_t t0 = ggml_perf_time_us();
|
11859
|
-
UNUSED(t0);
|
11860
|
-
|
11861
|
-
GGML_TENSOR_BINARY_OP_LOCALS
|
11862
|
-
|
11863
|
-
const int ith = params->ith;
|
11864
|
-
const int nth = params->nth;
|
11865
|
-
|
11866
|
-
const int nk = ne00*ne01*ne02;
|
11867
|
-
|
11868
|
-
GGML_ASSERT(nb00 == sizeof(float));
|
11869
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
11870
|
-
|
11871
|
-
if (params->type == GGML_TASK_INIT) {
|
11872
|
-
memset(params->wdata, 0, params->wsize);
|
11873
|
-
|
11874
|
-
// prepare kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
|
11875
|
-
{
|
11876
|
-
float * const wdata = (float *) params->wdata + 0;
|
11877
|
-
|
11878
|
-
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
11879
|
-
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
11880
|
-
const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01);
|
11881
|
-
float * dst_data = wdata + i01*ne00*ne02;
|
11882
|
-
for (int64_t i00 = 0; i00 < ne00; i00++) {
|
11883
|
-
dst_data[i00*ne02 + i02] = src[i00];
|
11884
|
-
}
|
11885
|
-
}
|
11886
|
-
}
|
11887
|
-
}
|
11888
|
-
|
11889
|
-
// prepare source data (src1)
|
11890
|
-
{
|
11891
|
-
float * const wdata = (float *) params->wdata + nk;
|
11892
|
-
float * dst_data = wdata;
|
11893
|
-
|
11894
|
-
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
11895
|
-
const float * const src = (float *)((char *) src1->data + i11*nb11);
|
11896
|
-
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
11897
|
-
dst_data[i10*ne11 + i11] = src[i10];
|
11898
|
-
}
|
11899
|
-
}
|
11900
|
-
}
|
11901
|
-
|
11902
|
-
// need to zero dst since we are accumulating into it
|
11903
|
-
memset(dst->data, 0, ggml_nbytes(dst));
|
11904
|
-
|
11905
|
-
return;
|
11906
|
-
}
|
11907
|
-
|
11908
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
11909
|
-
return;
|
11910
|
-
}
|
11911
|
-
|
11912
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
11913
|
-
|
11914
|
-
// total rows in dst
|
11915
|
-
const int nr = ne1;
|
11916
|
-
|
11917
|
-
// rows per thread
|
11918
|
-
const int dr = (nr + nth - 1)/nth;
|
11919
|
-
|
11920
|
-
// row range for this thread
|
11921
|
-
const int ir0 = dr*ith;
|
11922
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11923
|
-
|
11924
|
-
float * const wdata = (float *) params->wdata + 0;
|
11925
|
-
float * const wdata_src = wdata + nk;
|
11485
|
+
float * const wdata = (float *) params->wdata + 0;
|
11486
|
+
float * const wdata_src = wdata + nk;
|
11926
11487
|
|
11927
11488
|
for (int i1 = ir0; i1 < ir1; i1++) {
|
11928
11489
|
float * dst_data = (float *)((char *) dst->data + i1*nb1);
|
@@ -11961,12 +11522,10 @@ static void ggml_compute_forward_conv_transpose_1d(
|
|
11961
11522
|
}
|
11962
11523
|
}
|
11963
11524
|
|
11964
|
-
// ggml_compute_forward_conv_2d
|
11965
|
-
|
11966
11525
|
// src0: kernel [OC, IC, KH, KW]
|
11967
11526
|
// src1: image [N, IC, IH, IW]
|
11968
11527
|
// dst: result [N, OH, OW, IC*KH*KW]
|
11969
|
-
static void
|
11528
|
+
static void ggml_compute_forward_im2col_f16(
|
11970
11529
|
const struct ggml_compute_params * params,
|
11971
11530
|
const struct ggml_tensor * src0,
|
11972
11531
|
const struct ggml_tensor * src1,
|
@@ -11980,34 +11539,35 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
|
|
11980
11539
|
|
11981
11540
|
GGML_TENSOR_BINARY_OP_LOCALS;
|
11982
11541
|
|
11983
|
-
const
|
11984
|
-
const
|
11985
|
-
const
|
11542
|
+
const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
|
11543
|
+
const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
|
11544
|
+
const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
|
11545
|
+
const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
|
11546
|
+
const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
|
11547
|
+
const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
|
11548
|
+
const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
|
11549
|
+
|
11550
|
+
const int ith = params->ith;
|
11551
|
+
const int nth = params->nth;
|
11552
|
+
|
11553
|
+
const int64_t N = is_2D ? ne13 : ne12;
|
11554
|
+
const int64_t IC = is_2D ? ne12 : ne11;
|
11555
|
+
const int64_t IH = is_2D ? ne11 : 1;
|
11986
11556
|
const int64_t IW = ne10;
|
11987
11557
|
|
11988
|
-
|
11989
|
-
// const int64_t IC = ne02;
|
11990
|
-
const int64_t KH = ne01;
|
11558
|
+
const int64_t KH = is_2D ? ne01 : 1;
|
11991
11559
|
const int64_t KW = ne00;
|
11992
11560
|
|
11993
|
-
const int64_t OH = ne2;
|
11561
|
+
const int64_t OH = is_2D ? ne2 : 1;
|
11994
11562
|
const int64_t OW = ne1;
|
11995
11563
|
|
11996
|
-
|
11997
|
-
|
11998
|
-
|
11999
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12000
|
-
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12001
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12002
|
-
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12003
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12004
|
-
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
11564
|
+
int ofs0 = is_2D ? nb13 : nb12;
|
11565
|
+
int ofs1 = is_2D ? nb12 : nb11;
|
12005
11566
|
|
12006
11567
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12007
11568
|
GGML_ASSERT(nb10 == sizeof(float));
|
12008
11569
|
|
12009
11570
|
if (params->type == GGML_TASK_INIT) {
|
12010
|
-
memset(dst->data, 0, ggml_nbytes(dst));
|
12011
11571
|
return;
|
12012
11572
|
}
|
12013
11573
|
|
@@ -12020,20 +11580,22 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
|
|
12020
11580
|
ggml_fp16_t * const wdata = (ggml_fp16_t *) dst->data;
|
12021
11581
|
|
12022
11582
|
for (int64_t in = 0; in < N; in++) {
|
12023
|
-
for (int64_t ioh = 0; ioh < OH; ioh++) {
|
11583
|
+
for (int64_t ioh = 0; ioh < OH; ioh++) { // 1
|
12024
11584
|
for (int64_t iow = 0; iow < OW; iow++) {
|
12025
|
-
for (int64_t iic = ith; iic < IC; iic+=nth) {
|
11585
|
+
for (int64_t iic = ith; iic < IC; iic += nth) {
|
12026
11586
|
|
12027
11587
|
// micro kernel
|
12028
11588
|
ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
|
12029
|
-
const float * const src_data = (float *)((char *) src1->data + in*
|
11589
|
+
const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
|
12030
11590
|
|
12031
|
-
for (int64_t ikh = 0; ikh < KH; ikh++) {
|
11591
|
+
for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
|
12032
11592
|
for (int64_t ikw = 0; ikw < KW; ikw++) {
|
12033
11593
|
const int64_t iiw = iow*s0 + ikw*d0 - p0;
|
12034
11594
|
const int64_t iih = ioh*s1 + ikh*d1 - p1;
|
12035
11595
|
|
12036
|
-
if (
|
11596
|
+
if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
|
11597
|
+
dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
|
11598
|
+
} else {
|
12037
11599
|
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_FP32_TO_FP16(src_data[iih*IW + iiw]);
|
12038
11600
|
}
|
12039
11601
|
}
|
@@ -12045,223 +11607,7 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
|
|
12045
11607
|
}
|
12046
11608
|
}
|
12047
11609
|
|
12048
|
-
|
12049
|
-
// src0: [OC, IC, KH, KW]
|
12050
|
-
// src1: [N, OH, OW, IC * KH * KW]
|
12051
|
-
// result: [N, OC, OH, OW]
|
12052
|
-
static void ggml_compute_forward_conv_2d_stage_1_f16(
|
12053
|
-
const struct ggml_compute_params * params,
|
12054
|
-
const struct ggml_tensor * src0,
|
12055
|
-
const struct ggml_tensor * src1,
|
12056
|
-
struct ggml_tensor * dst) {
|
12057
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12058
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F16);
|
12059
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
12060
|
-
|
12061
|
-
int64_t t0 = ggml_perf_time_us();
|
12062
|
-
UNUSED(t0);
|
12063
|
-
|
12064
|
-
if (params->type == GGML_TASK_INIT) {
|
12065
|
-
return;
|
12066
|
-
}
|
12067
|
-
|
12068
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
12069
|
-
return;
|
12070
|
-
}
|
12071
|
-
|
12072
|
-
GGML_TENSOR_BINARY_OP_LOCALS;
|
12073
|
-
|
12074
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12075
|
-
GGML_ASSERT(nb10 == sizeof(ggml_fp16_t));
|
12076
|
-
GGML_ASSERT(nb0 == sizeof(float));
|
12077
|
-
|
12078
|
-
const int N = ne13;
|
12079
|
-
const int OH = ne12;
|
12080
|
-
const int OW = ne11;
|
12081
|
-
|
12082
|
-
const int OC = ne03;
|
12083
|
-
const int IC = ne02;
|
12084
|
-
const int KH = ne01;
|
12085
|
-
const int KW = ne00;
|
12086
|
-
|
12087
|
-
const int ith = params->ith;
|
12088
|
-
const int nth = params->nth;
|
12089
|
-
|
12090
|
-
int64_t m = OC;
|
12091
|
-
int64_t n = OH * OW;
|
12092
|
-
int64_t k = IC * KH * KW;
|
12093
|
-
|
12094
|
-
// [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
|
12095
|
-
for (int i = 0; i < N; i++) {
|
12096
|
-
ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
|
12097
|
-
ggml_fp16_t * B = (ggml_fp16_t *)src1->data + i * m * k; // [n, k]
|
12098
|
-
float * C = (float *)dst->data + i * m * n; // [m, n]
|
12099
|
-
|
12100
|
-
gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
|
12101
|
-
}
|
12102
|
-
}
|
12103
|
-
|
12104
|
-
static void ggml_compute_forward_conv_2d_f16_f32(
|
12105
|
-
const struct ggml_compute_params * params,
|
12106
|
-
const struct ggml_tensor * src0,
|
12107
|
-
const struct ggml_tensor * src1,
|
12108
|
-
struct ggml_tensor * dst) {
|
12109
|
-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
12110
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12111
|
-
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
12112
|
-
|
12113
|
-
int64_t t0 = ggml_perf_time_us();
|
12114
|
-
UNUSED(t0);
|
12115
|
-
|
12116
|
-
GGML_TENSOR_BINARY_OP_LOCALS
|
12117
|
-
|
12118
|
-
// src1: image [N, IC, IH, IW]
|
12119
|
-
// src0: kernel [OC, IC, KH, KW]
|
12120
|
-
// dst: result [N, OC, OH, OW]
|
12121
|
-
// ne12: IC
|
12122
|
-
// ne0: OW
|
12123
|
-
// ne1: OH
|
12124
|
-
// nk0: KW
|
12125
|
-
// nk1: KH
|
12126
|
-
// ne13: N
|
12127
|
-
|
12128
|
-
const int N = ne13;
|
12129
|
-
const int IC = ne12;
|
12130
|
-
const int IH = ne11;
|
12131
|
-
const int IW = ne10;
|
12132
|
-
|
12133
|
-
const int OC = ne03;
|
12134
|
-
// const int IC = ne02;
|
12135
|
-
const int KH = ne01;
|
12136
|
-
const int KW = ne00;
|
12137
|
-
|
12138
|
-
const int OH = ne1;
|
12139
|
-
const int OW = ne0;
|
12140
|
-
|
12141
|
-
const int ith = params->ith;
|
12142
|
-
const int nth = params->nth;
|
12143
|
-
|
12144
|
-
// const int nk0 = ne00;
|
12145
|
-
// const int nk1 = ne01;
|
12146
|
-
|
12147
|
-
// size of the convolution row - the kernel size unrolled across all channels
|
12148
|
-
// const int ew0 = nk0*nk1*ne02;
|
12149
|
-
// ew0: IC*KH*KW
|
12150
|
-
|
12151
|
-
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12152
|
-
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12153
|
-
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12154
|
-
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12155
|
-
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12156
|
-
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
12157
|
-
|
12158
|
-
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
12159
|
-
GGML_ASSERT(nb10 == sizeof(float));
|
12160
|
-
|
12161
|
-
if (params->type == GGML_TASK_INIT) {
|
12162
|
-
memset(params->wdata, 0, params->wsize);
|
12163
|
-
|
12164
|
-
// prepare source data (src1)
|
12165
|
-
// im2col: [N, IC, IH, IW] => [N*OH*OW, IC*KH*KW]
|
12166
|
-
|
12167
|
-
{
|
12168
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
12169
|
-
|
12170
|
-
for (int in = 0; in < N; in++) {
|
12171
|
-
for (int iic = 0; iic < IC; iic++) {
|
12172
|
-
for (int ioh = 0; ioh < OH; ioh++) {
|
12173
|
-
for (int iow = 0; iow < OW; iow++) {
|
12174
|
-
|
12175
|
-
// micro kernel
|
12176
|
-
ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
|
12177
|
-
const float * const src_data = (float *)((char *) src1->data + in*nb13 + iic*nb12); // [IH, IW]
|
12178
|
-
|
12179
|
-
for (int ikh = 0; ikh < KH; ikh++) {
|
12180
|
-
for (int ikw = 0; ikw < KW; ikw++) {
|
12181
|
-
const int iiw = iow*s0 + ikw*d0 - p0;
|
12182
|
-
const int iih = ioh*s1 + ikh*d1 - p1;
|
12183
|
-
|
12184
|
-
if (!(iih < 0 || iih >= IH || iiw < 0 || iiw >= IW)) {
|
12185
|
-
dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_FP32_TO_FP16(src_data[iih*IW + iiw]);
|
12186
|
-
}
|
12187
|
-
}
|
12188
|
-
}
|
12189
|
-
}
|
12190
|
-
}
|
12191
|
-
}
|
12192
|
-
}
|
12193
|
-
}
|
12194
|
-
|
12195
|
-
return;
|
12196
|
-
}
|
12197
|
-
|
12198
|
-
if (params->type == GGML_TASK_FINALIZE) {
|
12199
|
-
return;
|
12200
|
-
}
|
12201
|
-
|
12202
|
-
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
12203
|
-
// wdata: [N*OH*OW, IC*KH*KW]
|
12204
|
-
// dst: result [N, OC, OH, OW]
|
12205
|
-
// src0: kernel [OC, IC, KH, KW]
|
12206
|
-
|
12207
|
-
int64_t m = OC;
|
12208
|
-
int64_t n = OH * OW;
|
12209
|
-
int64_t k = IC * KH * KW;
|
12210
|
-
|
12211
|
-
// [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
|
12212
|
-
for (int i = 0; i < N; i++) {
|
12213
|
-
ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
|
12214
|
-
ggml_fp16_t * B = (ggml_fp16_t *)wdata + i * m * k; // [n, k]
|
12215
|
-
float * C = (float *)dst->data + i * m * n; // [m * k]
|
12216
|
-
|
12217
|
-
gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
|
12218
|
-
}
|
12219
|
-
}
|
12220
|
-
|
12221
|
-
static void ggml_compute_forward_conv_2d(
|
12222
|
-
const struct ggml_compute_params * params,
|
12223
|
-
const struct ggml_tensor * src0,
|
12224
|
-
const struct ggml_tensor * src1,
|
12225
|
-
struct ggml_tensor * dst) {
|
12226
|
-
switch (src0->type) {
|
12227
|
-
case GGML_TYPE_F16:
|
12228
|
-
{
|
12229
|
-
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
|
12230
|
-
} break;
|
12231
|
-
case GGML_TYPE_F32:
|
12232
|
-
{
|
12233
|
-
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
12234
|
-
GGML_ASSERT(false);
|
12235
|
-
} break;
|
12236
|
-
default:
|
12237
|
-
{
|
12238
|
-
GGML_ASSERT(false);
|
12239
|
-
} break;
|
12240
|
-
}
|
12241
|
-
}
|
12242
|
-
|
12243
|
-
static void ggml_compute_forward_conv_2d_stage_0(
|
12244
|
-
const struct ggml_compute_params * params,
|
12245
|
-
const struct ggml_tensor * src0,
|
12246
|
-
const struct ggml_tensor * src1,
|
12247
|
-
struct ggml_tensor * dst) {
|
12248
|
-
switch (src0->type) {
|
12249
|
-
case GGML_TYPE_F16:
|
12250
|
-
{
|
12251
|
-
ggml_compute_forward_conv_2d_stage_0_f32(params, src0, src1, dst);
|
12252
|
-
} break;
|
12253
|
-
case GGML_TYPE_F32:
|
12254
|
-
{
|
12255
|
-
GGML_ASSERT(false);
|
12256
|
-
} break;
|
12257
|
-
default:
|
12258
|
-
{
|
12259
|
-
GGML_ASSERT(false);
|
12260
|
-
} break;
|
12261
|
-
}
|
12262
|
-
}
|
12263
|
-
|
12264
|
-
static void ggml_compute_forward_conv_2d_stage_1(
|
11610
|
+
static void ggml_compute_forward_im2col(
|
12265
11611
|
const struct ggml_compute_params * params,
|
12266
11612
|
const struct ggml_tensor * src0,
|
12267
11613
|
const struct ggml_tensor * src1,
|
@@ -12269,7 +11615,7 @@ static void ggml_compute_forward_conv_2d_stage_1(
|
|
12269
11615
|
switch (src0->type) {
|
12270
11616
|
case GGML_TYPE_F16:
|
12271
11617
|
{
|
12272
|
-
|
11618
|
+
ggml_compute_forward_im2col_f16(params, src0, src1, dst);
|
12273
11619
|
} break;
|
12274
11620
|
case GGML_TYPE_F32:
|
12275
11621
|
{
|
@@ -12454,14 +11800,11 @@ static void ggml_compute_forward_pool_1d(
|
|
12454
11800
|
ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
|
12455
11801
|
}
|
12456
11802
|
|
12457
|
-
//
|
11803
|
+
// ggml_compute_forward_pool_2d
|
12458
11804
|
|
12459
|
-
static void
|
11805
|
+
static void ggml_compute_forward_pool_2d(
|
12460
11806
|
const struct ggml_compute_params * params,
|
12461
|
-
const enum ggml_op_pool op,
|
12462
11807
|
const struct ggml_tensor * src,
|
12463
|
-
const int k0,
|
12464
|
-
const int k1,
|
12465
11808
|
struct ggml_tensor * dst) {
|
12466
11809
|
assert(src->type == GGML_TYPE_F32);
|
12467
11810
|
assert(params->ith == 0);
|
@@ -12470,6 +11813,14 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12470
11813
|
return;
|
12471
11814
|
}
|
12472
11815
|
|
11816
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
11817
|
+
enum ggml_op_pool op = opts[0];
|
11818
|
+
const int k0 = opts[1];
|
11819
|
+
const int k1 = opts[2];
|
11820
|
+
const int s0 = opts[3];
|
11821
|
+
const int s1 = opts[4];
|
11822
|
+
const int p0 = opts[5];
|
11823
|
+
const int p1 = opts[6];
|
12473
11824
|
const char * cdata = (const char*)src->data;
|
12474
11825
|
const char * const data_end = cdata + ggml_nbytes(src);
|
12475
11826
|
|
@@ -12480,6 +11831,8 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12480
11831
|
float * dplane = (float *)dst->data;
|
12481
11832
|
|
12482
11833
|
const int ka = k0 * k1;
|
11834
|
+
const int offset0 = -p0;
|
11835
|
+
const int offset1 = -p1;
|
12483
11836
|
|
12484
11837
|
while (cdata < data_end) {
|
12485
11838
|
for (int oy = 0; oy < py; ++oy) {
|
@@ -12492,13 +11845,15 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12492
11845
|
case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
|
12493
11846
|
}
|
12494
11847
|
|
12495
|
-
const int ix = ox *
|
12496
|
-
const int iy = oy *
|
11848
|
+
const int ix = offset0 + ox * s0;
|
11849
|
+
const int iy = offset1 + oy * s1;
|
12497
11850
|
|
12498
11851
|
for (int ky = 0; ky < k1; ++ky) {
|
11852
|
+
if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
|
12499
11853
|
const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
|
12500
11854
|
for (int kx = 0; kx < k0; ++kx) {
|
12501
11855
|
int j = ix + kx;
|
11856
|
+
if (j < 0 || j >= src->ne[0]) continue;
|
12502
11857
|
switch (op) {
|
12503
11858
|
case GGML_OP_POOL_AVG: *out += srow[j]; break;
|
12504
11859
|
case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break;
|
@@ -12515,31 +11870,8 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
12515
11870
|
}
|
12516
11871
|
|
12517
11872
|
cdata += src->nb[2];
|
12518
|
-
dplane += pa;
|
12519
|
-
}
|
12520
|
-
}
|
12521
|
-
|
12522
|
-
// ggml_compute_forward_pool_2d
|
12523
|
-
|
12524
|
-
static void ggml_compute_forward_pool_2d(
|
12525
|
-
const struct ggml_compute_params * params,
|
12526
|
-
const struct ggml_tensor * src0,
|
12527
|
-
struct ggml_tensor * dst) {
|
12528
|
-
|
12529
|
-
const int32_t * opts = (const int32_t *)dst->op_params;
|
12530
|
-
enum ggml_op_pool op = opts[0];
|
12531
|
-
const int k0 = opts[1];
|
12532
|
-
const int k1 = opts[2];
|
12533
|
-
const int s0 = opts[3];
|
12534
|
-
const int s1 = opts[4];
|
12535
|
-
const int p0 = opts[5];
|
12536
|
-
const int p1 = opts[6];
|
12537
|
-
GGML_ASSERT(p0 == 0);
|
12538
|
-
GGML_ASSERT(p1 == 0); // padding not supported
|
12539
|
-
GGML_ASSERT(k0 == s0);
|
12540
|
-
GGML_ASSERT(k1 == s1); // only s = k supported
|
12541
|
-
|
12542
|
-
ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst);
|
11873
|
+
dplane += pa;
|
11874
|
+
}
|
12543
11875
|
}
|
12544
11876
|
|
12545
11877
|
// ggml_compute_forward_upscale
|
@@ -13743,6 +13075,10 @@ static void ggml_compute_forward_unary(
|
|
13743
13075
|
{
|
13744
13076
|
ggml_compute_forward_silu(params, src0, dst);
|
13745
13077
|
} break;
|
13078
|
+
case GGML_UNARY_OP_LEAKY:
|
13079
|
+
{
|
13080
|
+
ggml_compute_forward_leaky(params, src0, dst);
|
13081
|
+
} break;
|
13746
13082
|
default:
|
13747
13083
|
{
|
13748
13084
|
GGML_ASSERT(false);
|
@@ -14496,33 +13832,13 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14496
13832
|
{
|
14497
13833
|
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
14498
13834
|
} break;
|
14499
|
-
case GGML_OP_CONV_1D:
|
14500
|
-
{
|
14501
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
14502
|
-
} break;
|
14503
|
-
case GGML_OP_CONV_1D_STAGE_0:
|
14504
|
-
{
|
14505
|
-
ggml_compute_forward_conv_1d_stage_0(params, tensor->src[0], tensor->src[1], tensor);
|
14506
|
-
} break;
|
14507
|
-
case GGML_OP_CONV_1D_STAGE_1:
|
14508
|
-
{
|
14509
|
-
ggml_compute_forward_conv_1d_stage_1(params, tensor->src[0], tensor->src[1], tensor);
|
14510
|
-
} break;
|
14511
13835
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
14512
13836
|
{
|
14513
13837
|
ggml_compute_forward_conv_transpose_1d(params, tensor->src[0], tensor->src[1], tensor);
|
14514
13838
|
} break;
|
14515
|
-
case
|
14516
|
-
{
|
14517
|
-
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
|
14518
|
-
} break;
|
14519
|
-
case GGML_OP_CONV_2D_STAGE_0:
|
13839
|
+
case GGML_OP_IM2COL:
|
14520
13840
|
{
|
14521
|
-
|
14522
|
-
} break;
|
14523
|
-
case GGML_OP_CONV_2D_STAGE_1:
|
14524
|
-
{
|
14525
|
-
ggml_compute_forward_conv_2d_stage_1(params, tensor->src[0], tensor->src[1], tensor);
|
13841
|
+
ggml_compute_forward_im2col(params, tensor->src[0], tensor->src[1], tensor);
|
14526
13842
|
} break;
|
14527
13843
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
14528
13844
|
{
|
@@ -14651,62 +13967,109 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14651
13967
|
|
14652
13968
|
////////////////////////////////////////////////////////////////////////////////
|
14653
13969
|
|
14654
|
-
|
13970
|
+
static size_t ggml_hash_size(size_t min_sz) {
|
13971
|
+
// next primes after powers of two
|
13972
|
+
static const size_t primes[] = {
|
13973
|
+
2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
|
13974
|
+
2053, 4099, 8209, 16411, 32771, 65537, 131101,
|
13975
|
+
262147, 524309, 1048583, 2097169, 4194319, 8388617,
|
13976
|
+
16777259, 33554467, 67108879, 134217757, 268435459,
|
13977
|
+
536870923, 1073741827, 2147483659
|
13978
|
+
};
|
13979
|
+
static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
|
13980
|
+
|
13981
|
+
// find the smallest prime that is larger or equal to min_sz
|
13982
|
+
size_t l = 0;
|
13983
|
+
size_t r = n_primes;
|
13984
|
+
while (l < r) {
|
13985
|
+
size_t m = (l + r)/2;
|
13986
|
+
if (primes[m] < min_sz) {
|
13987
|
+
l = m + 1;
|
13988
|
+
} else {
|
13989
|
+
r = m;
|
13990
|
+
}
|
13991
|
+
}
|
13992
|
+
size_t sz = l < n_primes ? primes[l] : min_sz | 1;
|
13993
|
+
return sz;
|
13994
|
+
}
|
14655
13995
|
|
14656
|
-
static size_t
|
14657
|
-
return (size_t)p
|
13996
|
+
static size_t ggml_hash(const void * p) {
|
13997
|
+
return (size_t)p;
|
14658
13998
|
}
|
14659
13999
|
|
14660
|
-
|
14661
|
-
size_t h =
|
14000
|
+
size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14001
|
+
size_t h = ggml_hash(key) % hash_set.size;
|
14662
14002
|
|
14663
14003
|
// linear probing
|
14664
14004
|
size_t i = h;
|
14665
|
-
while (
|
14666
|
-
i = (i + 1) %
|
14005
|
+
while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) {
|
14006
|
+
i = (i + 1) % hash_set.size;
|
14667
14007
|
if (i == h) {
|
14668
14008
|
// visited all hash table entries -> not found
|
14669
|
-
return
|
14009
|
+
return GGML_HASHTABLE_FULL;
|
14670
14010
|
}
|
14671
14011
|
}
|
14672
14012
|
return i;
|
14673
14013
|
}
|
14674
14014
|
|
14675
|
-
|
14676
|
-
size_t i =
|
14015
|
+
bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14016
|
+
size_t i = ggml_hash_find(hash_set, key);
|
14017
|
+
return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key;
|
14018
|
+
}
|
14019
|
+
|
14020
|
+
size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14021
|
+
size_t i = ggml_hash_find(hash_set, key);
|
14677
14022
|
|
14678
|
-
GGML_ASSERT(i
|
14023
|
+
GGML_ASSERT(i != GGML_HASHTABLE_FULL);
|
14679
14024
|
|
14680
|
-
if (
|
14681
|
-
return
|
14025
|
+
if (hash_set.keys[i] == key) {
|
14026
|
+
return GGML_HASHTABLE_ALREADY_EXISTS;
|
14682
14027
|
}
|
14683
14028
|
|
14684
14029
|
// insert
|
14685
|
-
GGML_ASSERT(
|
14686
|
-
|
14687
|
-
return
|
14030
|
+
GGML_ASSERT(hash_set.keys[i] == NULL);
|
14031
|
+
hash_set.keys[i] = key;
|
14032
|
+
return i;
|
14033
|
+
}
|
14034
|
+
|
14035
|
+
size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
|
14036
|
+
size_t i = ggml_hash_find(hash_set, key);
|
14037
|
+
|
14038
|
+
GGML_ASSERT(i != GGML_HASHTABLE_FULL);
|
14039
|
+
|
14040
|
+
hash_set.keys[i] = key;
|
14041
|
+
return i;
|
14042
|
+
}
|
14043
|
+
|
14044
|
+
static struct ggml_hash_set ggml_hash_set_new(size_t size) {
|
14045
|
+
size = ggml_hash_size(size);
|
14046
|
+
struct ggml_hash_set result;
|
14047
|
+
result.size = size;
|
14048
|
+
result.keys = malloc(sizeof(struct ggml_tensor *) * size);
|
14049
|
+
memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
|
14050
|
+
return result;
|
14688
14051
|
}
|
14689
14052
|
|
14690
|
-
static
|
14691
|
-
|
14692
|
-
return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
|
14053
|
+
static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
|
14054
|
+
free(hash_set.keys);
|
14693
14055
|
}
|
14694
14056
|
|
14695
14057
|
struct hash_map {
|
14696
|
-
|
14697
|
-
|
14058
|
+
struct ggml_hash_set set;
|
14059
|
+
struct ggml_tensor ** vals;
|
14698
14060
|
};
|
14699
14061
|
|
14700
|
-
static struct hash_map *
|
14062
|
+
static struct hash_map * ggml_new_hash_map(size_t size) {
|
14701
14063
|
struct hash_map * result = malloc(sizeof(struct hash_map));
|
14702
|
-
|
14703
|
-
|
14704
|
-
|
14705
|
-
}
|
14064
|
+
result->set = ggml_hash_set_new(size);
|
14065
|
+
result->vals = malloc(sizeof(struct ggml_tensor *) * result->set.size);
|
14066
|
+
memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
|
14706
14067
|
return result;
|
14707
14068
|
}
|
14708
14069
|
|
14709
|
-
static void
|
14070
|
+
static void ggml_hash_map_free(struct hash_map * map) {
|
14071
|
+
ggml_hash_set_free(map->set);
|
14072
|
+
free(map->vals);
|
14710
14073
|
free(map);
|
14711
14074
|
}
|
14712
14075
|
|
@@ -14726,7 +14089,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
|
14726
14089
|
return node;
|
14727
14090
|
}
|
14728
14091
|
|
14729
|
-
if (!
|
14092
|
+
if (!ggml_hash_contains(graph->visited_hash_table, node)) {
|
14730
14093
|
return node;
|
14731
14094
|
}
|
14732
14095
|
|
@@ -14741,17 +14104,17 @@ static struct ggml_tensor * ggml_recompute_graph_node(
|
|
14741
14104
|
return node;
|
14742
14105
|
}
|
14743
14106
|
|
14744
|
-
size_t i =
|
14745
|
-
GGML_ASSERT(i
|
14746
|
-
if (replacements->keys[i] == node) {
|
14747
|
-
return
|
14107
|
+
size_t i = ggml_hash_find(replacements->set, node);
|
14108
|
+
GGML_ASSERT(i != GGML_HASHTABLE_FULL); // assert that not full
|
14109
|
+
if (replacements->set.keys[i] == node) {
|
14110
|
+
return replacements->vals[i];
|
14748
14111
|
}
|
14749
14112
|
|
14750
14113
|
struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, node->n_dims, node->ne);
|
14751
14114
|
|
14752
14115
|
// insert clone into replacements
|
14753
|
-
GGML_ASSERT(replacements->keys[i] == NULL); // assert that we don't overwrite
|
14754
|
-
replacements->keys[i] = node;
|
14116
|
+
GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
|
14117
|
+
replacements->set.keys[i] = node;
|
14755
14118
|
replacements->vals[i] = clone;
|
14756
14119
|
|
14757
14120
|
clone->op = node->op;
|
@@ -14788,26 +14151,26 @@ void ggml_build_backward_gradient_checkpointing(
|
|
14788
14151
|
struct ggml_cgraph * gb_tmp,
|
14789
14152
|
struct ggml_tensor * * checkpoints,
|
14790
14153
|
int n_checkpoints) {
|
14791
|
-
|
14154
|
+
ggml_graph_cpy(gf, gb_tmp);
|
14792
14155
|
ggml_build_backward_expand(ctx, gf, gb_tmp, true);
|
14793
14156
|
|
14794
14157
|
if (n_checkpoints <= 0) {
|
14795
|
-
|
14158
|
+
ggml_graph_cpy(gb_tmp, gb);
|
14796
14159
|
return;
|
14797
14160
|
}
|
14798
14161
|
|
14799
|
-
struct hash_map * replacements =
|
14162
|
+
struct hash_map * replacements = ggml_new_hash_map(gf->n_nodes + gf->n_leafs + n_checkpoints);
|
14800
14163
|
|
14801
14164
|
// insert checkpoints in replacements
|
14802
14165
|
for (int i = 0; i < n_checkpoints; ++i) {
|
14803
|
-
size_t k =
|
14804
|
-
GGML_ASSERT(k
|
14805
|
-
GGML_ASSERT(replacements->keys[k] == NULL); // assert that we don't overwrite
|
14806
|
-
replacements->keys[k] = checkpoints[i];
|
14807
|
-
replacements->vals[k]
|
14166
|
+
size_t k = ggml_hash_find(replacements->set, checkpoints[i]);
|
14167
|
+
GGML_ASSERT(k != GGML_HASHTABLE_FULL); // assert that not full
|
14168
|
+
GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
|
14169
|
+
replacements->set.keys[k] = checkpoints[i];
|
14170
|
+
replacements->vals[k] = checkpoints[i];
|
14808
14171
|
}
|
14809
14172
|
|
14810
|
-
|
14173
|
+
ggml_graph_cpy(gf, gb);
|
14811
14174
|
// rewrite gb_tmp->nodes[gf->n_nodes:gb_tmp->n_nodes],
|
14812
14175
|
// replacing references to gb_tmp->nodes[0:gf->n_nodes] ( == gf->nodes[0:gf->n_nodes]),
|
14813
14176
|
// by recomputing them from checkpoints
|
@@ -14824,21 +14187,21 @@ void ggml_build_backward_gradient_checkpointing(
|
|
14824
14187
|
ggml_build_forward_expand(gb, node);
|
14825
14188
|
}
|
14826
14189
|
|
14827
|
-
|
14190
|
+
ggml_hash_map_free(replacements);
|
14828
14191
|
}
|
14829
14192
|
|
14830
14193
|
// functions to change gradients considering the case that input a might be initial gradient with zero value
|
14831
14194
|
|
14832
|
-
static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b,
|
14833
|
-
if (
|
14195
|
+
static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
|
14196
|
+
if (ggml_hash_contains(zero_table, a)) {
|
14834
14197
|
return b;
|
14835
14198
|
} else {
|
14836
14199
|
return ggml_add_impl(ctx, a, b, false);
|
14837
14200
|
}
|
14838
14201
|
}
|
14839
14202
|
|
14840
|
-
static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset,
|
14841
|
-
if (
|
14203
|
+
static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) {
|
14204
|
+
if (ggml_hash_contains(zero_table, a)) {
|
14842
14205
|
struct ggml_tensor * a_zero = ggml_scale(ctx, a, ggml_new_f32(ctx, 0));
|
14843
14206
|
return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
|
14844
14207
|
} else {
|
@@ -14846,23 +14209,23 @@ static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct gg
|
|
14846
14209
|
}
|
14847
14210
|
}
|
14848
14211
|
|
14849
|
-
static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b,
|
14850
|
-
if (
|
14212
|
+
static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
|
14213
|
+
if (ggml_hash_contains(zero_table, a)) {
|
14851
14214
|
return ggml_repeat(ctx, b, a);
|
14852
14215
|
} else {
|
14853
14216
|
return ggml_add1_impl(ctx, a, b, false);
|
14854
14217
|
}
|
14855
14218
|
}
|
14856
14219
|
|
14857
|
-
static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b,
|
14858
|
-
if (
|
14220
|
+
static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
|
14221
|
+
if (ggml_hash_contains(zero_table, a)) {
|
14859
14222
|
return ggml_neg(ctx, b);
|
14860
14223
|
} else {
|
14861
14224
|
return ggml_sub_impl(ctx, a, b, false);
|
14862
14225
|
}
|
14863
14226
|
}
|
14864
14227
|
|
14865
|
-
static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor,
|
14228
|
+
static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) {
|
14866
14229
|
struct ggml_tensor * src0 = tensor->src[0];
|
14867
14230
|
struct ggml_tensor * src1 = tensor->src[1];
|
14868
14231
|
|
@@ -15457,31 +14820,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15457
14820
|
{
|
15458
14821
|
GGML_ASSERT(false); // TODO: not implemented
|
15459
14822
|
} break;
|
15460
|
-
case GGML_OP_CONV_1D:
|
15461
|
-
{
|
15462
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15463
|
-
} break;
|
15464
|
-
case GGML_OP_CONV_1D_STAGE_0:
|
15465
|
-
{
|
15466
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15467
|
-
} break;
|
15468
|
-
case GGML_OP_CONV_1D_STAGE_1:
|
15469
|
-
{
|
15470
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15471
|
-
} break;
|
15472
14823
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
15473
14824
|
{
|
15474
14825
|
GGML_ASSERT(false); // TODO: not implemented
|
15475
14826
|
} break;
|
15476
|
-
case
|
15477
|
-
{
|
15478
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15479
|
-
} break;
|
15480
|
-
case GGML_OP_CONV_2D_STAGE_0:
|
15481
|
-
{
|
15482
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15483
|
-
} break;
|
15484
|
-
case GGML_OP_CONV_2D_STAGE_1:
|
14827
|
+
case GGML_OP_IM2COL:
|
15485
14828
|
{
|
15486
14829
|
GGML_ASSERT(false); // TODO: not implemented
|
15487
14830
|
} break;
|
@@ -15695,7 +15038,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15695
15038
|
}
|
15696
15039
|
|
15697
15040
|
// check if already visited
|
15698
|
-
if (
|
15041
|
+
if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) {
|
15699
15042
|
return;
|
15700
15043
|
}
|
15701
15044
|
|
@@ -15711,7 +15054,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15711
15054
|
|
15712
15055
|
if (node->op == GGML_OP_NONE && node->grad == NULL) {
|
15713
15056
|
// reached a leaf node, not part of the gradient graph (e.g. a constant)
|
15714
|
-
GGML_ASSERT(cgraph->n_leafs <
|
15057
|
+
GGML_ASSERT(cgraph->n_leafs < cgraph->size);
|
15715
15058
|
|
15716
15059
|
if (strlen(node->name) == 0) {
|
15717
15060
|
ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
|
@@ -15720,22 +15063,24 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15720
15063
|
cgraph->leafs[cgraph->n_leafs] = node;
|
15721
15064
|
cgraph->n_leafs++;
|
15722
15065
|
} else {
|
15723
|
-
GGML_ASSERT(cgraph->n_nodes <
|
15066
|
+
GGML_ASSERT(cgraph->n_nodes < cgraph->size);
|
15724
15067
|
|
15725
15068
|
if (strlen(node->name) == 0) {
|
15726
15069
|
ggml_format_name(node, "node_%d", cgraph->n_nodes);
|
15727
15070
|
}
|
15728
15071
|
|
15729
15072
|
cgraph->nodes[cgraph->n_nodes] = node;
|
15730
|
-
cgraph->grads
|
15073
|
+
if (cgraph->grads) {
|
15074
|
+
cgraph->grads[cgraph->n_nodes] = node->grad;
|
15075
|
+
}
|
15731
15076
|
cgraph->n_nodes++;
|
15732
15077
|
}
|
15733
15078
|
}
|
15734
15079
|
|
15735
15080
|
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
|
15736
15081
|
if (!expand) {
|
15737
|
-
|
15738
|
-
cgraph
|
15082
|
+
// TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
|
15083
|
+
ggml_graph_clear(cgraph);
|
15739
15084
|
}
|
15740
15085
|
|
15741
15086
|
const int n0 = cgraph->n_nodes;
|
@@ -15756,25 +15101,6 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15756
15101
|
ggml_build_forward_impl(cgraph, tensor, true);
|
15757
15102
|
}
|
15758
15103
|
|
15759
|
-
struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
|
15760
|
-
struct ggml_cgraph result = {
|
15761
|
-
/*.n_nodes =*/ 0,
|
15762
|
-
/*.n_leafs =*/ 0,
|
15763
|
-
/*.nodes =*/ { NULL },
|
15764
|
-
/*.grads =*/ { NULL },
|
15765
|
-
/*.leafs =*/ { NULL },
|
15766
|
-
/*.hash_table =*/ { NULL },
|
15767
|
-
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
|
15768
|
-
/*.perf_runs =*/ 0,
|
15769
|
-
/*.perf_cycles =*/ 0,
|
15770
|
-
/*.perf_time_us =*/ 0,
|
15771
|
-
};
|
15772
|
-
|
15773
|
-
ggml_build_forward_impl(&result, tensor, false);
|
15774
|
-
|
15775
|
-
return result;
|
15776
|
-
}
|
15777
|
-
|
15778
15104
|
void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
|
15779
15105
|
GGML_ASSERT(gf->n_nodes > 0);
|
15780
15106
|
|
@@ -15791,11 +15117,10 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
15791
15117
|
}
|
15792
15118
|
|
15793
15119
|
// remember original gradients which start with zero values
|
15794
|
-
|
15795
|
-
memset(zero_table, 0, sizeof(void*) * GGML_GRAPH_HASHTABLE_SIZE);
|
15120
|
+
struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
|
15796
15121
|
for (int i = 0; i < gf->n_nodes; i++) {
|
15797
15122
|
if (gf->grads[i]) {
|
15798
|
-
|
15123
|
+
ggml_hash_insert(zero_table, gf->grads[i]);
|
15799
15124
|
}
|
15800
15125
|
}
|
15801
15126
|
|
@@ -15818,26 +15143,54 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
|
|
15818
15143
|
}
|
15819
15144
|
}
|
15820
15145
|
|
15821
|
-
|
15146
|
+
ggml_hash_set_free(zero_table);
|
15822
15147
|
}
|
15823
15148
|
|
15824
|
-
|
15825
|
-
|
15826
|
-
|
15827
|
-
|
15149
|
+
static size_t ggml_graph_nbytes(size_t size, bool grads) {
|
15150
|
+
size_t nbytes = sizeof(struct ggml_cgraph);
|
15151
|
+
nbytes += size * sizeof(struct ggml_tensor *) * 2; // leafs + nodes
|
15152
|
+
if (grads) {
|
15153
|
+
nbytes += size * sizeof(struct ggml_tensor *); // grads
|
15154
|
+
}
|
15155
|
+
nbytes += ggml_hash_size(size * 2) * sizeof(struct ggml_tensor *); // hash set
|
15156
|
+
return nbytes;
|
15828
15157
|
}
|
15829
15158
|
|
15830
|
-
|
15831
|
-
|
15159
|
+
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
|
15160
|
+
return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
|
15161
|
+
}
|
15162
|
+
|
15163
|
+
size_t ggml_graph_overhead(void) {
|
15164
|
+
return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
|
15165
|
+
}
|
15166
|
+
|
15167
|
+
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
|
15168
|
+
const size_t obj_size = ggml_graph_nbytes(size, grads);
|
15169
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
|
15832
15170
|
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15833
15171
|
|
15172
|
+
struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
|
15173
|
+
|
15174
|
+
size_t hash_size = ggml_hash_size(size * 2);
|
15175
|
+
struct ggml_tensor ** nodes_ptr = data_start;
|
15176
|
+
struct ggml_tensor ** leafs_ptr = nodes_ptr + size;
|
15177
|
+
struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
|
15178
|
+
struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;
|
15179
|
+
|
15180
|
+
// check that we allocated the correct amount of memory
|
15181
|
+
assert(obj_size == (size_t) (
|
15182
|
+
(grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
|
15183
|
+
|
15184
|
+
memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
|
15185
|
+
|
15834
15186
|
*cgraph = (struct ggml_cgraph) {
|
15187
|
+
/*.size =*/ size,
|
15835
15188
|
/*.n_nodes =*/ 0,
|
15836
15189
|
/*.n_leafs =*/ 0,
|
15837
|
-
/*.nodes =*/
|
15838
|
-
/*.grads =*/
|
15839
|
-
/*.leafs =*/
|
15840
|
-
/*.hash_table =*/ {
|
15190
|
+
/*.nodes =*/ nodes_ptr,
|
15191
|
+
/*.grads =*/ grads_ptr,
|
15192
|
+
/*.leafs =*/ leafs_ptr,
|
15193
|
+
/*.hash_table =*/ { hash_size, hash_keys_ptr },
|
15841
15194
|
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
|
15842
15195
|
/*.perf_runs =*/ 0,
|
15843
15196
|
/*.perf_cycles =*/ 0,
|
@@ -15847,14 +15200,85 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
|
15847
15200
|
return cgraph;
|
15848
15201
|
}
|
15849
15202
|
|
15850
|
-
struct ggml_cgraph *
|
15851
|
-
|
15852
|
-
|
15203
|
+
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
15204
|
+
return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
|
15205
|
+
}
|
15206
|
+
|
15207
|
+
struct ggml_cgraph * ggml_graph_view(struct ggml_context * ctx, struct ggml_cgraph * cgraph0, int i0, int i1) {
|
15208
|
+
const size_t obj_size = sizeof(struct ggml_cgraph);
|
15209
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
|
15210
|
+
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15211
|
+
|
15212
|
+
*cgraph = (struct ggml_cgraph) {
|
15213
|
+
/*.size =*/ 0,
|
15214
|
+
/*.n_nodes =*/ i1 - i0,
|
15215
|
+
/*.n_leafs =*/ 0,
|
15216
|
+
/*.nodes =*/ cgraph0->nodes + i0,
|
15217
|
+
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
|
15218
|
+
/*.leafs =*/ NULL,
|
15219
|
+
/*.hash_table =*/ { 0, NULL },
|
15220
|
+
/*.order =*/ cgraph0->order,
|
15221
|
+
/*.perf_runs =*/ 0,
|
15222
|
+
/*.perf_cycles =*/ 0,
|
15223
|
+
/*.perf_time_us =*/ 0,
|
15224
|
+
};
|
15225
|
+
|
15853
15226
|
return cgraph;
|
15854
15227
|
}
|
15855
15228
|
|
15856
|
-
|
15857
|
-
|
15229
|
+
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
|
15230
|
+
GGML_ASSERT(dst->size >= src->n_leafs);
|
15231
|
+
GGML_ASSERT(dst->size >= src->n_nodes);
|
15232
|
+
GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size);
|
15233
|
+
|
15234
|
+
dst->n_leafs = src->n_leafs;
|
15235
|
+
dst->n_nodes = src->n_nodes;
|
15236
|
+
dst->order = src->order;
|
15237
|
+
|
15238
|
+
for (int i = 0; i < src->n_leafs; ++i) {
|
15239
|
+
dst->leafs[i] = src->leafs[i];
|
15240
|
+
}
|
15241
|
+
|
15242
|
+
for (int i = 0; i < src->n_nodes; ++i) {
|
15243
|
+
dst->nodes[i] = src->nodes[i];
|
15244
|
+
}
|
15245
|
+
|
15246
|
+
if (src->grads) {
|
15247
|
+
GGML_ASSERT(dst->grads != NULL);
|
15248
|
+
for (int i = 0; i < src->n_nodes; ++i) {
|
15249
|
+
dst->grads[i] = src->grads[i];
|
15250
|
+
}
|
15251
|
+
}
|
15252
|
+
|
15253
|
+
for (size_t i = 0; i < src->visited_hash_table.size; ++i) {
|
15254
|
+
if (src->visited_hash_table.keys[i]) {
|
15255
|
+
ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]);
|
15256
|
+
}
|
15257
|
+
}
|
15258
|
+
}
|
15259
|
+
|
15260
|
+
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
|
15261
|
+
struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
|
15262
|
+
ggml_graph_cpy(cgraph, result);
|
15263
|
+
return result;
|
15264
|
+
}
|
15265
|
+
|
15266
|
+
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
15267
|
+
GGML_ASSERT(cgraph->grads != NULL);
|
15268
|
+
|
15269
|
+
for (int i = 0; i < cgraph->n_nodes; i++) {
|
15270
|
+
struct ggml_tensor * grad = cgraph->grads[i];
|
15271
|
+
|
15272
|
+
if (grad) {
|
15273
|
+
ggml_set_zero(grad);
|
15274
|
+
}
|
15275
|
+
}
|
15276
|
+
}
|
15277
|
+
|
15278
|
+
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
|
15279
|
+
cgraph->n_leafs = 0;
|
15280
|
+
cgraph->n_nodes = 0;
|
15281
|
+
memset(cgraph->visited_hash_table.keys, 0, cgraph->visited_hash_table.size * sizeof(struct ggml_tensor *));
|
15858
15282
|
}
|
15859
15283
|
|
15860
15284
|
//
|
@@ -15966,45 +15390,266 @@ static void clear_numa_thread_affinity(void) {
|
|
15966
15390
|
strerror(rv));
|
15967
15391
|
}
|
15968
15392
|
|
15969
|
-
CPU_FREE(cpus);
|
15970
|
-
}
|
15971
|
-
#else
|
15972
|
-
// TODO: Windows etc.
|
15973
|
-
// (the linux implementation may also work on BSD, someone should test)
|
15974
|
-
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
15975
|
-
static void clear_numa_thread_affinity(void) {}
|
15976
|
-
#endif
|
15977
|
-
|
15978
|
-
struct ggml_compute_state_shared {
|
15979
|
-
const struct ggml_cgraph * cgraph;
|
15980
|
-
const struct ggml_cplan * cplan;
|
15981
|
-
|
15982
|
-
int64_t perf_node_start_cycles;
|
15983
|
-
int64_t perf_node_start_time_us;
|
15984
|
-
|
15985
|
-
const int n_threads;
|
15986
|
-
|
15987
|
-
// synchronization primitives
|
15988
|
-
atomic_int n_active; // num active threads
|
15989
|
-
atomic_int node_n; // active graph node
|
15990
|
-
|
15991
|
-
bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
|
15992
|
-
void * abort_callback_data;
|
15993
|
-
};
|
15994
|
-
|
15995
|
-
struct ggml_compute_state {
|
15996
|
-
ggml_thread_t thrd;
|
15997
|
-
int ith;
|
15998
|
-
struct ggml_compute_state_shared * shared;
|
15999
|
-
};
|
16000
|
-
|
16001
|
-
static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
|
16002
|
-
int64_t cycles_cur = ggml_perf_cycles() - st->perf_node_start_cycles;
|
16003
|
-
int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
|
15393
|
+
CPU_FREE(cpus);
|
15394
|
+
}
|
15395
|
+
#else
|
15396
|
+
// TODO: Windows etc.
|
15397
|
+
// (the linux implementation may also work on BSD, someone should test)
|
15398
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
15399
|
+
static void clear_numa_thread_affinity(void) {}
|
15400
|
+
#endif
|
15401
|
+
|
15402
|
+
struct ggml_compute_state_shared {
|
15403
|
+
const struct ggml_cgraph * cgraph;
|
15404
|
+
const struct ggml_cplan * cplan;
|
15405
|
+
|
15406
|
+
int64_t perf_node_start_cycles;
|
15407
|
+
int64_t perf_node_start_time_us;
|
15408
|
+
|
15409
|
+
const int n_threads;
|
15410
|
+
|
15411
|
+
// synchronization primitives
|
15412
|
+
atomic_int n_active; // num active threads
|
15413
|
+
atomic_int node_n; // active graph node
|
15414
|
+
|
15415
|
+
bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
|
15416
|
+
void * abort_callback_data;
|
15417
|
+
};
|
15418
|
+
|
15419
|
+
struct ggml_compute_state {
|
15420
|
+
ggml_thread_t thrd;
|
15421
|
+
int ith;
|
15422
|
+
struct ggml_compute_state_shared * shared;
|
15423
|
+
};
|
15424
|
+
|
15425
|
+
static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
|
15426
|
+
int64_t cycles_cur = ggml_perf_cycles() - st->perf_node_start_cycles;
|
15427
|
+
int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
|
15428
|
+
|
15429
|
+
node->perf_runs++;
|
15430
|
+
node->perf_cycles += cycles_cur;
|
15431
|
+
node->perf_time_us += time_us_cur;
|
15432
|
+
}
|
15433
|
+
|
15434
|
+
static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
15435
|
+
int n_tasks = 0;
|
15436
|
+
|
15437
|
+
switch (node->op) {
|
15438
|
+
case GGML_OP_CPY:
|
15439
|
+
case GGML_OP_DUP:
|
15440
|
+
case GGML_OP_ADD:
|
15441
|
+
case GGML_OP_ADD1:
|
15442
|
+
case GGML_OP_ACC:
|
15443
|
+
{
|
15444
|
+
n_tasks = n_threads;
|
15445
|
+
} break;
|
15446
|
+
case GGML_OP_SUB:
|
15447
|
+
case GGML_OP_DIV:
|
15448
|
+
case GGML_OP_SQR:
|
15449
|
+
case GGML_OP_SQRT:
|
15450
|
+
case GGML_OP_LOG:
|
15451
|
+
case GGML_OP_SUM:
|
15452
|
+
case GGML_OP_SUM_ROWS:
|
15453
|
+
case GGML_OP_MEAN:
|
15454
|
+
case GGML_OP_ARGMAX:
|
15455
|
+
case GGML_OP_REPEAT:
|
15456
|
+
case GGML_OP_REPEAT_BACK:
|
15457
|
+
{
|
15458
|
+
n_tasks = 1;
|
15459
|
+
} break;
|
15460
|
+
case GGML_OP_UNARY:
|
15461
|
+
switch (ggml_get_unary_op(node)) {
|
15462
|
+
case GGML_UNARY_OP_ABS:
|
15463
|
+
case GGML_UNARY_OP_SGN:
|
15464
|
+
case GGML_UNARY_OP_NEG:
|
15465
|
+
case GGML_UNARY_OP_STEP:
|
15466
|
+
case GGML_UNARY_OP_TANH:
|
15467
|
+
case GGML_UNARY_OP_ELU:
|
15468
|
+
case GGML_UNARY_OP_RELU:
|
15469
|
+
case GGML_UNARY_OP_LEAKY:
|
15470
|
+
{
|
15471
|
+
n_tasks = 1;
|
15472
|
+
} break;
|
15473
|
+
|
15474
|
+
case GGML_UNARY_OP_GELU:
|
15475
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
15476
|
+
case GGML_UNARY_OP_SILU:
|
15477
|
+
{
|
15478
|
+
n_tasks = n_threads;
|
15479
|
+
} break;
|
15480
|
+
}
|
15481
|
+
break;
|
15482
|
+
case GGML_OP_SILU_BACK:
|
15483
|
+
case GGML_OP_MUL:
|
15484
|
+
case GGML_OP_NORM:
|
15485
|
+
case GGML_OP_RMS_NORM:
|
15486
|
+
case GGML_OP_RMS_NORM_BACK:
|
15487
|
+
case GGML_OP_GROUP_NORM:
|
15488
|
+
case GGML_OP_CONCAT:
|
15489
|
+
{
|
15490
|
+
n_tasks = n_threads;
|
15491
|
+
} break;
|
15492
|
+
case GGML_OP_MUL_MAT:
|
15493
|
+
{
|
15494
|
+
n_tasks = n_threads;
|
15495
|
+
|
15496
|
+
// TODO: use different scheduling for different matrix sizes
|
15497
|
+
//const int nr0 = ggml_nrows(node->src[0]);
|
15498
|
+
//const int nr1 = ggml_nrows(node->src[1]);
|
15499
|
+
|
15500
|
+
//n_tasks = MIN(n_threads, MAX(1, nr0/128));
|
15501
|
+
//printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
|
15502
|
+
|
15503
|
+
#if defined(GGML_USE_CUBLAS)
|
15504
|
+
if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
|
15505
|
+
n_tasks = 1; // TODO: this actually is doing nothing
|
15506
|
+
// the threads are still spinning
|
15507
|
+
}
|
15508
|
+
#elif defined(GGML_USE_CLBLAST)
|
15509
|
+
if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
|
15510
|
+
n_tasks = 1; // TODO: this actually is doing nothing
|
15511
|
+
// the threads are still spinning
|
15512
|
+
}
|
15513
|
+
#endif
|
15514
|
+
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
15515
|
+
if (ggml_compute_forward_mul_mat_use_blas(node->src[0], node->src[1], node)) {
|
15516
|
+
n_tasks = 1; // TODO: this actually is doing nothing
|
15517
|
+
// the threads are still spinning
|
15518
|
+
}
|
15519
|
+
#endif
|
15520
|
+
} break;
|
15521
|
+
case GGML_OP_OUT_PROD:
|
15522
|
+
{
|
15523
|
+
n_tasks = n_threads;
|
15524
|
+
} break;
|
15525
|
+
case GGML_OP_SCALE:
|
15526
|
+
case GGML_OP_SET:
|
15527
|
+
case GGML_OP_CONT:
|
15528
|
+
case GGML_OP_RESHAPE:
|
15529
|
+
case GGML_OP_VIEW:
|
15530
|
+
case GGML_OP_PERMUTE:
|
15531
|
+
case GGML_OP_TRANSPOSE:
|
15532
|
+
case GGML_OP_GET_ROWS:
|
15533
|
+
case GGML_OP_GET_ROWS_BACK:
|
15534
|
+
case GGML_OP_DIAG:
|
15535
|
+
{
|
15536
|
+
n_tasks = 1;
|
15537
|
+
} break;
|
15538
|
+
case GGML_OP_DIAG_MASK_ZERO:
|
15539
|
+
case GGML_OP_DIAG_MASK_INF:
|
15540
|
+
case GGML_OP_SOFT_MAX:
|
15541
|
+
case GGML_OP_SOFT_MAX_BACK:
|
15542
|
+
case GGML_OP_ROPE:
|
15543
|
+
case GGML_OP_ROPE_BACK:
|
15544
|
+
case GGML_OP_ADD_REL_POS:
|
15545
|
+
{
|
15546
|
+
n_tasks = n_threads;
|
15547
|
+
} break;
|
15548
|
+
case GGML_OP_ALIBI:
|
15549
|
+
{
|
15550
|
+
n_tasks = 1; //TODO
|
15551
|
+
} break;
|
15552
|
+
case GGML_OP_CLAMP:
|
15553
|
+
{
|
15554
|
+
n_tasks = 1; //TODO
|
15555
|
+
} break;
|
15556
|
+
case GGML_OP_CONV_TRANSPOSE_1D:
|
15557
|
+
{
|
15558
|
+
n_tasks = n_threads;
|
15559
|
+
} break;
|
15560
|
+
case GGML_OP_IM2COL:
|
15561
|
+
{
|
15562
|
+
n_tasks = n_threads;
|
15563
|
+
} break;
|
15564
|
+
case GGML_OP_CONV_TRANSPOSE_2D:
|
15565
|
+
{
|
15566
|
+
n_tasks = n_threads;
|
15567
|
+
} break;
|
15568
|
+
case GGML_OP_POOL_1D:
|
15569
|
+
case GGML_OP_POOL_2D:
|
15570
|
+
{
|
15571
|
+
n_tasks = 1;
|
15572
|
+
} break;
|
15573
|
+
case GGML_OP_UPSCALE:
|
15574
|
+
{
|
15575
|
+
n_tasks = n_threads;
|
15576
|
+
} break;
|
15577
|
+
case GGML_OP_FLASH_ATTN:
|
15578
|
+
{
|
15579
|
+
n_tasks = n_threads;
|
15580
|
+
} break;
|
15581
|
+
case GGML_OP_FLASH_FF:
|
15582
|
+
{
|
15583
|
+
n_tasks = n_threads;
|
15584
|
+
} break;
|
15585
|
+
case GGML_OP_FLASH_ATTN_BACK:
|
15586
|
+
{
|
15587
|
+
n_tasks = n_threads;
|
15588
|
+
} break;
|
15589
|
+
case GGML_OP_WIN_PART:
|
15590
|
+
case GGML_OP_WIN_UNPART:
|
15591
|
+
case GGML_OP_GET_REL_POS:
|
15592
|
+
case GGML_OP_MAP_UNARY:
|
15593
|
+
case GGML_OP_MAP_BINARY:
|
15594
|
+
case GGML_OP_MAP_CUSTOM1_F32:
|
15595
|
+
case GGML_OP_MAP_CUSTOM2_F32:
|
15596
|
+
case GGML_OP_MAP_CUSTOM3_F32:
|
15597
|
+
{
|
15598
|
+
n_tasks = 1;
|
15599
|
+
} break;
|
15600
|
+
case GGML_OP_MAP_CUSTOM1:
|
15601
|
+
{
|
15602
|
+
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
15603
|
+
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
15604
|
+
n_tasks = n_threads;
|
15605
|
+
} else {
|
15606
|
+
n_tasks = MIN(p->n_tasks, n_threads);
|
15607
|
+
}
|
15608
|
+
} break;
|
15609
|
+
case GGML_OP_MAP_CUSTOM2:
|
15610
|
+
{
|
15611
|
+
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
15612
|
+
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
15613
|
+
n_tasks = n_threads;
|
15614
|
+
} else {
|
15615
|
+
n_tasks = MIN(p->n_tasks, n_threads);
|
15616
|
+
}
|
15617
|
+
} break;
|
15618
|
+
case GGML_OP_MAP_CUSTOM3:
|
15619
|
+
{
|
15620
|
+
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
15621
|
+
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
15622
|
+
n_tasks = n_threads;
|
15623
|
+
} else {
|
15624
|
+
n_tasks = MIN(p->n_tasks, n_threads);
|
15625
|
+
}
|
15626
|
+
} break;
|
15627
|
+
case GGML_OP_CROSS_ENTROPY_LOSS:
|
15628
|
+
{
|
15629
|
+
n_tasks = n_threads;
|
15630
|
+
} break;
|
15631
|
+
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
15632
|
+
{
|
15633
|
+
n_tasks = n_threads;
|
15634
|
+
} break;
|
15635
|
+
case GGML_OP_NONE:
|
15636
|
+
{
|
15637
|
+
n_tasks = 1;
|
15638
|
+
} break;
|
15639
|
+
case GGML_OP_COUNT:
|
15640
|
+
{
|
15641
|
+
GGML_ASSERT(false);
|
15642
|
+
} break;
|
15643
|
+
default:
|
15644
|
+
{
|
15645
|
+
printf("%s: op %s not implemented\n", __func__, ggml_op_name(node->op));
|
15646
|
+
GGML_ASSERT(false);
|
15647
|
+
} break;
|
15648
|
+
}
|
15649
|
+
|
15650
|
+
assert(n_tasks > 0);
|
16004
15651
|
|
16005
|
-
|
16006
|
-
node->perf_cycles += cycles_cur;
|
16007
|
-
node->perf_time_us += time_us_cur;
|
15652
|
+
return n_tasks;
|
16008
15653
|
}
|
16009
15654
|
|
16010
15655
|
static thread_ret_t ggml_graph_compute_thread(void * data) {
|
@@ -16013,7 +15658,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16013
15658
|
const struct ggml_cgraph * cgraph = state->shared->cgraph;
|
16014
15659
|
const struct ggml_cplan * cplan = state->shared->cplan;
|
16015
15660
|
|
16016
|
-
const int * n_tasks_arr = cplan->n_tasks;
|
16017
15661
|
const int n_threads = state->shared->n_threads;
|
16018
15662
|
|
16019
15663
|
set_numa_thread_affinity(state->ith, n_threads);
|
@@ -16038,9 +15682,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16038
15682
|
|
16039
15683
|
if (node_n != -1) {
|
16040
15684
|
/* FINALIZE */
|
16041
|
-
struct ggml_tensor * node =
|
15685
|
+
struct ggml_tensor * node = cgraph->nodes[node_n];
|
16042
15686
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
16043
|
-
params.nth =
|
15687
|
+
params.nth = ggml_get_n_tasks(node, n_threads);
|
16044
15688
|
ggml_compute_forward(¶ms, node);
|
16045
15689
|
}
|
16046
15690
|
ggml_graph_compute_perf_stats_node(node, state->shared);
|
@@ -16051,7 +15695,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16051
15695
|
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
|
16052
15696
|
|
16053
15697
|
struct ggml_tensor * node = cgraph->nodes[node_n];
|
16054
|
-
const int n_tasks =
|
15698
|
+
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
16055
15699
|
|
16056
15700
|
state->shared->perf_node_start_cycles = ggml_perf_cycles();
|
16057
15701
|
state->shared->perf_node_start_time_us = ggml_perf_time_us();
|
@@ -16109,7 +15753,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16109
15753
|
|
16110
15754
|
/* COMPUTE */
|
16111
15755
|
struct ggml_tensor * node = cgraph->nodes[node_n];
|
16112
|
-
const int n_tasks =
|
15756
|
+
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
16113
15757
|
|
16114
15758
|
struct ggml_compute_params params = {
|
16115
15759
|
/*.type =*/ GGML_TASK_COMPUTE,
|
@@ -16143,121 +15787,46 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16143
15787
|
|
16144
15788
|
struct ggml_tensor * node = cgraph->nodes[i];
|
16145
15789
|
|
15790
|
+
size_t cur = 0;
|
15791
|
+
|
16146
15792
|
switch (node->op) {
|
16147
15793
|
case GGML_OP_CPY:
|
16148
15794
|
case GGML_OP_DUP:
|
16149
15795
|
{
|
16150
15796
|
n_tasks = n_threads;
|
16151
15797
|
|
16152
|
-
size_t cur = 0;
|
16153
15798
|
if (ggml_is_quantized(node->type)) {
|
16154
15799
|
cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
|
16155
15800
|
}
|
16156
|
-
|
16157
|
-
work_size = MAX(work_size, cur);
|
16158
15801
|
} break;
|
16159
15802
|
case GGML_OP_ADD:
|
16160
15803
|
case GGML_OP_ADD1:
|
16161
15804
|
{
|
16162
15805
|
n_tasks = n_threads;
|
16163
15806
|
|
16164
|
-
size_t cur = 0;
|
16165
|
-
|
16166
15807
|
if (ggml_is_quantized(node->src[0]->type)) {
|
16167
15808
|
cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
|
16168
15809
|
}
|
16169
|
-
|
16170
|
-
work_size = MAX(work_size, cur);
|
16171
15810
|
} break;
|
16172
15811
|
case GGML_OP_ACC:
|
16173
15812
|
{
|
16174
15813
|
n_tasks = n_threads;
|
16175
15814
|
|
16176
|
-
size_t cur = 0;
|
16177
|
-
|
16178
15815
|
if (ggml_is_quantized(node->src[0]->type)) {
|
16179
15816
|
cur = ggml_type_size(GGML_TYPE_F32) * node->src[1]->ne[0] * n_tasks;
|
16180
15817
|
}
|
16181
|
-
|
16182
|
-
work_size = MAX(work_size, cur);
|
16183
|
-
} break;
|
16184
|
-
case GGML_OP_SUB:
|
16185
|
-
case GGML_OP_DIV:
|
16186
|
-
case GGML_OP_SQR:
|
16187
|
-
case GGML_OP_SQRT:
|
16188
|
-
case GGML_OP_LOG:
|
16189
|
-
case GGML_OP_SUM:
|
16190
|
-
case GGML_OP_SUM_ROWS:
|
16191
|
-
case GGML_OP_MEAN:
|
16192
|
-
case GGML_OP_ARGMAX:
|
16193
|
-
case GGML_OP_REPEAT:
|
16194
|
-
case GGML_OP_REPEAT_BACK:
|
16195
|
-
{
|
16196
|
-
n_tasks = 1;
|
16197
|
-
} break;
|
16198
|
-
|
16199
|
-
case GGML_OP_UNARY:
|
16200
|
-
{
|
16201
|
-
switch (ggml_get_unary_op(node)) {
|
16202
|
-
case GGML_UNARY_OP_ABS:
|
16203
|
-
case GGML_UNARY_OP_SGN:
|
16204
|
-
case GGML_UNARY_OP_NEG:
|
16205
|
-
case GGML_UNARY_OP_STEP:
|
16206
|
-
case GGML_UNARY_OP_TANH:
|
16207
|
-
case GGML_UNARY_OP_ELU:
|
16208
|
-
case GGML_UNARY_OP_RELU:
|
16209
|
-
{
|
16210
|
-
n_tasks = 1;
|
16211
|
-
} break;
|
16212
|
-
|
16213
|
-
case GGML_UNARY_OP_GELU:
|
16214
|
-
case GGML_UNARY_OP_GELU_QUICK:
|
16215
|
-
case GGML_UNARY_OP_SILU:
|
16216
|
-
{
|
16217
|
-
n_tasks = n_threads;
|
16218
|
-
} break;
|
16219
|
-
}
|
16220
15818
|
} break;
|
16221
|
-
case GGML_OP_SILU_BACK:
|
16222
|
-
case GGML_OP_MUL:
|
16223
|
-
case GGML_OP_NORM:
|
16224
|
-
case GGML_OP_RMS_NORM:
|
16225
|
-
case GGML_OP_RMS_NORM_BACK:
|
16226
|
-
case GGML_OP_GROUP_NORM:
|
16227
|
-
{
|
16228
|
-
n_tasks = n_threads;
|
16229
|
-
} break;
|
16230
|
-
case GGML_OP_CONCAT:
|
16231
15819
|
case GGML_OP_MUL_MAT:
|
16232
15820
|
{
|
16233
|
-
n_tasks = n_threads;
|
16234
|
-
|
16235
|
-
// TODO: use different scheduling for different matrix sizes
|
16236
|
-
//const int nr0 = ggml_nrows(node->src[0]);
|
16237
|
-
//const int nr1 = ggml_nrows(node->src[1]);
|
16238
|
-
|
16239
|
-
//n_tasks = MIN(n_threads, MAX(1, nr0/128));
|
16240
|
-
//printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
|
16241
|
-
|
16242
|
-
size_t cur = 0;
|
16243
15821
|
const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
|
16244
15822
|
|
16245
|
-
#if defined(
|
16246
|
-
if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
|
16247
|
-
n_tasks = 1; // TODO: this actually is doing nothing
|
16248
|
-
// the threads are still spinning
|
16249
|
-
} else
|
16250
|
-
#elif defined(GGML_USE_CLBLAST)
|
15823
|
+
#if defined(GGML_USE_CLBLAST)
|
16251
15824
|
if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
|
16252
|
-
n_tasks = 1; // TODO: this actually is doing nothing
|
16253
|
-
// the threads are still spinning
|
16254
15825
|
cur = ggml_cl_mul_mat_get_wsize(node->src[0], node->src[1], node);
|
16255
15826
|
} else
|
16256
15827
|
#endif
|
16257
15828
|
#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
|
16258
15829
|
if (ggml_compute_forward_mul_mat_use_blas(node->src[0], node->src[1], node)) {
|
16259
|
-
n_tasks = 1; // TODO: this actually is doing nothing
|
16260
|
-
// the threads are still spinning
|
16261
15830
|
if (node->src[0]->type != GGML_TYPE_F32) {
|
16262
15831
|
// here we need memory just for single 2D matrix from src0
|
16263
15832
|
cur = ggml_type_size(GGML_TYPE_F32)*(node->src[0]->ne[0]*node->src[0]->ne[1]);
|
@@ -16266,108 +15835,18 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16266
15835
|
#endif
|
16267
15836
|
if (node->src[1]->type != vec_dot_type) {
|
16268
15837
|
cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
|
16269
|
-
} else {
|
16270
|
-
cur = 0;
|
16271
15838
|
}
|
16272
|
-
|
16273
|
-
work_size = MAX(work_size, cur);
|
16274
15839
|
} break;
|
16275
15840
|
case GGML_OP_OUT_PROD:
|
16276
15841
|
{
|
16277
15842
|
n_tasks = n_threads;
|
16278
15843
|
|
16279
|
-
size_t cur = 0;
|
16280
|
-
|
16281
15844
|
if (ggml_is_quantized(node->src[0]->type)) {
|
16282
15845
|
cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
|
16283
15846
|
}
|
16284
|
-
|
16285
|
-
work_size = MAX(work_size, cur);
|
16286
|
-
} break;
|
16287
|
-
case GGML_OP_SCALE:
|
16288
|
-
{
|
16289
|
-
n_tasks = 1;
|
16290
|
-
} break;
|
16291
|
-
case GGML_OP_SET:
|
16292
|
-
case GGML_OP_CONT:
|
16293
|
-
case GGML_OP_RESHAPE:
|
16294
|
-
case GGML_OP_VIEW:
|
16295
|
-
case GGML_OP_PERMUTE:
|
16296
|
-
case GGML_OP_TRANSPOSE:
|
16297
|
-
case GGML_OP_GET_ROWS:
|
16298
|
-
case GGML_OP_GET_ROWS_BACK:
|
16299
|
-
case GGML_OP_DIAG:
|
16300
|
-
{
|
16301
|
-
n_tasks = 1;
|
16302
|
-
} break;
|
16303
|
-
case GGML_OP_DIAG_MASK_ZERO:
|
16304
|
-
case GGML_OP_DIAG_MASK_INF:
|
16305
|
-
case GGML_OP_SOFT_MAX:
|
16306
|
-
case GGML_OP_SOFT_MAX_BACK:
|
16307
|
-
case GGML_OP_ROPE:
|
16308
|
-
case GGML_OP_ROPE_BACK:
|
16309
|
-
case GGML_OP_ADD_REL_POS:
|
16310
|
-
{
|
16311
|
-
n_tasks = n_threads;
|
16312
|
-
} break;
|
16313
|
-
case GGML_OP_ALIBI:
|
16314
|
-
{
|
16315
|
-
n_tasks = 1; //TODO
|
16316
|
-
} break;
|
16317
|
-
case GGML_OP_CLAMP:
|
16318
|
-
{
|
16319
|
-
n_tasks = 1; //TODO
|
16320
|
-
} break;
|
16321
|
-
case GGML_OP_CONV_1D:
|
16322
|
-
{
|
16323
|
-
n_tasks = n_threads;
|
16324
|
-
|
16325
|
-
GGML_ASSERT(node->src[0]->ne[3] == 1);
|
16326
|
-
GGML_ASSERT(node->src[1]->ne[2] == 1);
|
16327
|
-
GGML_ASSERT(node->src[1]->ne[3] == 1);
|
16328
|
-
|
16329
|
-
const int64_t ne00 = node->src[0]->ne[0];
|
16330
|
-
const int64_t ne01 = node->src[0]->ne[1];
|
16331
|
-
const int64_t ne02 = node->src[0]->ne[2];
|
16332
|
-
|
16333
|
-
const int64_t ne10 = node->src[1]->ne[0];
|
16334
|
-
const int64_t ne11 = node->src[1]->ne[1];
|
16335
|
-
|
16336
|
-
const int64_t ne0 = node->ne[0];
|
16337
|
-
const int64_t ne1 = node->ne[1];
|
16338
|
-
const int64_t nk = ne00;
|
16339
|
-
const int64_t ew0 = nk * ne01;
|
16340
|
-
|
16341
|
-
UNUSED(ne02);
|
16342
|
-
UNUSED(ne10);
|
16343
|
-
UNUSED(ne11);
|
16344
|
-
|
16345
|
-
size_t cur = 0;
|
16346
|
-
|
16347
|
-
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16348
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16349
|
-
cur = sizeof(ggml_fp16_t)*(ne0*ne1*ew0);
|
16350
|
-
} else if (node->src[0]->type == GGML_TYPE_F32 &&
|
16351
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16352
|
-
cur = sizeof(float)*(ne0*ne1*ew0);
|
16353
|
-
} else {
|
16354
|
-
GGML_ASSERT(false);
|
16355
|
-
}
|
16356
|
-
|
16357
|
-
work_size = MAX(work_size, cur);
|
16358
|
-
} break;
|
16359
|
-
case GGML_OP_CONV_1D_STAGE_0:
|
16360
|
-
{
|
16361
|
-
n_tasks = n_threads;
|
16362
|
-
} break;
|
16363
|
-
case GGML_OP_CONV_1D_STAGE_1:
|
16364
|
-
{
|
16365
|
-
n_tasks = n_threads;
|
16366
15847
|
} break;
|
16367
15848
|
case GGML_OP_CONV_TRANSPOSE_1D:
|
16368
15849
|
{
|
16369
|
-
n_tasks = n_threads;
|
16370
|
-
|
16371
15850
|
GGML_ASSERT(node->src[0]->ne[3] == 1);
|
16372
15851
|
GGML_ASSERT(node->src[1]->ne[2] == 1);
|
16373
15852
|
GGML_ASSERT(node->src[1]->ne[3] == 1);
|
@@ -16379,7 +15858,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16379
15858
|
const int64_t ne10 = node->src[1]->ne[0]; // L
|
16380
15859
|
const int64_t ne11 = node->src[1]->ne[1]; // Cin
|
16381
15860
|
|
16382
|
-
size_t cur = 0;
|
16383
15861
|
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16384
15862
|
node->src[1]->type == GGML_TYPE_F32) {
|
16385
15863
|
cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02;
|
@@ -16391,59 +15869,13 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16391
15869
|
} else {
|
16392
15870
|
GGML_ASSERT(false);
|
16393
15871
|
}
|
16394
|
-
|
16395
|
-
work_size = MAX(work_size, cur);
|
16396
|
-
} break;
|
16397
|
-
case GGML_OP_CONV_2D:
|
16398
|
-
{
|
16399
|
-
n_tasks = n_threads;
|
16400
|
-
|
16401
|
-
const int64_t ne00 = node->src[0]->ne[0]; // W
|
16402
|
-
const int64_t ne01 = node->src[0]->ne[1]; // H
|
16403
|
-
const int64_t ne02 = node->src[0]->ne[2]; // C
|
16404
|
-
const int64_t ne03 = node->src[0]->ne[3]; // N
|
16405
|
-
|
16406
|
-
const int64_t ne10 = node->src[1]->ne[0]; // W
|
16407
|
-
const int64_t ne11 = node->src[1]->ne[1]; // H
|
16408
|
-
const int64_t ne12 = node->src[1]->ne[2]; // C
|
16409
|
-
|
16410
|
-
const int64_t ne0 = node->ne[0];
|
16411
|
-
const int64_t ne1 = node->ne[1];
|
16412
|
-
const int64_t ne2 = node->ne[2];
|
16413
|
-
const int64_t ne3 = node->ne[3];
|
16414
|
-
const int64_t nk = ne00*ne01;
|
16415
|
-
const int64_t ew0 = nk * ne02;
|
16416
|
-
|
16417
|
-
UNUSED(ne03);
|
16418
|
-
UNUSED(ne2);
|
16419
|
-
|
16420
|
-
size_t cur = 0;
|
16421
|
-
|
16422
|
-
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16423
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16424
|
-
// im2col: [N*OH*OW, IC*KH*KW]
|
16425
|
-
cur = sizeof(ggml_fp16_t)*(ne3*ne0*ne1*ew0);
|
16426
|
-
} else if (node->src[0]->type == GGML_TYPE_F32 &&
|
16427
|
-
node->src[1]->type == GGML_TYPE_F32) {
|
16428
|
-
cur = sizeof(float)* (ne10*ne11*ne12);
|
16429
|
-
} else {
|
16430
|
-
GGML_ASSERT(false);
|
16431
|
-
}
|
16432
|
-
|
16433
|
-
work_size = MAX(work_size, cur);
|
16434
|
-
} break;
|
16435
|
-
case GGML_OP_CONV_2D_STAGE_0:
|
16436
|
-
{
|
16437
|
-
n_tasks = n_threads;
|
16438
15872
|
} break;
|
16439
|
-
case
|
15873
|
+
case GGML_OP_IM2COL:
|
16440
15874
|
{
|
16441
15875
|
n_tasks = n_threads;
|
16442
15876
|
} break;
|
16443
15877
|
case GGML_OP_CONV_TRANSPOSE_2D:
|
16444
15878
|
{
|
16445
|
-
n_tasks = n_threads;
|
16446
|
-
|
16447
15879
|
const int64_t ne00 = node->src[0]->ne[0]; // W
|
16448
15880
|
const int64_t ne01 = node->src[0]->ne[1]; // H
|
16449
15881
|
const int64_t ne02 = node->src[0]->ne[2]; // Channels Out
|
@@ -16453,141 +15885,66 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16453
15885
|
const int64_t ne11 = node->src[1]->ne[1]; // H
|
16454
15886
|
const int64_t ne12 = node->src[1]->ne[2]; // Channels In
|
16455
15887
|
|
16456
|
-
size_t cur = 0;
|
16457
15888
|
cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02*ne03;
|
16458
15889
|
cur += sizeof(ggml_fp16_t)*ne10*ne11*ne12;
|
16459
|
-
|
16460
|
-
work_size = MAX(work_size, cur);
|
16461
|
-
} break;
|
16462
|
-
case GGML_OP_POOL_1D:
|
16463
|
-
case GGML_OP_POOL_2D:
|
16464
|
-
{
|
16465
|
-
n_tasks = 1;
|
16466
|
-
} break;
|
16467
|
-
case GGML_OP_UPSCALE:
|
16468
|
-
{
|
16469
|
-
n_tasks = n_threads;
|
16470
15890
|
} break;
|
16471
15891
|
case GGML_OP_FLASH_ATTN:
|
16472
15892
|
{
|
16473
15893
|
n_tasks = n_threads;
|
16474
15894
|
|
16475
|
-
size_t cur = 0;
|
16476
|
-
|
16477
15895
|
const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
|
16478
15896
|
|
16479
15897
|
if (node->src[1]->type == GGML_TYPE_F32) {
|
16480
15898
|
cur = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
|
16481
15899
|
cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
|
16482
|
-
}
|
16483
|
-
|
16484
|
-
if (node->src[1]->type == GGML_TYPE_F16) {
|
15900
|
+
} else if (node->src[1]->type == GGML_TYPE_F16) {
|
16485
15901
|
cur = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
|
16486
15902
|
cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
|
16487
15903
|
}
|
16488
|
-
|
16489
|
-
work_size = MAX(work_size, cur);
|
16490
15904
|
} break;
|
16491
15905
|
case GGML_OP_FLASH_FF:
|
16492
15906
|
{
|
16493
15907
|
n_tasks = n_threads;
|
16494
15908
|
|
16495
|
-
size_t cur = 0;
|
16496
|
-
|
16497
15909
|
if (node->src[1]->type == GGML_TYPE_F32) {
|
16498
15910
|
cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
|
16499
15911
|
cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
|
16500
|
-
}
|
16501
|
-
|
16502
|
-
if (node->src[1]->type == GGML_TYPE_F16) {
|
15912
|
+
} else if (node->src[1]->type == GGML_TYPE_F16) {
|
16503
15913
|
cur = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
|
16504
15914
|
cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
|
16505
15915
|
}
|
16506
|
-
|
16507
|
-
work_size = MAX(work_size, cur);
|
16508
15916
|
} break;
|
16509
15917
|
case GGML_OP_FLASH_ATTN_BACK:
|
16510
15918
|
{
|
16511
15919
|
n_tasks = n_threads;
|
16512
15920
|
|
16513
|
-
size_t cur = 0;
|
16514
|
-
|
16515
15921
|
const int64_t D = node->src[0]->ne[0];
|
16516
15922
|
const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
|
16517
15923
|
const int64_t mxDn = MAX(D, ne11) * 2; // *2 because of S and SM in ggml_compute_forward_flash_attn_back
|
16518
15924
|
if (node->src[1]->type == GGML_TYPE_F32) {
|
16519
15925
|
cur = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
|
16520
15926
|
cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
|
16521
|
-
}
|
16522
|
-
|
16523
|
-
if (node->src[1]->type == GGML_TYPE_F16) {
|
15927
|
+
} else if (node->src[1]->type == GGML_TYPE_F16) {
|
16524
15928
|
cur = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
|
16525
15929
|
cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
|
16526
15930
|
}
|
16527
|
-
|
16528
|
-
work_size = MAX(work_size, cur);
|
16529
|
-
} break;
|
16530
|
-
case GGML_OP_WIN_PART:
|
16531
|
-
case GGML_OP_WIN_UNPART:
|
16532
|
-
case GGML_OP_GET_REL_POS:
|
16533
|
-
case GGML_OP_MAP_UNARY:
|
16534
|
-
case GGML_OP_MAP_BINARY:
|
16535
|
-
case GGML_OP_MAP_CUSTOM1_F32:
|
16536
|
-
case GGML_OP_MAP_CUSTOM2_F32:
|
16537
|
-
case GGML_OP_MAP_CUSTOM3_F32:
|
16538
|
-
{
|
16539
|
-
n_tasks = 1;
|
16540
|
-
} break;
|
16541
|
-
case GGML_OP_MAP_CUSTOM1:
|
16542
|
-
{
|
16543
|
-
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
16544
|
-
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
16545
|
-
n_tasks = n_threads;
|
16546
|
-
} else {
|
16547
|
-
n_tasks = MIN(p->n_tasks, n_threads);
|
16548
|
-
}
|
16549
|
-
} break;
|
16550
|
-
case GGML_OP_MAP_CUSTOM2:
|
16551
|
-
{
|
16552
|
-
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
16553
|
-
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
16554
|
-
n_tasks = n_threads;
|
16555
|
-
} else {
|
16556
|
-
n_tasks = MIN(p->n_tasks, n_threads);
|
16557
|
-
}
|
16558
|
-
} break;
|
16559
|
-
case GGML_OP_MAP_CUSTOM3:
|
16560
|
-
{
|
16561
|
-
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
16562
|
-
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
16563
|
-
n_tasks = n_threads;
|
16564
|
-
} else {
|
16565
|
-
n_tasks = MIN(p->n_tasks, n_threads);
|
16566
|
-
}
|
16567
15931
|
} break;
|
15932
|
+
|
16568
15933
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
16569
15934
|
{
|
16570
15935
|
n_tasks = n_threads;
|
16571
15936
|
|
16572
|
-
|
16573
|
-
|
16574
|
-
work_size = MAX(work_size, cur);
|
16575
|
-
} break;
|
16576
|
-
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
|
16577
|
-
{
|
16578
|
-
n_tasks = n_threads;
|
16579
|
-
} break;
|
16580
|
-
case GGML_OP_NONE:
|
16581
|
-
{
|
16582
|
-
n_tasks = 1;
|
15937
|
+
cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks);
|
16583
15938
|
} break;
|
16584
15939
|
case GGML_OP_COUNT:
|
16585
15940
|
{
|
16586
15941
|
GGML_ASSERT(false);
|
16587
15942
|
} break;
|
15943
|
+
default:
|
15944
|
+
break;
|
16588
15945
|
}
|
16589
15946
|
|
16590
|
-
|
15947
|
+
work_size = MAX(work_size, cur);
|
16591
15948
|
}
|
16592
15949
|
|
16593
15950
|
if (work_size > 0) {
|
@@ -16609,12 +15966,6 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
16609
15966
|
if (cplan->work_size > 0) {
|
16610
15967
|
GGML_ASSERT(cplan->work_data);
|
16611
15968
|
}
|
16612
|
-
|
16613
|
-
for (int i = 0; i < cgraph->n_nodes; ++i) {
|
16614
|
-
if (cgraph->nodes[i]->op != GGML_OP_NONE) {
|
16615
|
-
GGML_ASSERT(cplan->n_tasks[i] > 0);
|
16616
|
-
}
|
16617
|
-
}
|
16618
15969
|
}
|
16619
15970
|
|
16620
15971
|
const int n_threads = cplan->n_threads;
|
@@ -16687,16 +16038,6 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
|
|
16687
16038
|
return compute_status;
|
16688
16039
|
}
|
16689
16040
|
|
16690
|
-
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
16691
|
-
for (int i = 0; i < cgraph->n_nodes; i++) {
|
16692
|
-
struct ggml_tensor * grad = cgraph->grads[i];
|
16693
|
-
|
16694
|
-
if (grad) {
|
16695
|
-
ggml_set_zero(grad);
|
16696
|
-
}
|
16697
|
-
}
|
16698
|
-
}
|
16699
|
-
|
16700
16041
|
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
16701
16042
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
16702
16043
|
|
@@ -16823,12 +16164,12 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16823
16164
|
const uint32_t magic = GGML_FILE_MAGIC;
|
16824
16165
|
const uint32_t version = GGML_FILE_VERSION;
|
16825
16166
|
const uint32_t n_leafs = cgraph->n_leafs;
|
16826
|
-
const uint32_t
|
16167
|
+
const uint32_t n_nodes = cgraph->n_nodes;
|
16827
16168
|
|
16828
16169
|
fwrite(&magic, sizeof(uint32_t), 1, fout);
|
16829
16170
|
fwrite(&version, sizeof(uint32_t), 1, fout);
|
16830
16171
|
fwrite(&n_leafs, sizeof(uint32_t), 1, fout);
|
16831
|
-
fwrite(&
|
16172
|
+
fwrite(&n_nodes, sizeof(uint32_t), 1, fout);
|
16832
16173
|
fwrite(&size_eval, sizeof(uint64_t), 1, fout);
|
16833
16174
|
}
|
16834
16175
|
|
@@ -16916,7 +16257,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16916
16257
|
if (idx == -1) {
|
16917
16258
|
for (int k = 0; k < cgraph->n_nodes; ++k) {
|
16918
16259
|
if (args[j] == cgraph->nodes[k]) {
|
16919
|
-
idx =
|
16260
|
+
idx = cgraph->n_leafs + k;
|
16920
16261
|
break;
|
16921
16262
|
}
|
16922
16263
|
}
|
@@ -16943,11 +16284,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16943
16284
|
}
|
16944
16285
|
}
|
16945
16286
|
|
16946
|
-
struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
|
16287
|
+
struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
|
16947
16288
|
assert(*ctx_data == NULL);
|
16948
16289
|
assert(*ctx_eval == NULL);
|
16949
16290
|
|
16950
|
-
struct ggml_cgraph result =
|
16291
|
+
struct ggml_cgraph * result = NULL;
|
16951
16292
|
|
16952
16293
|
struct ggml_tensor * data = NULL;
|
16953
16294
|
|
@@ -17019,13 +16360,11 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17019
16360
|
const uint32_t n_leafs = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
|
17020
16361
|
const uint32_t n_nodes = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
|
17021
16362
|
const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
|
17022
|
-
|
17023
|
-
result.n_leafs = n_leafs;
|
17024
|
-
result.n_nodes = n_nodes;
|
16363
|
+
const int graph_size = MAX(n_leafs, n_nodes);
|
17025
16364
|
|
17026
16365
|
// create the data context
|
17027
16366
|
{
|
17028
|
-
const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
|
16367
|
+
const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false);
|
17029
16368
|
|
17030
16369
|
struct ggml_init_params params = {
|
17031
16370
|
.mem_size = size_eval + overhead,
|
@@ -17041,6 +16380,12 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17041
16380
|
}
|
17042
16381
|
}
|
17043
16382
|
|
16383
|
+
result = ggml_new_graph_custom(*ctx_eval, graph_size, false);
|
16384
|
+
|
16385
|
+
result->n_leafs = n_leafs;
|
16386
|
+
result->n_nodes = n_nodes;
|
16387
|
+
|
16388
|
+
|
17044
16389
|
// leafs
|
17045
16390
|
{
|
17046
16391
|
uint32_t type;
|
@@ -17079,7 +16424,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17079
16424
|
tensor->nb[j] = nb[j];
|
17080
16425
|
}
|
17081
16426
|
|
17082
|
-
result
|
16427
|
+
result->leafs[i] = tensor;
|
17083
16428
|
|
17084
16429
|
ptr += ggml_nbytes(tensor);
|
17085
16430
|
|
@@ -17131,10 +16476,10 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17131
16476
|
continue;
|
17132
16477
|
}
|
17133
16478
|
|
17134
|
-
if (arg_idx <
|
17135
|
-
args[j] = result
|
16479
|
+
if (arg_idx < result->n_leafs) {
|
16480
|
+
args[j] = result->leafs[arg_idx];
|
17136
16481
|
} else {
|
17137
|
-
args[j] = result
|
16482
|
+
args[j] = result->nodes[arg_idx - result->n_leafs];
|
17138
16483
|
}
|
17139
16484
|
}
|
17140
16485
|
|
@@ -17186,7 +16531,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17186
16531
|
tensor->src[j] = args[j];
|
17187
16532
|
}
|
17188
16533
|
|
17189
|
-
result
|
16534
|
+
result->nodes[i] = tensor;
|
17190
16535
|
|
17191
16536
|
fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
|
17192
16537
|
}
|
@@ -18091,10 +17436,11 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
18091
17436
|
case GGML_OPT_ADAM:
|
18092
17437
|
{
|
18093
17438
|
result = (struct ggml_opt_params) {
|
18094
|
-
.type
|
18095
|
-
.
|
18096
|
-
.
|
18097
|
-
.
|
17439
|
+
.type = GGML_OPT_ADAM,
|
17440
|
+
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
17441
|
+
.n_threads = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
|
17442
|
+
.past = 0,
|
17443
|
+
.delta = 1e-5f,
|
18098
17444
|
|
18099
17445
|
.max_no_improvement = 100,
|
18100
17446
|
|
@@ -18121,10 +17467,11 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
|
|
18121
17467
|
case GGML_OPT_LBFGS:
|
18122
17468
|
{
|
18123
17469
|
result = (struct ggml_opt_params) {
|
18124
|
-
.type
|
18125
|
-
.
|
18126
|
-
.
|
18127
|
-
.
|
17470
|
+
.type = GGML_OPT_LBFGS,
|
17471
|
+
.graph_size = GGML_DEFAULT_GRAPH_SIZE,
|
17472
|
+
.n_threads = 1,
|
17473
|
+
.past = 0,
|
17474
|
+
.delta = 1e-5f,
|
18128
17475
|
|
18129
17476
|
.max_no_improvement = 0,
|
18130
17477
|
|
@@ -18266,14 +17613,11 @@ enum ggml_opt_result ggml_opt_resume(
|
|
18266
17613
|
struct ggml_tensor * f) {
|
18267
17614
|
|
18268
17615
|
// build forward + backward compute graphs
|
18269
|
-
struct
|
18270
|
-
|
18271
|
-
|
18272
|
-
struct ggml_cgraph * gf = (struct ggml_cgraph *) gfbuf->data;
|
18273
|
-
struct ggml_cgraph * gb = (struct ggml_cgraph *) gbbuf->data;
|
17616
|
+
struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, true);
|
17617
|
+
ggml_build_forward_expand(gf, f);
|
18274
17618
|
|
18275
|
-
*
|
18276
|
-
|
17619
|
+
struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
|
17620
|
+
ggml_build_backward_expand(ctx, gf, gb, true);
|
18277
17621
|
|
18278
17622
|
return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
|
18279
17623
|
}
|
@@ -18729,7 +18073,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18729
18073
|
{
|
18730
18074
|
ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
|
18731
18075
|
|
18732
|
-
for (
|
18076
|
+
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
|
18733
18077
|
struct gguf_kv * kv = &ctx->kv[i];
|
18734
18078
|
|
18735
18079
|
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
|
@@ -18776,7 +18120,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18776
18120
|
case GGUF_TYPE_STRING:
|
18777
18121
|
{
|
18778
18122
|
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
|
18779
|
-
for (
|
18123
|
+
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
|
18780
18124
|
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
|
18781
18125
|
}
|
18782
18126
|
} break;
|
@@ -18804,7 +18148,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18804
18148
|
{
|
18805
18149
|
ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
|
18806
18150
|
|
18807
|
-
for (
|
18151
|
+
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
18808
18152
|
struct gguf_tensor_info * info = &ctx->infos[i];
|
18809
18153
|
|
18810
18154
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
@@ -18851,7 +18195,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18851
18195
|
// compute the total size of the data section, taking into account the alignment
|
18852
18196
|
{
|
18853
18197
|
ctx->size = 0;
|
18854
|
-
for (
|
18198
|
+
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
18855
18199
|
struct gguf_tensor_info * info = &ctx->infos[i];
|
18856
18200
|
|
18857
18201
|
const int64_t ne =
|
@@ -18920,7 +18264,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
|
|
18920
18264
|
ggml_set_no_alloc(ctx_data, true);
|
18921
18265
|
|
18922
18266
|
// create the tensors
|
18923
|
-
for (
|
18267
|
+
for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
|
18924
18268
|
const int64_t ne[GGML_MAX_DIMS] = {
|
18925
18269
|
ctx->infos[i].ne[0],
|
18926
18270
|
ctx->infos[i].ne[1],
|