llama_cpp 0.9.2 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -1
- data/ext/llama_cpp/llama_cpp.cpp +12 -0
- data/ext/llama_cpp/src/ggml-alloc.c +378 -208
- data/ext/llama_cpp/src/ggml-alloc.h +68 -16
- data/ext/llama_cpp/src/ggml-backend-impl.h +87 -0
- data/ext/llama_cpp/src/ggml-backend.c +578 -13
- data/ext/llama_cpp/src/ggml-backend.h +70 -77
- data/ext/llama_cpp/src/ggml-cuda.cu +260 -46
- data/ext/llama_cpp/src/ggml-impl.h +13 -7
- data/ext/llama_cpp/src/ggml-metal.h +1 -1
- data/ext/llama_cpp/src/ggml-metal.m +113 -32
- data/ext/llama_cpp/src/ggml-metal.metal +107 -1
- data/ext/llama_cpp/src/ggml-quants.c +174 -74
- data/ext/llama_cpp/src/ggml.c +881 -1459
- data/ext/llama_cpp/src/ggml.h +64 -45
- data/ext/llama_cpp/src/llama.cpp +555 -49
- data/ext/llama_cpp/src/llama.h +77 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +2 -0
- metadata +3 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -100,6 +100,49 @@ typedef void * thread_ret_t;
 #include <hbwmalloc.h>
 #endif
 
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+#if (defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)) && \
+    (!defined(TARGET_OS_TV) && !defined(TARGET_OS_WATCH))
+
+#include <sys/wait.h>
+
+void ggml_print_backtrace(void) {
+    /*
+    #include <execinfo.h>
+    #include <dlfcn.h>
+
+    void * trace[100];
+
+    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+
+    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
+    */
+
+    // backtrack_symbols does not show line numbers, use gdb instead
+    char attach[32];
+    snprintf(attach, sizeof(attach), "attach %d", getpid());
+    int pid = fork();
+    if (pid == 0) {
+        execlp("gdb", "gdb", "--batch",
+            "-ex", "set style enabled on",
+            "-ex", attach,
+            "-ex", "bt -frame-info source-and-location",
+            "-ex", "detach",
+            "-ex", "quit",
+            NULL);
+    } else {
+        waitpid(pid, NULL, 0);
+    }
+}
+#else
+void ggml_print_backtrace(void) {
+    // platform not supported
+}
+#endif
+
 /*#define GGML_PERF*/
 #define GGML_DEBUG 0
 #define GGML_GELU_FP16
@@ -228,6 +271,12 @@ inline static void * ggml_aligned_malloc(size_t size) {
 // floating point type used to accumulate sums
 typedef double ggml_float;
 
+#undef MIN
+#undef MAX
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
 //
 // global data
 //
@@ -561,6 +610,18 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
 // simd mappings
 //
 
+#if defined(__ARM_NEON)
+#if !defined(__aarch64__)
+
+// 64-bit compatibility
+
+inline static float vaddvq_f32(float32x4_t v) {
+    return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
+}
+
+#endif
+#endif
+
 // we define a common set of C macros which map to specific intrinsics based on the current architecture
 // we then implement the fundamental computation operations below using only these macros
 // adding support for new architectures requires to define the corresponding SIMD macros
@@ -1352,6 +1413,7 @@ inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) {
 inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
 inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
 inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
+inline static void ggml_vec_leaky_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.1f*x[i]; }
 
 static const float GELU_COEF_A    = 0.044715f;
 static const float GELU_QUICK_COEF = -1.702f;
@@ -1572,13 +1634,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "ROPE_BACK",
     "ALIBI",
     "CLAMP",
-    "CONV_1D",
-    "CONV_1D_STAGE_0",
-    "CONV_1D_STAGE_1",
     "CONV_TRANSPOSE_1D",
-    "CONV_2D",
-    "CONV_2D_STAGE_0",
-    "CONV_2D_STAGE_1",
+    "IM2COL",
     "CONV_TRANSPOSE_2D",
     "POOL_1D",
     "POOL_2D",
@@ -1609,7 +1666,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 73, "GGML_OP_COUNT != 73");
+static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -1659,13 +1716,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "rope_back(x)",
     "alibi(x)",
     "clamp(x)",
-    "conv_1d(x)",
-    "conv_1d_stage_0(x)",
-    "conv_1d_stage_1(x)",
     "conv_transpose_1d(x)",
-    "conv_2d(x)",
-    "conv_2d_stage_0(x)",
-    "conv_2d_stage_1(x)",
+    "im2col(x)",
     "conv_transpose_2d(x)",
     "pool_1d(x)",
     "pool_2d(x)",
@@ -1696,7 +1748,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 73, "GGML_OP_COUNT != 73");
+static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -1724,13 +1776,7 @@ static void ggml_setup_op_has_task_pass(void) {
     p[GGML_OP_GET_ROWS_BACK     ] = true;
     p[GGML_OP_DIAG_MASK_INF     ] = true;
     p[GGML_OP_DIAG_MASK_ZERO    ] = true;
-    p[GGML_OP_CONV_1D           ] = true;
-    p[GGML_OP_CONV_1D_STAGE_0   ] = true;
-    p[GGML_OP_CONV_1D_STAGE_1   ] = true;
     p[GGML_OP_CONV_TRANSPOSE_1D ] = true;
-    p[GGML_OP_CONV_2D           ] = true;
-    p[GGML_OP_CONV_2D_STAGE_0   ] = true;
-    p[GGML_OP_CONV_2D_STAGE_1   ] = true;
     p[GGML_OP_CONV_TRANSPOSE_2D ] = true;
     p[GGML_OP_FLASH_ATTN_BACK   ] = true;
     p[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
@@ -3769,6 +3815,14 @@ struct ggml_tensor * ggml_relu_inplace(
     return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
 }
 
+// ggml_leaky
+
+struct ggml_tensor * ggml_leaky(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a) {
+    return ggml_unary(ctx, a, GGML_UNARY_OP_LEAKY);
+}
+
 // ggml_gelu
 
 struct ggml_tensor * ggml_gelu(
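The hunk above (together with the `ggml_vec_leaky_f32` and `ggml_compute_forward_leaky` hunks) adds a `GGML_UNARY_OP_LEAKY` unary op with a fixed 0.1 negative slope. A minimal usage sketch, not part of the gem; it assumes this ggml revision's graph API (`ggml_new_graph`, `ggml_build_forward_expand`, `ggml_graph_compute_with_ctx`):

```c
// Hypothetical example, not gem code.
#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params ip = { 16*1024*1024, NULL, false };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    float * xd = (float *) x->data;
    xd[0] = -2.0f; xd[1] = -0.5f; xd[2] = 0.5f; xd[3] = 3.0f;

    // y[i] = x[i] > 0 ? x[i] : 0.1f*x[i] -- the slope is hard-coded (see ggml_vec_leaky_f32)
    struct ggml_tensor * y = ggml_leaky(ctx, x);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, y);
    ggml_graph_compute_with_ctx(ctx, gf, 1);

    for (int i = 0; i < 4; i++) printf("%g ", ((float *) y->data)[i]); // -0.2 -0.05 0.5 3
    printf("\n");

    ggml_free(ctx);
    return 0;
}
```

Note that at this version there is no parameter to change the slope.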
@@ -5076,82 +5130,6 @@ static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p,
     return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
 }
 
-// im2col: [N, IC, IL] => [N, OL, IC*K]
-// a: [OC,IC, K]
-// b: [N, IC, IL]
-// result: [N, OL, IC*K]
-static struct ggml_tensor * ggml_conv_1d_stage_0(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b,
-        int s0,
-        int p0,
-        int d0) {
-    GGML_ASSERT(a->ne[1] == b->ne[1]);
-    bool is_node = false;
-
-    if (a->grad || b->grad) {
-        GGML_ASSERT(false); // TODO: implement backward
-        is_node = true;
-    }
-
-    const int64_t OL = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
-
-    const int64_t ne[4] = {
-        a->ne[1] * a->ne[0],
-        OL,
-        b->ne[2],
-        1,
-    };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
-
-    int32_t params[] = { s0, p0, d0 };
-    ggml_set_op_params(result, params, sizeof(params));
-
-    result->op = GGML_OP_CONV_1D_STAGE_0;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = b;
-
-    return result;
-}
-
-// ggml_conv_1d_stage_1
-
-// gemm: [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
-// a: [OC, IC, K]
-// b: [N, OL, IC * K]
-// result: [N, OC, OL]
-static struct ggml_tensor * ggml_conv_1d_stage_1(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b) {
-
-    bool is_node = false;
-
-    if (a->grad || b->grad) {
-        GGML_ASSERT(false); // TODO: implement backward
-        is_node = true;
-    }
-
-    const int64_t ne[4] = {
-        b->ne[1],
-        a->ne[2],
-        b->ne[2],
-        1,
-    };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-
-    result->op = GGML_OP_CONV_1D_STAGE_1;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = b;
-
-    return result;
-}
-
-// ggml_conv_1d
-
 GGML_API struct ggml_tensor * ggml_conv_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -5159,43 +5137,17 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
         int s0,
         int p0,
         int d0) {
-    struct ggml_tensor * result = ggml_conv_1d_stage_0(ctx, a, b, s0, p0, d0);
-    result = ggml_conv_1d_stage_1(ctx, a, result);
-    return result;
-}
-
-// GGML_API struct ggml_tensor * ggml_conv_1d(
-//         struct ggml_context * ctx,
-//         struct ggml_tensor * a,
-//         struct ggml_tensor * b,
-//         int s0,
-//         int p0,
-//         int d0) {
-//     GGML_ASSERT(ggml_is_matrix(b));
-//     GGML_ASSERT(a->ne[1] == b->ne[1]);
-//     bool is_node = false;
-
-//     if (a->grad || b->grad) {
-//         GGML_ASSERT(false); // TODO: implement backward
-//         is_node = true;
-//     }
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false); // [N, OL, IC * K]
 
-
-
-
-    //
-//     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+    struct ggml_tensor * result =
+        ggml_mul_mat(ctx,
+                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
+                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
 
-
-//     ggml_set_op_params(result, params, sizeof(params));
+    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
 
-
-
-//     result->src[0] = a;
-//     result->src[1] = b;
-
-//     return result;
-// }
+    return result;
+}
 
 // ggml_conv_1d_ph
 
@@ -5258,7 +5210,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
 // a: [OC,IC, KH, KW]
 // b: [N, IC, IH, IW]
 // result: [N, OH, OW, IC*KH*KW]
-static struct ggml_tensor * ggml_conv_2d_stage_0(
+struct ggml_tensor * ggml_im2col(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5267,9 +5219,14 @@ static struct ggml_tensor * ggml_conv_2d_stage_0(
         int p0,
         int p1,
         int d0,
-        int d1) {
+        int d1,
+        bool is_2D) {
 
-    GGML_ASSERT(a->ne[2] == b->ne[2]);
+    if(is_2D) {
+        GGML_ASSERT(a->ne[2] == b->ne[2]);
+    } else {
+        GGML_ASSERT(a->ne[1] == b->ne[1]);
+    }
     bool is_node = false;
 
     if (a->grad || b->grad) {
@@ -5277,81 +5234,51 @@ static struct ggml_tensor * ggml_conv_2d_stage_0(
         is_node = true;
     }
 
-    const int64_t OH = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
-    const int64_t OW = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
+    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
+    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
 
     const int64_t ne[4] = {
-        a->ne[2] * a->ne[1] * a->ne[0],
+        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
         OW,
-        OH,
-        b->ne[3],
+        is_2D ? OH : b->ne[2],
+        is_2D ? b->ne[3] : 1,
     };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
 
-
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
     ggml_set_op_params(result, params, sizeof(params));
 
-    result->op = GGML_OP_CONV_2D_STAGE_0;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = b;
-
-    return result;
-
-}
-
-// gemm: [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
-// a: [OC, IC, KH, KW]
-// b: [N, OH, OW, IC * KH * KW]
-// result: [N, OC, OH, OW]
-static struct ggml_tensor * ggml_conv_2d_stage_1(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        struct ggml_tensor * b) {
-
-    bool is_node = false;
-
-    if (a->grad || b->grad) {
-        GGML_ASSERT(false); // TODO: implement backward
-        is_node = true;
-    }
-
-    const int64_t ne[4] = {
-        b->ne[1],
-        b->ne[2],
-        a->ne[3],
-        b->ne[3],
-    };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-
-    result->op = GGML_OP_CONV_2D_STAGE_1;
+    result->op = GGML_OP_IM2COL;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
 
     return result;
-
 }
 
 // a: [OC,IC, KH, KW]
 // b: [N, IC, IH, IW]
 // result: [N, OC, OH, OW]
 struct ggml_tensor * ggml_conv_2d(
-        struct ggml_context * ctx,
-        struct ggml_tensor  * a,
-        struct ggml_tensor  * b,
-        int                   s0,
-        int                   s1,
-        int                   p0,
-        int                   p1,
-        int                   d0,
-        int                   d1) {
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        struct ggml_tensor * b,
+        int s0,
+        int s1,
+        int p0,
+        int p1,
+        int d0,
+        int d1) {
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true); // [N, OH, OW, IC * KH * KW]
 
-    struct ggml_tensor * result = ggml_conv_2d_stage_0(ctx, a, b, s0, s1, p0, p1, d0, d1); // [N, OH, OW, IC * KH * KW]
-    result = ggml_conv_2d_stage_1(ctx, a, result);
+    struct ggml_tensor * result =
+        ggml_mul_mat(ctx,
+                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
+                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]), a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
 
-    return result;
+    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], a->ne[3], im2col->ne[3]); // [N, OC, OH, OW]
 
+    return result;
 }
 
 // ggml_conv_2d_sk_p0
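The hunks above collapse the dedicated convolution kernels into one pattern: materialize patches with `ggml_im2col`, run a single `ggml_mul_mat`, then reshape. As a standalone illustration of what the op materializes, a naive 1-D im2col (hypothetical helper, not gem code) mirroring the `[N, OL, IC*K]` layout:

```c
#include <stdio.h>

// Pack input x [IC, IL] into cols [OL, IC*K] so conv becomes one matmul.
static void im2col_1d(const float *x, int IC, int IL,
                      int K, int s0, int p0, int d0,
                      float *cols, int OL) {
    for (int ol = 0; ol < OL; ol++) {
        for (int ic = 0; ic < IC; ic++) {
            for (int ik = 0; ik < K; ik++) {
                const int il = ol*s0 + ik*d0 - p0; // same index math as the diff
                cols[ol*(IC*K) + ic*K + ik] =
                    (il < 0 || il >= IL) ? 0.0f : x[ic*IL + il];
            }
        }
    }
}

int main(void) {
    // IC=1, IL=5, K=3, stride 1, pad 1, dilation 1 -> OL = (5 + 2 - 2 - 1)/1 + 1 = 5
    const float x[5] = {1, 2, 3, 4, 5};
    float cols[5*3];
    im2col_1d(x, 1, 5, 3, 1, 1, 1, cols, 5);
    for (int ol = 0; ol < 5; ol++)
        printf("%g %g %g\n", cols[ol*3], cols[ol*3+1], cols[ol*3+2]);
    // rows: (0,1,2) (1,2,3) (2,3,4) (3,4,5) (4,5,0)
    return 0;
}
```

Each row holds the receptive field of one output position, so the convolution itself reduces to one GEMM against the kernel reshaped to `[OC, IC*K]`, exactly as `ggml_conv_1d`/`ggml_conv_2d` now do.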
@@ -5411,7 +5338,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
 
 // ggml_pool_*
 
-static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
+static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
     return (ins + 2 * p - ks) / s + 1;
 }
 
@@ -5458,8 +5385,8 @@ struct ggml_tensor * ggml_pool_2d(
         int k1,
         int s0,
         int s1,
-        int p0,
-        int p1) {
+        float p0,
+        float p1) {
 
     bool is_node = false;
 
@@ -8921,6 +8848,48 @@ static void ggml_compute_forward_silu(
     }
 }
 
+// ggml_compute_forward_leaky
+
+static void ggml_compute_forward_leaky_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+    assert(ggml_are_same_shape(src0, dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    const int n  = ggml_nrows(src0);
+    const int nc = src0->ne[0];
+
+    assert(dst->nb[0]  == sizeof(float));
+    assert(src0->nb[0] == sizeof(float));
+
+    for (int i = 0; i < n; i++) {
+        ggml_vec_leaky_f32(nc,
+                (float *) ((char *) dst->data  + i*( dst->nb[1])),
+                (float *) ((char *) src0->data + i*(src0->nb[1])));
+    }
+}
+
+static void ggml_compute_forward_leaky(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    switch (src0->type) {
+        case GGML_TYPE_F32:
+            {
+                ggml_compute_forward_leaky_f32(params, src0, dst);
+            } break;
+        default:
+            {
+                GGML_ASSERT(false);
+            } break;
+    }
+}
+
 // ggml_compute_forward_silu_back
 
 static void ggml_compute_forward_silu_back_f32(
@@ -9404,6 +9373,8 @@ static bool ggml_compute_forward_mul_mat_use_blas(
     // TODO: find the optimal values for these
     if (ggml_is_contiguous(src0) &&
         ggml_is_contiguous(src1) &&
+        src0->type == GGML_TYPE_F32 &&
+        src1->type == GGML_TYPE_F32 &&
         (ne0 >= 32 && ne1 >= 32 && ne10 >= 32)) {
 
         /*printf("BLAS: %d %d %d %d %d\n", ne0, ne1, ne10, ne00, ne01);*/
@@ -9442,7 +9413,7 @@ static void ggml_compute_forward_mul_mat(
 
     // we don't support permuted src0 or src1
     GGML_ASSERT(nb00 == ggml_type_size(type));
-    GGML_ASSERT(nb10 == sizeof(float));
+    GGML_ASSERT(nb10 == ggml_type_size(src1->type));
 
     // dst cannot be transposed or permuted
     GGML_ASSERT(nb0 == sizeof(float));
@@ -9640,10 +9611,12 @@ static void ggml_compute_forward_out_prod_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
+    GGML_ASSERT(ne0  == ne00);
+    GGML_ASSERT(ne1  == ne10);
+    GGML_ASSERT(ne2  == ne02);
     GGML_ASSERT(ne02 == ne12);
-    GGML_ASSERT(ne03 == ne13);
-    GGML_ASSERT(ne2  == ne12);
     GGML_ASSERT(ne3  == ne13);
+    GGML_ASSERT(ne03 == ne13);
 
     // we don't support permuted src0 or src1
     GGML_ASSERT(nb00 == sizeof(float));
@@ -9654,18 +9627,25 @@ static void ggml_compute_forward_out_prod_f32(
     // GGML_ASSERT(nb1 <= nb2);
     // GGML_ASSERT(nb2 <= nb3);
 
-    GGML_ASSERT(ne0 == ne00);
-    GGML_ASSERT(ne1 == ne10);
-    GGML_ASSERT(ne2 == ne02);
-    GGML_ASSERT(ne3 == ne03);
-
     // nb01 >= nb00 - src0 is not transposed
     //   compute by src0 rows
 
     // TODO: #if defined(GGML_USE_CUBLAS) ggml_cuda_out_prod
-    // TODO: #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CLBLAST)
+    // TODO: #if defined(GGML_USE_CLBLAST)
+
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+    bool use_blas = ggml_is_matrix(src0) &&
+        ggml_is_matrix(src1) &&
+        ggml_is_contiguous(src0) &&
+        (ggml_is_contiguous(src1) || ggml_is_transposed(src1));
+#endif
 
     if (params->type == GGML_TASK_INIT) {
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) // gemm beta will zero dst
+        if (use_blas) {
+            return;
+        }
+#endif
         ggml_vec_set_f32(ne0*ne1*ne2*ne3, dst->data, 0);
         return;
     }
@@ -9674,6 +9654,50 @@ static void ggml_compute_forward_out_prod_f32(
         return;
     }
 
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+    if (use_blas) {
+        if (params->ith != 0) { // All threads other than the first do no work.
+            return;
+        }
+        // Arguments to ggml_compute_forward_out_prod (expressed as major,minor)
+        // src0: (k,n)
+        // src1: (k,m)
+        // dst:  (m,n)
+        //
+        // Arguments to sgemm (see https://github.com/Reference-LAPACK/lapack/blob/master/BLAS/SRC/sgemm.f)
+        // Also expressed as (major,minor)
+        // a: (m,k): so src1 transposed
+        // b: (k,n): so src0
+        // c: (m,n)
+        //
+        // However, if ggml_is_transposed(src1) is true, then
+        // src1->data already contains a transposed version, so sgemm mustn't
+        // transpose it further.
+
+        int n = src0->ne[0];
+        int k = src0->ne[1];
+        int m = src1->ne[0];
+
+        int transposeA, lda;
+
+        if (!ggml_is_transposed(src1)) {
+            transposeA = CblasTrans;
+            lda = m;
+        } else {
+            transposeA = CblasNoTrans;
+            lda = k;
+        }
+
+        float * a = (float *) ((char *) src1->data);
+        float * b = (float *) ((char *) src0->data);
+        float * c = (float *) ((char *) dst->data);
+
+        cblas_sgemm(CblasRowMajor, transposeA, CblasNoTrans, m, n, k, 1.0, a, lda, b, n, 0.0, c, n);
+
+        return;
+    }
+#endif
+
     // dst[:,:,:,:] = 0
     // for i2,i3:
     //   for i1:
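The comment block in this hunk maps ggml's out_prod onto one row-major `cblas_sgemm` call. As a sanity check of that mapping, a naive reference (hypothetical, not gem code; all matrices viewed row-major) computing the same `C = A^T * B` contraction:

```c
#include <stdio.h>

// dst[i][j] = sum_l src1[l][i] * src0[l][j], i.e. C = src1^T * src0,
// matching cblas_sgemm(RowMajor, CblasTrans, CblasNoTrans, m, n, k, ...).
static void out_prod_ref(int m, int n, int k,
                         const float *src1,  /* k x m */
                         const float *src0,  /* k x n */
                         float *dst) {       /* m x n */
    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
            float s = 0.0f;
            for (int l = 0; l < k; l++) {
                s += src1[l*m + i] * src0[l*n + j];
            }
            dst[i*n + j] = s;
        }
    }
}

int main(void) {
    const float src0[2*3] = {1,2,3, 4,5,6};  // k=2, n=3
    const float src1[2*2] = {1,0, 0,1};      // k=2, m=2 (identity, so dst == src0)
    float dst[2*3] = {0};
    out_prod_ref(2, 3, 2, src1, src0, dst);
    for (int i = 0; i < 6; i++) printf("%g ", dst[i]); // 1 2 3 4 5 6
    printf("\n");
    return 0;
}
```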
@@ -11340,9 +11364,9 @@ static void ggml_compute_forward_rope_back(
     }
 }
 
-// ggml_compute_forward_conv_1d
+// ggml_compute_forward_conv_transpose_1d
 
-static void ggml_compute_forward_conv_1d_f16_f32(
+static void ggml_compute_forward_conv_transpose_1d_f16_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
@@ -11359,14 +11383,7 @@ static void ggml_compute_forward_conv_1d_f16_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    const int nk = ne00;
-
-    // size of the convolution row - the kernel size unrolled across all input channels
-    const int ew0 = nk*ne01;
-
-    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
-    const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
-    const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
+    const int nk = ne00*ne01*ne02;
 
     GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
@@ -11374,23 +11391,37 @@ static void ggml_compute_forward_conv_1d_f16_f32(
     if (params->type == GGML_TASK_INIT) {
         memset(params->wdata, 0, params->wsize);
 
-        ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
+        // permute kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
+        {
+            ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
+
+            for (int64_t i02 = 0; i02 < ne02; i02++) {
+                for (int64_t i01 = 0; i01 < ne01; i01++) {
+                    const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
+                    ggml_fp16_t * dst_data = wdata + i01*ne00*ne02;
+                    for (int64_t i00 = 0; i00 < ne00; i00++) {
+                        dst_data[i00*ne02 + i02] = src[i00];
+                    }
+                }
+            }
+        }
 
-        for (int64_t i11 = 0; i11 < ne11; i11++) {
-            const float * const src = (float *)((char *) src1->data + i11*nb11);
+        // permute source data (src1) from (L x Cin) to (Cin x L)
+        {
+            ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + nk;
             ggml_fp16_t * dst_data = wdata;
 
-            for (int64_t i0 = 0; i0 < ne0; i0++) {
-                for (int64_t ik = 0; ik < nk; ik++) {
-                    const int idx0 = i0*s0 + ik*d0 - p0;
-
-                    if(!(idx0 < 0 || idx0 >= ne10)) {
-                        dst_data[i0*ew0 + i11*nk + ik] = GGML_FP32_TO_FP16(src[idx0]);
-                    }
+            for (int64_t i11 = 0; i11 < ne11; i11++) {
+                const float * const src = (float *)((char *) src1->data + i11*nb11);
+                for (int64_t i10 = 0; i10 < ne10; i10++) {
+                    dst_data[i10*ne11 + i11] = GGML_FP32_TO_FP16(src[i10]);
                 }
             }
         }
 
+        // need to zero dst since we are accumulating into it
+        memset(dst->data, 0, ggml_nbytes(dst));
+
         return;
     }
 
@@ -11398,8 +11429,10 @@ static void ggml_compute_forward_conv_1d_f16_f32(
         return;
     }
 
+    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
+
     // total rows in dst
-    const int nr = ne02;
+    const int nr = ne1;
 
     // rows per thread
     const int dr = (nr + nth - 1)/nth;
@@ -11408,22 +11441,26 @@ static void ggml_compute_forward_conv_1d_f16_f32(
     const int ir0 = dr*ith;
     const int ir1 = MIN(ir0 + dr, nr);
 
-    ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-
-    for (int i2 = 0; i2 < ne2; i2++) {
-        for (int i1 = ir0; i1 < ir1; i1++) {
-            float * dst_data = (float *)((char *) dst->data + i2*nb2 + i1*nb1);
+    ggml_fp16_t * const wdata     = (ggml_fp16_t *) params->wdata + 0;
+    ggml_fp16_t * const wdata_src = wdata + nk;
 
-            for (int i0 = 0; i0 < ne0; i0++) {
-                ggml_vec_dot_f16(ew0, dst_data + i0,
-                        (ggml_fp16_t *) ((char *) src0->data + i1*nb02),
-                        (ggml_fp16_t *) wdata + i2*nb2 + i0*ew0);
+    for (int i1 = ir0; i1 < ir1; i1++) {
+        float * dst_data = (float *)((char *) dst->data + i1*nb1);
+        ggml_fp16_t * wdata_kernel = wdata + i1*ne02*ne00;
+        for (int i10 = 0; i10 < ne10; i10++) {
+            const int i1n = i10*ne11;
+            for (int i00 = 0; i00 < ne00; i00++) {
+                float v = 0;
+                ggml_vec_dot_f16(ne02, &v,
+                        (ggml_fp16_t *) wdata_src + i1n,
+                        (ggml_fp16_t *) wdata_kernel + i00*ne02);
+                dst_data[i10*s0 + i00] += v;
             }
         }
     }
 }
 
-static void ggml_compute_forward_conv_1d_f32(
+static void ggml_compute_forward_conv_transpose_1d_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
@@ -11440,430 +11477,7 @@ static void ggml_compute_forward_conv_1d_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    const int nk = ne00;
-
-    const int ew0 = nk*ne01;
-
-    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
-    const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
-    const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
-
-    GGML_ASSERT(nb00 == sizeof(float));
-    GGML_ASSERT(nb10 == sizeof(float));
-
-    if (params->type == GGML_TASK_INIT) {
-        memset(params->wdata, 0, params->wsize);
-
-        float * const wdata = (float *) params->wdata + 0;
-
-        for (int64_t i11 = 0; i11 < ne11; i11++) {
-            const float * const src = (float *)((char *) src1->data + i11*nb11);
-            float * dst_data = wdata;
-
-            for (int64_t i0 = 0; i0 < ne0; i0++) {
-                for (int64_t ik = 0; ik < nk; ik++) {
-                    const int idx0 = i0*s0 + ik*d0 - p0;
-
-                    if(!(idx0 < 0 || idx0 >= ne10)) {
-                        dst_data[i0*ew0 + i11*nk + ik] = src[idx0];
-                    }
-                }
-            }
-        }
-
-        return;
-    }
-
-    if (params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // total rows in dst
-    const int nr = ne02;
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    float * const wdata = (float *) params->wdata + 0;
-
-    for (int i2 = 0; i2 < ne2; i2++) {
-        for (int i1 = ir0; i1 < ir1; i1++) {
-            float * dst_data = (float *)((char *) dst->data + i2*nb2 + i1*nb1);
-
-            for (int i0 = 0; i0 < ne0; i0++) {
-                ggml_vec_dot_f32(ew0, dst_data + i0,
-                        (float *) ((char *) src0->data + i1*nb02),
-                        (float *) wdata + i2*nb2 + i0*ew0);
-            }
-        }
-    }
-}
-
-// TODO: reuse ggml_mul_mat or implement ggml_im2col and remove stage_0 and stage_1
-static void gemm_f16_out_f32(int64_t m, int64_t n, int64_t k,
-                             ggml_fp16_t * A,
-                             ggml_fp16_t * B,
-                             float * C,
-                             const int ith, const int nth) {
-    // does not seem to make a difference
-    int64_t m0, m1, n0, n1;
-    // patches per thread
-    if (m > n) {
-        n0 = 0;
-        n1 = n;
-
-        // total patches in dst
-        const int np = m;
-
-        // patches per thread
-        const int dp = (np + nth - 1)/nth;
-
-        // patch range for this thread
-        m0 = dp*ith;
-        m1 = MIN(m0 + dp, np);
-    } else {
-        m0 = 0;
-        m1 = m;
-
-        // total patches in dst
-        const int np = n;
-
-        // patches per thread
-        const int dp = (np + nth - 1)/nth;
-
-        // patch range for this thread
-        n0 = dp*ith;
-        n1 = MIN(n0 + dp, np);
-    }
-
-    // block-tiling attempt
-    int64_t blck_n = 16;
-    int64_t blck_m = 16;
-
-    // int64_t CACHE_SIZE = 2 * 1024 * 1024; // 2MB
-    // int64_t blck_size = CACHE_SIZE / (sizeof(float) + 2 * sizeof(ggml_fp16_t) * K);
-    // if (blck_size > 0) {
-    //     blck_0 = 4;
-    //     blck_1 = blck_size / blck_0;
-    //     if (blck_1 < 0) {
-    //         blck_1 = 1;
-    //     }
-    //     // blck_0 = (int64_t)sqrt(blck_size);
-    //     // blck_1 = blck_0;
-    // }
-    // // printf("%zd %zd %zd %zd\n", blck_size, K, blck_0, blck_1);
-
-    for (int j = n0; j < n1; j+=blck_n) {
-        for (int i = m0; i < m1; i+=blck_m) {
-            // printf("i j k => %d %d %d\n", i, j, K);
-            for (int ii = i; ii < i + blck_m && ii < m1; ii++) {
-                for (int jj = j; jj < j + blck_n && jj < n1; jj++) {
-                    ggml_vec_dot_f16(k,
-                                    C + ii*n + jj,
-                                    A + ii * k,
-                                    B + jj * k);
-                }
-            }
-        }
-    }
-}
-
-// src0: kernel [OC, IC, K]
-// src1: signal [N, IC, IL]
-// dst:  result [N, OL, IC*K]
-static void ggml_compute_forward_conv_1d_stage_0_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F16);
-
-    int64_t t0 = ggml_perf_time_us();
-    UNUSED(t0);
-
-    GGML_TENSOR_BINARY_OP_LOCALS;
-
-    const int64_t N  = ne12;
-    const int64_t IC = ne11;
-    const int64_t IL = ne10;
-
-    const int64_t K = ne00;
-
-    const int64_t OL = ne1;
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
-    const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
-    const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
-
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
-    GGML_ASSERT(nb10 == sizeof(float));
-
-    if (params->type == GGML_TASK_INIT) {
-        memset(dst->data, 0, ggml_nbytes(dst));
-        return;
-    }
-
-    if (params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // im2col: [N, IC, IL] => [N, OL, IC*K]
-    {
-        ggml_fp16_t * const wdata = (ggml_fp16_t *) dst->data;
-
-        for (int64_t in = 0; in < N; in++) {
-            for (int64_t iol = 0; iol < OL; iol++) {
-                for (int64_t iic = ith; iic < IC; iic+=nth) {
-
-                    // micro kernel
-                    ggml_fp16_t * dst_data = wdata + (in*OL + iol)*(IC*K); // [IC, K]
-                    const float * const src_data = (float *)((char *) src1->data + in*nb12 + iic*nb11); // [IL]
-
-                    for (int64_t ik = 0; ik < K; ik++) {
-                        const int64_t iil = iol*s0 + ik*d0 - p0;
-
-                        if (!(iil < 0 || iil >= IL)) {
-                            dst_data[iic*K + ik] = GGML_FP32_TO_FP16(src_data[iil]);
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-// gemm: [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
-// src0: [OC, IC, K]
-// src1: [N, OL, IC * K]
-// result: [N, OC, OL]
-static void ggml_compute_forward_conv_1d_stage_1_f16(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
-    GGML_ASSERT(src1->type == GGML_TYPE_F16);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    int64_t t0 = ggml_perf_time_us();
-    UNUSED(t0);
-
-    if (params->type == GGML_TASK_INIT) {
-        return;
-    }
-
-    if (params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    GGML_TENSOR_BINARY_OP_LOCALS;
-
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
-    GGML_ASSERT(nb10 == sizeof(ggml_fp16_t));
-    GGML_ASSERT(nb0  == sizeof(float));
-
-    const int N = ne12;
-    const int OL = ne11;
-
-    const int OC = ne02;
-    const int IC = ne01;
-    const int K  = ne00;
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    int64_t m = OC;
-    int64_t n = OL;
-    int64_t k = IC * K;
-
-    // [N, OC, OL] = [OC, IC * K] x [N*OL, IC * K]
-    for (int i = 0; i < N; i++) {
-        ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
-        ggml_fp16_t * B = (ggml_fp16_t *)src1->data + i * m * k; // [n, k]
-        float * C = (float *)dst->data + i * m * n; // [m, n]
-
-        gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
-    }
-}
-
-static void ggml_compute_forward_conv_1d(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    switch(src0->type) {
-        case GGML_TYPE_F16:
-            {
-                ggml_compute_forward_conv_1d_f16_f32(params, src0, src1, dst);
-            } break;
-        case GGML_TYPE_F32:
-            {
-                ggml_compute_forward_conv_1d_f32(params, src0, src1, dst);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
-static void ggml_compute_forward_conv_1d_stage_0(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    switch(src0->type) {
-        case GGML_TYPE_F16:
-            {
-                ggml_compute_forward_conv_1d_stage_0_f32(params, src0, src1, dst);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
-static void ggml_compute_forward_conv_1d_stage_1(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    switch(src0->type) {
-        case GGML_TYPE_F16:
-            {
-                ggml_compute_forward_conv_1d_stage_1_f16(params, src0, src1, dst);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
-// ggml_compute_forward_conv_transpose_1d
-
-static void ggml_compute_forward_conv_transpose_1d_f16_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    int64_t t0 = ggml_perf_time_us();
-    UNUSED(t0);
-
-    GGML_TENSOR_BINARY_OP_LOCALS
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nk = ne00*ne01*ne02;
-
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
-    GGML_ASSERT(nb10 == sizeof(float));
-
-    if (params->type == GGML_TASK_INIT) {
-        memset(params->wdata, 0, params->wsize);
-
-        // permute kernel data (src0) from (K x Cout x Cin) to (Cin x K x Cout)
-        {
-            ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-
-            for (int64_t i02 = 0; i02 < ne02; i02++) {
-                for (int64_t i01 = 0; i01 < ne01; i01++) {
-                    const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
-                    ggml_fp16_t * dst_data = wdata + i01*ne00*ne02;
-                    for (int64_t i00 = 0; i00 < ne00; i00++) {
-                        dst_data[i00*ne02 + i02] = src[i00];
-                    }
-                }
-            }
-        }
-
-        // permute source data (src1) from (L x Cin) to (Cin x L)
-        {
-            ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + nk;
-            ggml_fp16_t * dst_data = wdata;
-
-            for (int64_t i11 = 0; i11 < ne11; i11++) {
-                const float * const src = (float *)((char *) src1->data + i11*nb11);
-                for (int64_t i10 = 0; i10 < ne10; i10++) {
-                    dst_data[i10*ne11 + i11] = GGML_FP32_TO_FP16(src[i10]);
-                }
-            }
-        }
-
-        // need to zero dst since we are accumulating into it
-        memset(dst->data, 0, ggml_nbytes(dst));
-
-        return;
-    }
-
-    if (params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
-
-    // total rows in dst
-    const int nr = ne1;
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    ggml_fp16_t * const wdata     = (ggml_fp16_t *) params->wdata + 0;
-    ggml_fp16_t * const wdata_src = wdata + nk;
-
-    for (int i1 = ir0; i1 < ir1; i1++) {
-        float * dst_data = (float *)((char *) dst->data + i1*nb1);
-        ggml_fp16_t * wdata_kernel = wdata + i1*ne02*ne00;
-        for (int i10 = 0; i10 < ne10; i10++) {
-            const int i1n = i10*ne11;
-            for (int i00 = 0; i00 < ne00; i00++) {
-                float v = 0;
-                ggml_vec_dot_f16(ne02, &v,
-                        (ggml_fp16_t *) wdata_src + i1n,
-                        (ggml_fp16_t *) wdata_kernel + i00*ne02);
-                dst_data[i10*s0 + i00] += v;
-            }
-        }
-    }
-}
-
-static void ggml_compute_forward_conv_transpose_1d_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F32);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    int64_t t0 = ggml_perf_time_us();
-    UNUSED(t0);
-
-    GGML_TENSOR_BINARY_OP_LOCALS
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nk = ne00*ne01*ne02;
+    const int nk = ne00*ne01*ne02;
 
     GGML_ASSERT(nb00 == sizeof(float));
     GGML_ASSERT(nb10 == sizeof(float));
|
         }
     }
 }
 
-// ggml_compute_forward_conv_2d
-
 // src0: kernel [OC, IC, KH, KW]
 // src1: image [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
-static void ggml_compute_forward_conv_2d_stage_0_f32(
+static void ggml_compute_forward_im2col_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
@@ -11980,34 +11592,35 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
 
     GGML_TENSOR_BINARY_OP_LOCALS;
 
-    const int64_t N  = ne13;
-    const int64_t IC = ne12;
-    const int64_t IH = ne11;
+    const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
+    const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
+    const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
+    const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
+    const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
+    const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
+    const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t N  = is_2D ? ne13 : ne12;
+    const int64_t IC = is_2D ? ne12 : ne11;
+    const int64_t IH = is_2D ? ne11 : 1;
     const int64_t IW = ne10;
 
-    // const int64_t OC = ne03;
-    // const int64_t IC = ne02;
-    const int64_t KH = ne01;
+    const int64_t KH = is_2D ? ne01 : 1;
     const int64_t KW = ne00;
 
-    const int64_t OH = ne2;
+    const int64_t OH = is_2D ? ne2 : 1;
     const int64_t OW = ne1;
 
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
-    const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
-    const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
-    const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
-    const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
-    const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
+    int ofs0 = is_2D ? nb13 : nb12;
+    int ofs1 = is_2D ? nb12 : nb11;
 
     GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
     GGML_ASSERT(nb10 == sizeof(float));
 
     if (params->type == GGML_TASK_INIT) {
-        memset(dst->data, 0, ggml_nbytes(dst));
         return;
     }
 
@@ -12020,20 +11633,22 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
         ggml_fp16_t * const wdata = (ggml_fp16_t *) dst->data;
 
         for (int64_t in = 0; in < N; in++) {
-            for (int64_t ioh = 0; ioh < OH; ioh++) {
+            for (int64_t ioh = 0; ioh < OH; ioh++) { // 1
                 for (int64_t iow = 0; iow < OW; iow++) {
-                    for (int64_t iic = ith; iic < IC; iic+=nth) {
+                    for (int64_t iic = ith; iic < IC; iic += nth) {
 
                         // micro kernel
                         ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
-                        const float * const src_data = (float *)((char *) src1->data + in*nb13 + iic*nb12); // [IH, IW]
+                        const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
 
-                        for (int64_t ikh = 0; ikh < KH; ikh++) {
+                        for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
                             for (int64_t ikw = 0; ikw < KW; ikw++) {
                                 const int64_t iiw = iow*s0 + ikw*d0 - p0;
                                 const int64_t iih = ioh*s1 + ikh*d1 - p1;
 
-                                if (!(iih < 0 || iih >= IH || iiw < 0 || iiw >= IW)) {
+                                if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
+                                    dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
+                                } else {
                                     dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_FP32_TO_FP16(src_data[iih*IW + iiw]);
                                 }
                             }
@@ -12045,180 +11660,7 @@ static void ggml_compute_forward_conv_2d_stage_0_f32(
         }
     }
 }
 
-
-// src0: [OC, IC, KH, KW]
-// src1: [N, OH, OW, IC * KH * KW]
-// result: [N, OC, OH, OW]
-static void ggml_compute_forward_conv_2d_stage_1_f16(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
-    GGML_ASSERT(src1->type == GGML_TYPE_F16);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    int64_t t0 = ggml_perf_time_us();
-    UNUSED(t0);
-
-    if (params->type == GGML_TASK_INIT) {
-        return;
-    }
-
-    if (params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    GGML_TENSOR_BINARY_OP_LOCALS;
-
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
-    GGML_ASSERT(nb10 == sizeof(ggml_fp16_t));
-    GGML_ASSERT(nb0  == sizeof(float));
-
-    const int N = ne13;
-    const int OH = ne12;
-    const int OW = ne11;
-
-    const int OC = ne03;
-    const int IC = ne02;
-    const int KH = ne01;
-    const int KW = ne00;
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    int64_t m = OC;
-    int64_t n = OH * OW;
-    int64_t k = IC * KH * KW;
-
-    // [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
-    for (int i = 0; i < N; i++) {
-        ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
-        ggml_fp16_t * B = (ggml_fp16_t *)src1->data + i * m * k; // [n, k]
-        float * C = (float *)dst->data + i * m * n; // [m, n]
-
-        gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
-    }
-}
-
-static void ggml_compute_forward_conv_2d_f16_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    GGML_ASSERT(src0->type == GGML_TYPE_F16);
-    GGML_ASSERT(src1->type == GGML_TYPE_F32);
-    GGML_ASSERT( dst->type == GGML_TYPE_F32);
-
-    int64_t t0 = ggml_perf_time_us();
-    UNUSED(t0);
-
-    GGML_TENSOR_BINARY_OP_LOCALS
-
-    // src1: image [N, IC, IH, IW]
-    // src0: kernel [OC, IC, KH, KW]
-    // dst:  result [N, OC, OH, OW]
-    // ne12: IC
-    // ne0:  OW
-    // ne1:  OH
-    // nk0:  KW
-    // nk1:  KH
-    // ne13: N
-
-    const int N = ne13;
-    const int IC = ne12;
-    const int IH = ne11;
-    const int IW = ne10;
-
-    const int OC = ne03;
-    // const int IC = ne02;
-    const int KH = ne01;
-    const int KW = ne00;
-
-    const int OH = ne1;
-    const int OW = ne0;
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    // const int nk0 = ne00;
-    // const int nk1 = ne01;
-
-    // size of the convolution row - the kernel size unrolled across all channels
-    // const int ew0 = nk0*nk1*ne02;
-    // ew0: IC*KH*KW
-
-    const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
-    const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
-    const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
-    const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
-    const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
-    const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
-
-    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
-    GGML_ASSERT(nb10 == sizeof(float));
-
-    if (params->type == GGML_TASK_INIT) {
-        memset(params->wdata, 0, params->wsize);
-
-        // prepare source data (src1)
-        // im2col: [N, IC, IH, IW] => [N*OH*OW, IC*KH*KW]
-
-        {
-            ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-
-            for (int in = 0; in < N; in++) {
-                for (int iic = 0; iic < IC; iic++) {
-                    for (int ioh = 0; ioh < OH; ioh++) {
-                        for (int iow = 0; iow < OW; iow++) {
-
-                            // micro kernel
-                            ggml_fp16_t * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
-                            const float * const src_data = (float *)((char *) src1->data + in*nb13 + iic*nb12); // [IH, IW]
-
-                            for (int ikh = 0; ikh < KH; ikh++) {
-                                for (int ikw = 0; ikw < KW; ikw++) {
-                                    const int iiw = iow*s0 + ikw*d0 - p0;
-                                    const int iih = ioh*s1 + ikh*d1 - p1;
-
-                                    if (!(iih < 0 || iih >= IH || iiw < 0 || iiw >= IW)) {
-                                        dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_FP32_TO_FP16(src_data[iih*IW + iiw]);
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        return;
-    }
-
-    if (params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-    // wdata: [N*OH*OW, IC*KH*KW]
-    // dst:   result [N, OC, OH, OW]
-    // src0:  kernel [OC, IC, KH, KW]
-
-    int64_t m = OC;
-    int64_t n = OH * OW;
-    int64_t k = IC * KH * KW;
-
-    // [N, OC, OH, OW] = [OC, IC * KH * KW] x [N*OH*OW, IC * KH * KW]
-    for (int i = 0; i < N; i++) {
-        ggml_fp16_t * A = (ggml_fp16_t *)src0->data; // [m, k]
-        ggml_fp16_t * B = (ggml_fp16_t *)wdata + i * m * k; // [n, k]
-        float * C = (float *)dst->data + i * m * n; // [m * k]
-
-        gemm_f16_out_f32(m, n, k, A, B, C, ith, nth);
-    }
-}
-
-static void ggml_compute_forward_conv_2d(
+static void ggml_compute_forward_im2col(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
@@ -12226,50 +11668,7 @@ static void ggml_compute_forward_conv_2d(
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
-            } break;
-        case GGML_TYPE_F32:
-            {
-                //ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
-                GGML_ASSERT(false);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
-static void ggml_compute_forward_conv_2d_stage_0(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    switch (src0->type) {
-        case GGML_TYPE_F16:
-            {
-                ggml_compute_forward_conv_2d_stage_0_f32(params, src0, src1, dst);
-            } break;
-        case GGML_TYPE_F32:
-            {
-                GGML_ASSERT(false);
-            } break;
-        default:
-            {
-                GGML_ASSERT(false);
-            } break;
-    }
-}
-
-static void ggml_compute_forward_conv_2d_stage_1(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-    switch (src0->type) {
-        case GGML_TYPE_F16:
-            {
-                ggml_compute_forward_conv_2d_stage_1_f16(params, src0, src1, dst);
+                ggml_compute_forward_im2col_f16(params, src0, src1, dst);
             } break;
         case GGML_TYPE_F32:
             {
@@ -12454,14 +11853,11 @@ static void ggml_compute_forward_pool_1d(
     ggml_compute_forward_pool_1d_sk_p0(params, op, src0, k0, dst);
 }
 
-// ggml_compute_forward_pool_2d_sk_p0
+// ggml_compute_forward_pool_2d
 
-static void ggml_compute_forward_pool_2d_sk_p0(
+static void ggml_compute_forward_pool_2d(
         const struct ggml_compute_params * params,
-        const enum ggml_op_pool op,
         const struct ggml_tensor * src,
-        const int k0,
-        const int k1,
         struct ggml_tensor * dst) {
     assert(src->type == GGML_TYPE_F32);
     assert(params->ith == 0);
@@ -12470,6 +11866,14 @@ static void ggml_compute_forward_pool_2d_sk_p0(
         return;
     }
 
+    const int32_t * opts = (const int32_t *)dst->op_params;
+    enum ggml_op_pool op = opts[0];
+    const int k0 = opts[1];
+    const int k1 = opts[2];
+    const int s0 = opts[3];
+    const int s1 = opts[4];
+    const int p0 = opts[5];
+    const int p1 = opts[6];
     const char * cdata = (const char*)src->data;
     const char * const data_end = cdata + ggml_nbytes(src);
 
@@ -12480,6 +11884,8 @@ static void ggml_compute_forward_pool_2d_sk_p0(
     float * dplane = (float *)dst->data;
 
     const int ka = k0 * k1;
+    const int offset0 = -p0;
+    const int offset1 = -p1;
 
     while (cdata < data_end) {
         for (int oy = 0; oy < py; ++oy) {
@@ -12492,13 +11898,15 @@ static void ggml_compute_forward_pool_2d_sk_p0(
                     case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
                 }
 
-                const int ix = ox * k0;
-                const int iy = oy * k1;
+                const int ix = offset0 + ox * s0;
+                const int iy = offset1 + oy * s1;
 
                 for (int ky = 0; ky < k1; ++ky) {
+                    if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
                     const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
                     for (int kx = 0; kx < k0; ++kx) {
                         int j = ix + kx;
+                        if (j < 0 || j >= src->ne[0]) continue;
                         switch (op) {
                             case GGML_OP_POOL_AVG:                     *out += srow[j]; break;
                             case GGML_OP_POOL_MAX: if (srow[j] > *out) *out  = srow[j]; break;
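The rewritten pooling path above reads the kernel (k0, k1), stride (s0, s1) and padding (p0, p1) from dst->op_params and skips out-of-range taps, instead of asserting s == k and p == 0 as the old wrapper did. A minimal standalone sketch of the same indexing scheme, with hypothetical helper names that are not part of the ggml API; note GGML_OP_POOL_AVG still divides by the full kernel area:

    #include <stdio.h>

    // average-pool one output element of a W x H plane; out-of-range taps are
    // skipped, mirroring the bounds checks added above
    static float avg_pool_at(const float *src, int W, int H, int ox, int oy,
                             int k0, int k1, int s0, int s1, int p0, int p1) {
        const int ix = ox * s0 - p0; // offset0 = -p0
        const int iy = oy * s1 - p1; // offset1 = -p1
        float sum = 0.0f;
        for (int ky = 0; ky < k1; ++ky) {
            if (iy + ky < 0 || iy + ky >= H) continue;
            for (int kx = 0; kx < k0; ++kx) {
                if (ix + kx < 0 || ix + kx >= W) continue;
                sum += src[(iy + ky)*W + (ix + kx)];
            }
        }
        return sum / (k0 * k1); // divided by the full kernel area, as in ggml
    }

    int main(void) {
        const float plane[4] = { 1, 2, 3, 4 }; // 2x2 input
        // kernel 2x2, stride 1, padding 1: only src[0] is in range -> 1/4
        printf("%f\n", avg_pool_at(plane, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1));
        return 0;
    }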
@@ -12519,29 +11927,6 @@ static void ggml_compute_forward_pool_2d_sk_p0(
     }
 }
 
-// ggml_compute_forward_pool_2d
-
-static void ggml_compute_forward_pool_2d(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
-
-    const int32_t * opts = (const int32_t *)dst->op_params;
-    enum ggml_op_pool op = opts[0];
-    const int k0 = opts[1];
-    const int k1 = opts[2];
-    const int s0 = opts[3];
-    const int s1 = opts[4];
-    const int p0 = opts[5];
-    const int p1 = opts[6];
-    GGML_ASSERT(p0 == 0);
-    GGML_ASSERT(p1 == 0); // padding not supported
-    GGML_ASSERT(k0 == s0);
-    GGML_ASSERT(k1 == s1); // only s = k supported
-
-    ggml_compute_forward_pool_2d_sk_p0(params, op, src0, k0, k1, dst);
-}
-
 // ggml_compute_forward_upscale
 
 static void ggml_compute_forward_upscale_f32(
@@ -13743,6 +13128,10 @@ static void ggml_compute_forward_unary(
             {
                 ggml_compute_forward_silu(params, src0, dst);
             } break;
+        case GGML_UNARY_OP_LEAKY:
+            {
+                ggml_compute_forward_leaky(params, src0, dst);
+            } break;
         default:
             {
                 GGML_ASSERT(false);
@@ -14496,33 +13885,13 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             {
                 ggml_compute_forward_clamp(params, tensor->src[0], tensor);
             } break;
-        case GGML_OP_CONV_1D:
-            {
-                ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
-            } break;
-        case GGML_OP_CONV_1D_STAGE_0:
-            {
-                ggml_compute_forward_conv_1d_stage_0(params, tensor->src[0], tensor->src[1], tensor);
-            } break;
-        case GGML_OP_CONV_1D_STAGE_1:
-            {
-                ggml_compute_forward_conv_1d_stage_1(params, tensor->src[0], tensor->src[1], tensor);
-            } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
                 ggml_compute_forward_conv_transpose_1d(params, tensor->src[0], tensor->src[1], tensor);
             } break;
-        case GGML_OP_CONV_2D:
+        case GGML_OP_IM2COL:
             {
-                ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
-            } break;
-        case GGML_OP_CONV_2D_STAGE_0:
-            {
-                ggml_compute_forward_conv_2d_stage_0(params, tensor->src[0], tensor->src[1], tensor);
-            } break;
-        case GGML_OP_CONV_2D_STAGE_1:
-            {
-                ggml_compute_forward_conv_2d_stage_1(params, tensor->src[0], tensor->src[1], tensor);
+                ggml_compute_forward_im2col(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
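With this dispatch change, the separate GGML_OP_CONV_1D/GGML_OP_CONV_2D ops (and their _STAGE_0/_STAGE_1 variants) are gone: convolutions are now expressed as a single GGML_OP_IM2COL followed by an ordinary matrix multiplication. A rough standalone illustration of the im2col layout for one single-channel 2-D plane (illustrative only, not the ggml kernel):

    // each output row (oh, ow) gathers the KH*KW patch the kernel sees at that
    // position, so convolution reduces to a [OC, KH*KW] x [KH*KW, OH*OW] matmul
    void im2col_2d(const float *src, int IW, int IH, int KW, int KH,
                   int s, int p, float *dst /* [OH*OW][KH*KW] */) {
        const int OW = (IW + 2*p - KW)/s + 1;
        const int OH = (IH + 2*p - KH)/s + 1;
        for (int oh = 0; oh < OH; ++oh) {
            for (int ow = 0; ow < OW; ++ow) {
                float *row = dst + (oh*OW + ow)*KW*KH;
                for (int kh = 0; kh < KH; ++kh) {
                    for (int kw = 0; kw < KW; ++kw) {
                        const int iy = oh*s + kh - p;
                        const int ix = ow*s + kw - p;
                        row[kh*KW + kw] = (iy < 0 || iy >= IH || ix < 0 || ix >= IW)
                                              ? 0.0f : src[iy*IW + ix];
                    }
                }
            }
        }
    }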
@@ -14651,62 +14020,109 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
 
 ////////////////////////////////////////////////////////////////////////////////
 
-
+static size_t ggml_hash_size(size_t min_sz) {
+    // next primes after powers of two
+    static const size_t primes[] = {
+        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
+        2053, 4099, 8209, 16411, 32771, 65537, 131101,
+        262147, 524309, 1048583, 2097169, 4194319, 8388617,
+        16777259, 33554467, 67108879, 134217757, 268435459,
+        536870923, 1073741827, 2147483659
+    };
+    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
+
+    // find the smallest prime that is larger or equal to min_sz
+    size_t l = 0;
+    size_t r = n_primes;
+    while (l < r) {
+        size_t m = (l + r)/2;
+        if (primes[m] < min_sz) {
+            l = m + 1;
+        } else {
+            r = m;
+        }
+    }
+    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
+    return sz;
+}
 
-static size_t hash(void * p) {
-    return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
+static size_t ggml_hash(const void * p) {
+    return (size_t)p;
 }
 
-static size_t hash_find(void * hash_table[], void * p) {
-    size_t h = hash(p);
+size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) {
+    size_t h = ggml_hash(key) % hash_set.size;
 
     // linear probing
     size_t i = h;
-    while (hash_table[i] != NULL && hash_table[i] != p) {
-        i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
+    while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) {
+        i = (i + 1) % hash_set.size;
         if (i == h) {
            // visited all hash table entries -> not found
-            return GGML_GRAPH_HASHTABLE_SIZE;
+            return GGML_HASHTABLE_FULL;
        }
    }
    return i;
 }
 
-static bool hash_insert(void * hash_table[], void * p) {
-    size_t i = hash_find(hash_table, p);
+bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
+    size_t i = ggml_hash_find(hash_set, key);
+    return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key;
+}
+
+size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
+    size_t i = ggml_hash_find(hash_set, key);
 
-    GGML_ASSERT(i < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
+    GGML_ASSERT(i != GGML_HASHTABLE_FULL);
 
-    if (hash_table[i] == p) {
-        return true;
+    if (hash_set.keys[i] == key) {
+        return GGML_HASHTABLE_ALREADY_EXISTS;
     }
 
     // insert
-    GGML_ASSERT(hash_table[i] == NULL);
-    hash_table[i] = p;
-    return false;
+    GGML_ASSERT(hash_set.keys[i] == NULL);
+    hash_set.keys[i] = key;
+    return i;
+}
+
+size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
+    size_t i = ggml_hash_find(hash_set, key);
+
+    GGML_ASSERT(i != GGML_HASHTABLE_FULL);
+
+    hash_set.keys[i] = key;
+    return i;
+}
+
+static struct ggml_hash_set ggml_hash_set_new(size_t size) {
+    size = ggml_hash_size(size);
+    struct ggml_hash_set result;
+    result.size = size;
+    result.keys = malloc(sizeof(struct ggml_tensor *) * size);
+    memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
+    return result;
 }
 
-static bool hash_contains(void * hash_table[], void * p) {
-    size_t i = hash_find(hash_table, p);
-    return (i < GGML_GRAPH_HASHTABLE_SIZE) && (hash_table[i] == p);
+static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
+    free(hash_set.keys);
 }
 
 struct hash_map {
-    void * keys[GGML_GRAPH_HASHTABLE_SIZE];
-    void * vals[GGML_GRAPH_HASHTABLE_SIZE];
+    struct ggml_hash_set set;
+    struct ggml_tensor ** vals;
 };
 
-static struct hash_map * new_hash_map(void) {
+static struct hash_map * ggml_new_hash_map(size_t size) {
     struct hash_map * result = malloc(sizeof(struct hash_map));
-    for (int i=0; i<GGML_GRAPH_HASHTABLE_SIZE; ++i) {
-        result->keys[i] = NULL;
-        result->vals[i] = NULL;
-    }
+    result->set = ggml_hash_set_new(size);
+    result->vals = malloc(sizeof(struct ggml_tensor *) * result->set.size);
+    memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
     return result;
 }
 
-static void free_hash_map(struct hash_map * map) {
+static void ggml_hash_map_free(struct hash_map * map) {
+    ggml_hash_set_free(map->set);
+    free(map->vals);
     free(map);
 }
 
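The block above replaces the fixed-size tables (void * keys[GGML_GRAPH_HASHTABLE_SIZE]) with a heap-allocated ggml_hash_set: tables are sized to the next prime at least as large as the request, keys are hashed by pointer identity, and collisions are resolved by linear probing, with GGML_HASHTABLE_FULL signalling a wrapped-around probe and GGML_HASHTABLE_ALREADY_EXISTS a duplicate insert. A reduced standalone model of the same scheme (the names here are hypothetical, not the ggml API):

    #include <stdio.h>
    #include <stdlib.h>

    #define FULL ((size_t)-1)

    typedef struct { size_t size; void **keys; } hash_set;

    // pointer-identity hash plus linear probing, as in ggml_hash_find
    static size_t find_slot(hash_set hs, void *key) {
        size_t h = (size_t)key % hs.size;
        size_t i = h;
        while (hs.keys[i] != NULL && hs.keys[i] != key) {
            i = (i + 1) % hs.size;
            if (i == h) return FULL; // visited every slot: table is full
        }
        return i; // empty slot, or the slot already holding key
    }

    int main(void) {
        hash_set hs = { 17, calloc(17, sizeof(void *)) }; // 17: a prime size
        int a, b;
        hs.keys[find_slot(hs, &a)] = &a; // insert (table known not full here)
        printf("contains a: %d\n", hs.keys[find_slot(hs, &a)] == (void *)&a);
        printf("contains b: %d\n", hs.keys[find_slot(hs, &b)] == (void *)&b);
        free(hs.keys);
        return 0;
    }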
@@ -14726,7 +14142,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
         return node;
     }
 
-    if (!hash_contains(graph->visited_hash_table, node)) {
+    if (!ggml_hash_contains(graph->visited_hash_table, node)) {
         return node;
     }
 
@@ -14741,17 +14157,17 @@ static struct ggml_tensor * ggml_recompute_graph_node(
         return node;
     }
 
-    size_t i = hash_find(replacements->keys, node);
-    GGML_ASSERT(i < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
-    if (replacements->keys[i] == node) {
-        return (struct ggml_tensor *) replacements->vals[i];
+    size_t i = ggml_hash_find(replacements->set, node);
+    GGML_ASSERT(i != GGML_HASHTABLE_FULL); // assert that not full
+    if (replacements->set.keys[i] == node) {
+        return replacements->vals[i];
     }
 
     struct ggml_tensor * clone = ggml_new_tensor(ctx, node->type, node->n_dims, node->ne);
 
     // insert clone into replacements
-    GGML_ASSERT(replacements->keys[i] == NULL); // assert that we don't overwrite
-    replacements->keys[i] = node;
+    GGML_ASSERT(replacements->set.keys[i] == NULL); // assert that we don't overwrite
+    replacements->set.keys[i] = node;
     replacements->vals[i] = clone;
 
     clone->op = node->op;
@@ -14788,26 +14204,26 @@ void ggml_build_backward_gradient_checkpointing(
         struct ggml_cgraph  * gb_tmp,
         struct ggml_tensor  * * checkpoints,
         int n_checkpoints) {
-    *gb_tmp = *gf;
+    ggml_graph_cpy(gf, gb_tmp);
     ggml_build_backward_expand(ctx, gf, gb_tmp, true);
 
     if (n_checkpoints <= 0) {
-        *gb = *gb_tmp;
+        ggml_graph_cpy(gb_tmp, gb);
         return;
     }
 
-    struct hash_map * replacements = new_hash_map();
+    struct hash_map * replacements = ggml_new_hash_map(gf->n_nodes + gf->n_leafs + n_checkpoints);
 
     // insert checkpoints in replacements
     for (int i = 0; i < n_checkpoints; ++i) {
-        size_t k = hash_find(replacements->keys, checkpoints[i]);
-        GGML_ASSERT(k < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
-        GGML_ASSERT(replacements->keys[k] == NULL); // assert that we don't overwrite
-        replacements->keys[k] = checkpoints[i];
-        replacements->vals[k] = checkpoints[i];
+        size_t k = ggml_hash_find(replacements->set, checkpoints[i]);
+        GGML_ASSERT(k != GGML_HASHTABLE_FULL); // assert that not full
+        GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
+        replacements->set.keys[k] = checkpoints[i];
+        replacements->vals[k]     = checkpoints[i];
     }
 
-    *gb = *gf;
+    ggml_graph_cpy(gf, gb);
     // rewrite gb_tmp->nodes[gf->n_nodes:gb_tmp->n_nodes],
     // replacing references to gb_tmp->nodes[0:gf->n_nodes] ( == gf->nodes[0:gf->n_nodes]),
     // by recomputing them from checkpoints
@@ -14824,21 +14240,21 @@ void ggml_build_backward_gradient_checkpointing(
         ggml_build_forward_expand(gb, node);
     }
 
-    free_hash_map(replacements);
+    ggml_hash_map_free(replacements);
 }
 
 // functions to change gradients considering the case that input a might be initial gradient with zero value
 
-static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, void * zero_table[]) {
-    if (hash_contains(zero_table, a)) {
+static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+    if (ggml_hash_contains(zero_table, a)) {
         return b;
     } else {
         return ggml_add_impl(ctx, a, b, false);
     }
 }
 
-static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, void * zero_table[]) {
-    if (hash_contains(zero_table, a)) {
+static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) {
+    if (ggml_hash_contains(zero_table, a)) {
         struct ggml_tensor * a_zero = ggml_scale(ctx, a, ggml_new_f32(ctx, 0));
         return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
     } else {
@@ -14846,23 +14262,23 @@ static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct gg
     }
 }
 
-static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, void * zero_table[]) {
-    if (hash_contains(zero_table, a)) {
+static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+    if (ggml_hash_contains(zero_table, a)) {
         return ggml_repeat(ctx, b, a);
     } else {
         return ggml_add1_impl(ctx, a, b, false);
     }
 }
 
-static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, void * zero_table[]) {
-    if (hash_contains(zero_table, a)) {
+static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+    if (ggml_hash_contains(zero_table, a)) {
         return ggml_neg(ctx, b);
     } else {
         return ggml_sub_impl(ctx, a, b, false);
     }
 }
 
-static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, void * zero_table[]) {
+static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) {
     struct ggml_tensor * src0 = tensor->src[0];
     struct ggml_tensor * src1 = tensor->src[1];
 
@@ -15457,31 +14873,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
-        case GGML_OP_CONV_1D:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_CONV_1D_STAGE_0:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_CONV_1D_STAGE_1:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
-        case GGML_OP_CONV_2D:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_CONV_2D_STAGE_0:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_CONV_2D_STAGE_1:
+        case GGML_OP_IM2COL:
             {
                 GGML_ASSERT(false); // TODO: not implemented
             } break;
@@ -15695,7 +15091,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
     }
 
     // check if already visited
-    if (hash_insert(cgraph->visited_hash_table, node)) {
+    if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) {
         return;
     }
 
@@ -15711,7 +15107,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
 
     if (node->op == GGML_OP_NONE && node->grad == NULL) {
         // reached a leaf node, not part of the gradient graph (e.g. a constant)
-        GGML_ASSERT(cgraph->n_leafs < GGML_MAX_NODES);
+        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
 
         if (strlen(node->name) == 0) {
             ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
@@ -15720,22 +15116,24 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
         cgraph->leafs[cgraph->n_leafs] = node;
         cgraph->n_leafs++;
     } else {
-        GGML_ASSERT(cgraph->n_nodes < GGML_MAX_NODES);
+        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
 
         if (strlen(node->name) == 0) {
             ggml_format_name(node, "node_%d", cgraph->n_nodes);
         }
 
         cgraph->nodes[cgraph->n_nodes] = node;
-        cgraph->grads[cgraph->n_nodes] = node->grad;
+        if (cgraph->grads) {
+            cgraph->grads[cgraph->n_nodes] = node->grad;
+        }
         cgraph->n_nodes++;
     }
 }
 
 static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
     if (!expand) {
-        cgraph->n_nodes = 0;
-        cgraph->n_leafs = 0;
+        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
+        ggml_graph_clear(cgraph);
     }
 
     const int n0 = cgraph->n_nodes;
@@ -15756,25 +15154,6 @@ void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor *
     ggml_build_forward_impl(cgraph, tensor, true);
 }
 
-struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
-    struct ggml_cgraph result = {
-        /*.n_nodes      =*/ 0,
-        /*.n_leafs      =*/ 0,
-        /*.nodes        =*/ { NULL },
-        /*.grads        =*/ { NULL },
-        /*.leafs        =*/ { NULL },
-        /*.hash_table   =*/ { NULL },
-        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
-        /*.perf_runs    =*/ 0,
-        /*.perf_cycles  =*/ 0,
-        /*.perf_time_us =*/ 0,
-    };
-
-    ggml_build_forward_impl(&result, tensor, false);
-
-    return result;
-}
-
 void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep) {
     GGML_ASSERT(gf->n_nodes > 0);
 
@@ -15791,11 +15170,10 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
     }
 
     // remember original gradients which start with zero values
-    void * zero_table[GGML_GRAPH_HASHTABLE_SIZE];
-    memset(zero_table, 0, sizeof(void*) * GGML_GRAPH_HASHTABLE_SIZE);
+    struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
     for (int i = 0; i < gf->n_nodes; i++) {
         if (gf->grads[i]) {
-            hash_insert(zero_table, gf->grads[i]);
+            ggml_hash_insert(zero_table, gf->grads[i]);
         }
     }
 
@@ -15818,26 +15196,54 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
         }
     }
 
-
+    ggml_hash_set_free(zero_table);
 }
 
-struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) {
-    struct ggml_cgraph result = *gf;
-    ggml_build_backward_expand(ctx, gf, &result, keep);
-    return result;
+static size_t ggml_graph_nbytes(size_t size, bool grads) {
+    size_t nbytes = sizeof(struct ggml_cgraph);
+    nbytes += size * sizeof(struct ggml_tensor *) * 2; // leafs + nodes
+    if (grads) {
+        nbytes += size * sizeof(struct ggml_tensor *); // grads
+    }
+    nbytes += ggml_hash_size(size * 2) * sizeof(struct ggml_tensor *); // hash set
+    return nbytes;
 }
 
-struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
-    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
+size_t ggml_graph_overhead_custom(size_t size, bool grads) {
+    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
+}
+
+size_t ggml_graph_overhead(void) {
+    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
+}
+
+struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
+    const size_t obj_size = ggml_graph_nbytes(size, grads);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
     struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
 
+    struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
+
+    size_t hash_size = ggml_hash_size(size * 2);
+    struct ggml_tensor ** nodes_ptr = data_start;
+    struct ggml_tensor ** leafs_ptr = nodes_ptr + size;
+    struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
+    struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;
+
+    // check that we allocated the correct amount of memory
+    assert(obj_size == (size_t) (
+        (grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
+
+    memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
+
     *cgraph = (struct ggml_cgraph) {
+        /*.size         =*/ size,
         /*.n_nodes      =*/ 0,
         /*.n_leafs      =*/ 0,
-        /*.nodes        =*/ { NULL },
-        /*.grads        =*/ { NULL },
-        /*.leafs        =*/ { NULL },
-        /*.hash_table   =*/ { NULL },
+        /*.nodes        =*/ nodes_ptr,
+        /*.grads        =*/ grads_ptr,
+        /*.leafs        =*/ leafs_ptr,
+        /*.hash_table   =*/ { hash_size, hash_keys_ptr },
         /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
         /*.perf_runs    =*/ 0,
         /*.perf_cycles  =*/ 0,
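ggml_new_graph_custom above carves the whole graph out of one context allocation: the ggml_cgraph header is followed by the nodes, leafs and hash-key arrays (plus grads when requested), which is exactly what ggml_graph_nbytes sizes. A small sketch of that arithmetic under the same layout assumptions (the 2048 default and the 80-byte header are illustrative values, not taken from this diff):

    #include <stdio.h>

    // header + nodes + leafs (+ grads) + visited-hash keys, one contiguous block
    size_t graph_nbytes(size_t size, int grads, size_t hash_size, size_t header) {
        size_t nbytes = header;
        nbytes += size * sizeof(void *) * 2;          // nodes + leafs
        if (grads) nbytes += size * sizeof(void *);   // grads
        nbytes += hash_size * sizeof(void *);         // hash set keys
        return nbytes;
    }

    int main(void) {
        // e.g. a 2048-node graph without grads; hash sized to a prime >= 2*2048
        printf("%zu bytes\n", graph_nbytes(2048, 0, 4099, 80));
        return 0;
    }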
@@ -15847,14 +15253,85 @@ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
     return cgraph;
 }
 
-struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
-    struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
-    ggml_build_forward_impl(cgraph, tensor, false);
+struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
+    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
+}
+
+struct ggml_cgraph * ggml_graph_view(struct ggml_context * ctx, struct ggml_cgraph * cgraph0, int i0, int i1) {
+    const size_t obj_size = sizeof(struct ggml_cgraph);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, obj_size);
+    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.size         =*/ 0,
+        /*.n_nodes      =*/ i1 - i0,
+        /*.n_leafs      =*/ 0,
+        /*.nodes        =*/ cgraph0->nodes + i0,
+        /*.grads        =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
+        /*.leafs        =*/ NULL,
+        /*.hash_table   =*/ { 0, NULL },
+        /*.order        =*/ cgraph0->order,
+        /*.perf_runs    =*/ 0,
+        /*.perf_cycles  =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };
+
     return cgraph;
 }
 
-size_t ggml_graph_overhead(void) {
-    return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
+void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
+    GGML_ASSERT(dst->size >= src->n_leafs);
+    GGML_ASSERT(dst->size >= src->n_nodes);
+    GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size);
+
+    dst->n_leafs = src->n_leafs;
+    dst->n_nodes = src->n_nodes;
+    dst->order   = src->order;
+
+    for (int i = 0; i < src->n_leafs; ++i) {
+        dst->leafs[i] = src->leafs[i];
+    }
+
+    for (int i = 0; i < src->n_nodes; ++i) {
+        dst->nodes[i] = src->nodes[i];
+    }
+
+    if (src->grads) {
+        GGML_ASSERT(dst->grads != NULL);
+        for (int i = 0; i < src->n_nodes; ++i) {
+            dst->grads[i] = src->grads[i];
+        }
+    }
+
+    for (size_t i = 0; i < src->visited_hash_table.size; ++i) {
+        if (src->visited_hash_table.keys[i]) {
+            ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]);
+        }
+    }
+}
+
+struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
+    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads != NULL);
+    ggml_graph_cpy(cgraph, result);
+    return result;
+}
+
+void ggml_graph_reset(struct ggml_cgraph * cgraph) {
+    GGML_ASSERT(cgraph->grads != NULL);
+
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        struct ggml_tensor * grad = cgraph->grads[i];
+
+        if (grad) {
+            ggml_set_zero(grad);
+        }
+    }
+}
+
+void ggml_graph_clear(struct ggml_cgraph * cgraph) {
+    cgraph->n_leafs = 0;
+    cgraph->n_nodes = 0;
+    memset(cgraph->visited_hash_table.keys, 0, cgraph->visited_hash_table.size * sizeof(struct ggml_tensor *));
 }
 
 //
@@ -15966,45 +15443,266 @@ static void clear_numa_thread_affinity(void) {
         strerror(rv));
     }
 
-    CPU_FREE(cpus);
-}
-#else
-// TODO: Windows etc.
-// (the linux implementation may also work on BSD, someone should test)
-static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads);  }
-static void clear_numa_thread_affinity(void) {}
-#endif
-
-struct ggml_compute_state_shared {
-    const struct ggml_cgraph * cgraph;
-    const struct ggml_cplan  * cplan;
-
-    int64_t perf_node_start_cycles;
-    int64_t perf_node_start_time_us;
-
-    const int n_threads;
-
-    // synchronization primitives
-    atomic_int n_active; // num active threads
-    atomic_int node_n;   // active graph node
-
-    bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
-    void * abort_callback_data;
-};
-
-struct ggml_compute_state {
-    ggml_thread_t thrd;
-    int ith;
-    struct ggml_compute_state_shared * shared;
-};
-
-static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
-    int64_t cycles_cur  = ggml_perf_cycles()  - st->perf_node_start_cycles;
-    int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
+    CPU_FREE(cpus);
+}
+#else
+// TODO: Windows etc.
+// (the linux implementation may also work on BSD, someone should test)
+static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads);  }
+static void clear_numa_thread_affinity(void) {}
+#endif
+
+struct ggml_compute_state_shared {
+    const struct ggml_cgraph * cgraph;
+    const struct ggml_cplan  * cplan;
+
+    int64_t perf_node_start_cycles;
+    int64_t perf_node_start_time_us;
+
+    const int n_threads;
+
+    // synchronization primitives
+    atomic_int n_active; // num active threads
+    atomic_int node_n;   // active graph node
+
+    bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
+    void * abort_callback_data;
+};
+
+struct ggml_compute_state {
+    ggml_thread_t thrd;
+    int ith;
+    struct ggml_compute_state_shared * shared;
+};
+
+static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
+    int64_t cycles_cur  = ggml_perf_cycles()  - st->perf_node_start_cycles;
+    int64_t time_us_cur = ggml_perf_time_us() - st->perf_node_start_time_us;
+
+    node->perf_runs++;
+    node->perf_cycles  += cycles_cur;
+    node->perf_time_us += time_us_cur;
+}
+
+static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
+    int n_tasks = 0;
+
+    switch (node->op) {
+        case GGML_OP_CPY:
+        case GGML_OP_DUP:
+        case GGML_OP_ADD:
+        case GGML_OP_ADD1:
+        case GGML_OP_ACC:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_SUB:
+        case GGML_OP_DIV:
+        case GGML_OP_SQR:
+        case GGML_OP_SQRT:
+        case GGML_OP_LOG:
+        case GGML_OP_SUM:
+        case GGML_OP_SUM_ROWS:
+        case GGML_OP_MEAN:
+        case GGML_OP_ARGMAX:
+        case GGML_OP_REPEAT:
+        case GGML_OP_REPEAT_BACK:
+            {
+                n_tasks = 1;
+            } break;
+        case GGML_OP_UNARY:
+            switch (ggml_get_unary_op(node)) {
+                case GGML_UNARY_OP_ABS:
+                case GGML_UNARY_OP_SGN:
+                case GGML_UNARY_OP_NEG:
+                case GGML_UNARY_OP_STEP:
+                case GGML_UNARY_OP_TANH:
+                case GGML_UNARY_OP_ELU:
+                case GGML_UNARY_OP_RELU:
+                case GGML_UNARY_OP_LEAKY:
+                    {
+                        n_tasks = 1;
+                    } break;
+
+                case GGML_UNARY_OP_GELU:
+                case GGML_UNARY_OP_GELU_QUICK:
+                case GGML_UNARY_OP_SILU:
+                    {
+                        n_tasks = n_threads;
+                    } break;
+            }
+            break;
+        case GGML_OP_SILU_BACK:
+        case GGML_OP_MUL:
+        case GGML_OP_NORM:
+        case GGML_OP_RMS_NORM:
+        case GGML_OP_RMS_NORM_BACK:
+        case GGML_OP_GROUP_NORM:
+        case GGML_OP_CONCAT:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_MUL_MAT:
+            {
+                n_tasks = n_threads;
+
+                // TODO: use different scheduling for different matrix sizes
+                //const int nr0 = ggml_nrows(node->src[0]);
+                //const int nr1 = ggml_nrows(node->src[1]);
+
+                //n_tasks = MIN(n_threads, MAX(1, nr0/128));
+                //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
+
+#if defined(GGML_USE_CUBLAS)
+                if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
+                    n_tasks = 1; // TODO: this actually is doing nothing
+                                 // the threads are still spinning
+                }
+#elif defined(GGML_USE_CLBLAST)
+                if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
+                    n_tasks = 1; // TODO: this actually is doing nothing
+                                 // the threads are still spinning
+                }
+#endif
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
+                if (ggml_compute_forward_mul_mat_use_blas(node->src[0], node->src[1], node)) {
+                    n_tasks = 1; // TODO: this actually is doing nothing
+                                 // the threads are still spinning
+                }
+#endif
+            } break;
+        case GGML_OP_OUT_PROD:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_SCALE:
+        case GGML_OP_SET:
+        case GGML_OP_CONT:
+        case GGML_OP_RESHAPE:
+        case GGML_OP_VIEW:
+        case GGML_OP_PERMUTE:
+        case GGML_OP_TRANSPOSE:
+        case GGML_OP_GET_ROWS:
+        case GGML_OP_GET_ROWS_BACK:
+        case GGML_OP_DIAG:
+            {
+                n_tasks = 1;
+            } break;
+        case GGML_OP_DIAG_MASK_ZERO:
+        case GGML_OP_DIAG_MASK_INF:
+        case GGML_OP_SOFT_MAX:
+        case GGML_OP_SOFT_MAX_BACK:
+        case GGML_OP_ROPE:
+        case GGML_OP_ROPE_BACK:
+        case GGML_OP_ADD_REL_POS:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_ALIBI:
+            {
+                n_tasks = 1; //TODO
+            } break;
+        case GGML_OP_CLAMP:
+            {
+                n_tasks = 1; //TODO
+            } break;
+        case GGML_OP_CONV_TRANSPOSE_1D:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_IM2COL:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_CONV_TRANSPOSE_2D:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_POOL_1D:
+        case GGML_OP_POOL_2D:
+            {
+                n_tasks = 1;
+            } break;
+        case GGML_OP_UPSCALE:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_FLASH_ATTN:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_FLASH_FF:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_FLASH_ATTN_BACK:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_WIN_PART:
+        case GGML_OP_WIN_UNPART:
+        case GGML_OP_GET_REL_POS:
+        case GGML_OP_MAP_UNARY:
+        case GGML_OP_MAP_BINARY:
+        case GGML_OP_MAP_CUSTOM1_F32:
+        case GGML_OP_MAP_CUSTOM2_F32:
+        case GGML_OP_MAP_CUSTOM3_F32:
+            {
+                n_tasks = 1;
+            } break;
+        case GGML_OP_MAP_CUSTOM1:
+            {
+                struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
+                if (p->n_tasks == GGML_N_TASKS_MAX) {
+                    n_tasks = n_threads;
+                } else {
+                    n_tasks = MIN(p->n_tasks, n_threads);
+                }
+            } break;
+        case GGML_OP_MAP_CUSTOM2:
+            {
+                struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
+                if (p->n_tasks == GGML_N_TASKS_MAX) {
+                    n_tasks = n_threads;
+                } else {
+                    n_tasks = MIN(p->n_tasks, n_threads);
+                }
+            } break;
+        case GGML_OP_MAP_CUSTOM3:
+            {
+                struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
+                if (p->n_tasks == GGML_N_TASKS_MAX) {
+                    n_tasks = n_threads;
+                } else {
+                    n_tasks = MIN(p->n_tasks, n_threads);
+                }
+            } break;
+        case GGML_OP_CROSS_ENTROPY_LOSS:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
+            {
+                n_tasks = n_threads;
+            } break;
+        case GGML_OP_NONE:
+            {
+                n_tasks = 1;
+            } break;
+        case GGML_OP_COUNT:
+            {
+                GGML_ASSERT(false);
+            } break;
+        default:
+            {
+                printf("%s: op %s not implemented\n", __func__, ggml_op_name(node->op));
+                GGML_ASSERT(false);
+            } break;
+    }
+
+    assert(n_tasks > 0);
 
-    node->perf_runs++;
-    node->perf_cycles  += cycles_cur;
-    node->perf_time_us += time_us_cur;
+    return n_tasks;
 }
 
 static thread_ret_t ggml_graph_compute_thread(void * data) {
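A consequence of the new ggml_get_n_tasks above: per-node task counts are no longer precomputed into a cplan array but derived on demand, which is what the following hunks switch the worker threads over to. Sketch of the call-site change (paraphrased from this diff):

    // before: struct ggml_cplan carried a per-node n_tasks array, and workers did
    //     const int n_tasks = n_tasks_arr[node_n];
    // after: each worker asks per node, using only the thread count
    //     const int n_tasks = ggml_get_n_tasks(node, n_threads);
    // which is also what lets ggml_graph_plan drop its fixed-size n_tasks array.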
@@ -16013,7 +15711,6 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     const struct ggml_cgraph * cgraph = state->shared->cgraph;
     const struct ggml_cplan  * cplan  = state->shared->cplan;
 
-    const int * n_tasks_arr = cplan->n_tasks;
     const int   n_threads   = state->shared->n_threads;
 
     set_numa_thread_affinity(state->ith, n_threads);
@@ -16038,9 +15735,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
         if (node_n != -1) {
             /* FINALIZE */
-            struct ggml_tensor * node = state->shared->cgraph->nodes[node_n];
+            struct ggml_tensor * node = cgraph->nodes[node_n];
             if (GGML_OP_HAS_FINALIZE[node->op]) {
-                params.nth = n_tasks_arr[node_n];
+                params.nth = ggml_get_n_tasks(node, n_threads);
                 ggml_compute_forward(&params, node);
             }
             ggml_graph_compute_perf_stats_node(node, state->shared);
@@ -16051,7 +15748,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
             GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
 
             struct ggml_tensor * node = cgraph->nodes[node_n];
-            const int n_tasks = n_tasks_arr[node_n];
+            const int n_tasks = ggml_get_n_tasks(node, n_threads);
 
             state->shared->perf_node_start_cycles  = ggml_perf_cycles();
             state->shared->perf_node_start_time_us = ggml_perf_time_us();
@@ -16109,7 +15806,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
         /* COMPUTE */
         struct ggml_tensor * node = cgraph->nodes[node_n];
-        const int n_tasks = n_tasks_arr[node_n];
+        const int n_tasks = ggml_get_n_tasks(node, n_threads);
 
         struct ggml_compute_params params = {
             /*.type  =*/ GGML_TASK_COMPUTE,
@@ -16143,121 +15840,46 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
 
         struct ggml_tensor * node = cgraph->nodes[i];
 
+        size_t cur = 0;
+
         switch (node->op) {
             case GGML_OP_CPY:
             case GGML_OP_DUP:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = 0;
                     if (ggml_is_quantized(node->type)) {
                         cur = ggml_type_size(GGML_TYPE_F32) * node->ne[0] * n_tasks;
                     }
-
-                    work_size = MAX(work_size, cur);
                 } break;
             case GGML_OP_ADD:
             case GGML_OP_ADD1:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = 0;
-
                     if (ggml_is_quantized(node->src[0]->type)) {
                         cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
                     }
-
-                    work_size = MAX(work_size, cur);
                 } break;
             case GGML_OP_ACC:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = 0;
-
                     if (ggml_is_quantized(node->src[0]->type)) {
                         cur = ggml_type_size(GGML_TYPE_F32) * node->src[1]->ne[0] * n_tasks;
                     }
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_SUB:
-            case GGML_OP_DIV:
-            case GGML_OP_SQR:
-            case GGML_OP_SQRT:
-            case GGML_OP_LOG:
-            case GGML_OP_SUM:
-            case GGML_OP_SUM_ROWS:
-            case GGML_OP_MEAN:
-            case GGML_OP_ARGMAX:
-            case GGML_OP_REPEAT:
-            case GGML_OP_REPEAT_BACK:
-                {
-                    n_tasks = 1;
-                } break;
-
-            case GGML_OP_UNARY:
-                {
-                    switch (ggml_get_unary_op(node)) {
-                        case GGML_UNARY_OP_ABS:
-                        case GGML_UNARY_OP_SGN:
-                        case GGML_UNARY_OP_NEG:
-                        case GGML_UNARY_OP_STEP:
-                        case GGML_UNARY_OP_TANH:
-                        case GGML_UNARY_OP_ELU:
-                        case GGML_UNARY_OP_RELU:
-                            {
-                                n_tasks = 1;
-                            } break;
-
-                        case GGML_UNARY_OP_GELU:
-                        case GGML_UNARY_OP_GELU_QUICK:
-                        case GGML_UNARY_OP_SILU:
-                            {
-                                n_tasks = n_threads;
-                            } break;
-                    }
-                } break;
-            case GGML_OP_SILU_BACK:
-            case GGML_OP_MUL:
-            case GGML_OP_NORM:
-            case GGML_OP_RMS_NORM:
-            case GGML_OP_RMS_NORM_BACK:
-            case GGML_OP_GROUP_NORM:
-                {
-                    n_tasks = n_threads;
                 } break;
-            case GGML_OP_CONCAT:
             case GGML_OP_MUL_MAT:
                 {
-                    n_tasks = n_threads;
-
-                    // TODO: use different scheduling for different matrix sizes
-                    //const int nr0 = ggml_nrows(node->src[0]);
-                    //const int nr1 = ggml_nrows(node->src[1]);
-
-                    //n_tasks = MIN(n_threads, MAX(1, nr0/128));
-                    //printf("nr0 = %8d, nr1 = %8d, nr0*nr1 = %8d, n_tasks%d\n", nr0, nr1, nr0*nr1, n_tasks);
-
-                    size_t cur = 0;
                     const enum ggml_type vec_dot_type = type_traits[node->src[0]->type].vec_dot_type;
 
-#if defined(GGML_USE_CUBLAS)
-                    if (ggml_cuda_can_mul_mat(node->src[0], node->src[1], node)) {
-                        n_tasks = 1; // TODO: this actually is doing nothing
-                                     // the threads are still spinning
-                    } else
-#elif defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_CLBLAST)
                     if (ggml_cl_can_mul_mat(node->src[0], node->src[1], node)) {
-                        n_tasks = 1; // TODO: this actually is doing nothing
-                                     // the threads are still spinning
                         cur = ggml_cl_mul_mat_get_wsize(node->src[0], node->src[1], node);
                     } else
 #endif
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
                     if (ggml_compute_forward_mul_mat_use_blas(node->src[0], node->src[1], node)) {
-                        n_tasks = 1; // TODO: this actually is doing nothing
-                                     // the threads are still spinning
                         if (node->src[0]->type != GGML_TYPE_F32) {
                             // here we need memory just for single 2D matrix from src0
                             cur = ggml_type_size(GGML_TYPE_F32)*(node->src[0]->ne[0]*node->src[0]->ne[1]);
@@ -16266,108 +15888,18 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
 #endif
                     if (node->src[1]->type != vec_dot_type) {
                         cur = ggml_type_size(vec_dot_type)*ggml_nelements(node->src[1])/ggml_blck_size(vec_dot_type);
-                    } else {
-                        cur = 0;
                     }
-
-                    work_size = MAX(work_size, cur);
                 } break;
             case GGML_OP_OUT_PROD:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = 0;
-
                     if (ggml_is_quantized(node->src[0]->type)) {
                         cur = ggml_type_size(GGML_TYPE_F32) * node->src[0]->ne[0] * n_tasks;
                     }
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_SCALE:
-                {
-                    n_tasks = 1;
-                } break;
-            case GGML_OP_SET:
-            case GGML_OP_CONT:
-            case GGML_OP_RESHAPE:
-            case GGML_OP_VIEW:
-            case GGML_OP_PERMUTE:
-            case GGML_OP_TRANSPOSE:
-            case GGML_OP_GET_ROWS:
-            case GGML_OP_GET_ROWS_BACK:
-            case GGML_OP_DIAG:
-                {
-                    n_tasks = 1;
-                } break;
-            case GGML_OP_DIAG_MASK_ZERO:
-            case GGML_OP_DIAG_MASK_INF:
-            case GGML_OP_SOFT_MAX:
-            case GGML_OP_SOFT_MAX_BACK:
-            case GGML_OP_ROPE:
-            case GGML_OP_ROPE_BACK:
-            case GGML_OP_ADD_REL_POS:
-                {
-                    n_tasks = n_threads;
-                } break;
-            case GGML_OP_ALIBI:
-                {
-                    n_tasks = 1; //TODO
-                } break;
-            case GGML_OP_CLAMP:
-                {
-                    n_tasks = 1; //TODO
-                } break;
-            case GGML_OP_CONV_1D:
-                {
-                    n_tasks = n_threads;
-
-                    GGML_ASSERT(node->src[0]->ne[3] == 1);
-                    GGML_ASSERT(node->src[1]->ne[2] == 1);
-                    GGML_ASSERT(node->src[1]->ne[3] == 1);
-
-                    const int64_t ne00 = node->src[0]->ne[0];
-                    const int64_t ne01 = node->src[0]->ne[1];
-                    const int64_t ne02 = node->src[0]->ne[2];
-
-                    const int64_t ne10 = node->src[1]->ne[0];
-                    const int64_t ne11 = node->src[1]->ne[1];
-
-                    const int64_t ne0 = node->ne[0];
-                    const int64_t ne1 = node->ne[1];
-                    const int64_t nk  = ne00;
-                    const int64_t ew0 = nk * ne01;
-
-                    UNUSED(ne02);
-                    UNUSED(ne10);
-                    UNUSED(ne11);
-
-                    size_t cur = 0;
-
-                    if (node->src[0]->type == GGML_TYPE_F16 &&
-                        node->src[1]->type == GGML_TYPE_F32) {
-                        cur = sizeof(ggml_fp16_t)*(ne0*ne1*ew0);
-                    } else if (node->src[0]->type == GGML_TYPE_F32 &&
-                               node->src[1]->type == GGML_TYPE_F32) {
-                        cur = sizeof(float)*(ne0*ne1*ew0);
-                    } else {
-                        GGML_ASSERT(false);
-                    }
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_CONV_1D_STAGE_0:
-                {
-                    n_tasks = n_threads;
-                } break;
-            case GGML_OP_CONV_1D_STAGE_1:
-                {
-                    n_tasks = n_threads;
                 } break;
             case GGML_OP_CONV_TRANSPOSE_1D:
                 {
-                    n_tasks = n_threads;
-
                     GGML_ASSERT(node->src[0]->ne[3] == 1);
                     GGML_ASSERT(node->src[1]->ne[2] == 1);
                     GGML_ASSERT(node->src[1]->ne[3] == 1);
@@ -16379,7 +15911,6 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                     const int64_t ne10 = node->src[1]->ne[0]; // L
                     const int64_t ne11 = node->src[1]->ne[1]; // Cin
 
-                    size_t cur = 0;
                     if (node->src[0]->type == GGML_TYPE_F16 &&
                         node->src[1]->type == GGML_TYPE_F32) {
                         cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02;
@@ -16391,59 +15922,13 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                     } else {
                         GGML_ASSERT(false);
                     }
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_CONV_2D:
-                {
-                    n_tasks = n_threads;
-
-                    const int64_t ne00 = node->src[0]->ne[0]; // W
-                    const int64_t ne01 = node->src[0]->ne[1]; // H
-                    const int64_t ne02 = node->src[0]->ne[2]; // C
-                    const int64_t ne03 = node->src[0]->ne[3]; // N
-
-                    const int64_t ne10 = node->src[1]->ne[0]; // W
-                    const int64_t ne11 = node->src[1]->ne[1]; // H
-                    const int64_t ne12 = node->src[1]->ne[2]; // C
-
-                    const int64_t ne0 = node->ne[0];
-                    const int64_t ne1 = node->ne[1];
-                    const int64_t ne2 = node->ne[2];
-                    const int64_t ne3 = node->ne[3];
-                    const int64_t nk = ne00*ne01;
-                    const int64_t ew0 = nk * ne02;
-
-                    UNUSED(ne03);
-                    UNUSED(ne2);
-
-                    size_t cur = 0;
-
-                    if (node->src[0]->type == GGML_TYPE_F16 &&
-                        node->src[1]->type == GGML_TYPE_F32) {
-                        // im2col: [N*OH*OW, IC*KH*KW]
-                        cur = sizeof(ggml_fp16_t)*(ne3*ne0*ne1*ew0);
-                    } else if (node->src[0]->type == GGML_TYPE_F32 &&
-                               node->src[1]->type == GGML_TYPE_F32) {
-                        cur = sizeof(float)* (ne10*ne11*ne12);
-                    } else {
-                        GGML_ASSERT(false);
-                    }
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_CONV_2D_STAGE_0:
-                {
-                    n_tasks = n_threads;
                 } break;
-            case GGML_OP_CONV_2D_STAGE_1:
+            case GGML_OP_IM2COL:
                 {
                     n_tasks = n_threads;
                 } break;
             case GGML_OP_CONV_TRANSPOSE_2D:
                 {
-                    n_tasks = n_threads;
-
                     const int64_t ne00 = node->src[0]->ne[0]; // W
                     const int64_t ne01 = node->src[0]->ne[1]; // H
                     const int64_t ne02 = node->src[0]->ne[2]; // Channels Out
@@ -16453,141 +15938,66 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
                     const int64_t ne11 = node->src[1]->ne[1]; // H
                     const int64_t ne12 = node->src[1]->ne[2]; // Channels In
 
-                    size_t cur = 0;
                     cur += sizeof(ggml_fp16_t)*ne00*ne01*ne02*ne03;
                     cur += sizeof(ggml_fp16_t)*ne10*ne11*ne12;
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_POOL_1D:
-            case GGML_OP_POOL_2D:
-                {
-                    n_tasks = 1;
-                } break;
-            case GGML_OP_UPSCALE:
-                {
-                    n_tasks = n_threads;
                 } break;
             case GGML_OP_FLASH_ATTN:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = 0;
-
                     const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
 
                     if (node->src[1]->type == GGML_TYPE_F32) {
                         cur  = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
-                    }
-
-                    if (node->src[1]->type == GGML_TYPE_F16) {
+                    } else if (node->src[1]->type == GGML_TYPE_F16) {
                         cur  = sizeof(float)*ne11*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*ne11*n_tasks; // this is overestimated by x2
                     }
-
-                    work_size = MAX(work_size, cur);
                 } break;
             case GGML_OP_FLASH_FF:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = 0;
-
                     if (node->src[1]->type == GGML_TYPE_F32) {
                         cur  = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
-                    }
-
-                    if (node->src[1]->type == GGML_TYPE_F16) {
+                    } else if (node->src[1]->type == GGML_TYPE_F16) {
                         cur  = sizeof(float)*node->src[1]->ne[1]*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*node->src[1]->ne[1]*n_tasks; // this is overestimated by x2
                     }
-
-                    work_size = MAX(work_size, cur);
                 } break;
             case GGML_OP_FLASH_ATTN_BACK:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = 0;
-
                     const int64_t    D = node->src[0]->ne[0];
                     const int64_t ne11 = ggml_up(node->src[1]->ne[1], GGML_SOFT_MAX_UNROLL);
                     const int64_t mxDn = MAX(D, ne11) * 2; // *2 because of S and SM in ggml_compute_forward_flash_attn_back
                     if (node->src[1]->type == GGML_TYPE_F32) {
                         cur  = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
-                    }
-
-                    if (node->src[1]->type == GGML_TYPE_F16) {
+                    } else if (node->src[1]->type == GGML_TYPE_F16) {
                         cur  = sizeof(float)*mxDn*n_tasks; // TODO: this can become (n_tasks-1)
                         cur += sizeof(float)*mxDn*n_tasks; // this is overestimated by x2
                     }
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_WIN_PART:
-            case GGML_OP_WIN_UNPART:
-            case GGML_OP_GET_REL_POS:
-            case GGML_OP_MAP_UNARY:
-            case GGML_OP_MAP_BINARY:
-            case GGML_OP_MAP_CUSTOM1_F32:
-            case GGML_OP_MAP_CUSTOM2_F32:
-            case GGML_OP_MAP_CUSTOM3_F32:
-                {
-                    n_tasks = 1;
-                } break;
-            case GGML_OP_MAP_CUSTOM1:
-                {
-                    struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
-                    if (p->n_tasks == GGML_N_TASKS_MAX) {
-                        n_tasks = n_threads;
-                    } else {
-                        n_tasks = MIN(p->n_tasks, n_threads);
-                    }
-                } break;
-            case GGML_OP_MAP_CUSTOM2:
-                {
-                    struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
-                    if (p->n_tasks == GGML_N_TASKS_MAX) {
-                        n_tasks = n_threads;
-                    } else {
-                        n_tasks = MIN(p->n_tasks, n_threads);
-                    }
-                } break;
-            case GGML_OP_MAP_CUSTOM3:
-                {
-                    struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
-                    if (p->n_tasks == GGML_N_TASKS_MAX) {
-                        n_tasks = n_threads;
-                    } else {
-                        n_tasks = MIN(p->n_tasks, n_threads);
-                    }
                 } break;
+
             case GGML_OP_CROSS_ENTROPY_LOSS:
                 {
                     n_tasks = n_threads;
 
-                    size_t cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks);
-
-                    work_size = MAX(work_size, cur);
-                } break;
-            case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
-                {
-                    n_tasks = n_threads;
-                } break;
-            case GGML_OP_NONE:
-                {
-                    n_tasks = 1;
+                    cur = ggml_type_size(node->type)*(n_tasks + node->src[0]->ne[0]*n_tasks);
                 } break;
             case GGML_OP_COUNT:
                 {
                     GGML_ASSERT(false);
                 } break;
+            default:
+                break;
         }
 
-
+        work_size = MAX(work_size, cur);
     }
 
     if (work_size > 0) {
@@ -16609,12 +16019,6 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         if (cplan->work_size > 0) {
             GGML_ASSERT(cplan->work_data);
         }
-
-        for (int i = 0; i < cgraph->n_nodes; ++i) {
-            if (cgraph->nodes[i]->op != GGML_OP_NONE) {
-                GGML_ASSERT(cplan->n_tasks[i] > 0);
-            }
-        }
     }
 
     const int n_threads = cplan->n_threads;
@@ -16687,16 +16091,6 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     return compute_status;
 }
 
-void ggml_graph_reset(struct ggml_cgraph * cgraph) {
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        struct ggml_tensor * grad = cgraph->grads[i];
-
-        if (grad) {
-            ggml_set_zero(grad);
-        }
-    }
-}
-
 void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
 
@@ -16823,12 +16217,12 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
         const uint32_t magic   = GGML_FILE_MAGIC;
         const uint32_t version = GGML_FILE_VERSION;
         const uint32_t n_leafs = cgraph->n_leafs;
-        const uint32_t nodes   = cgraph->n_nodes;
+        const uint32_t n_nodes = cgraph->n_nodes;
 
         fwrite(&magic,     sizeof(uint32_t), 1, fout);
         fwrite(&version,   sizeof(uint32_t), 1, fout);
         fwrite(&n_leafs,   sizeof(uint32_t), 1, fout);
-        fwrite(&nodes,     sizeof(uint32_t), 1, fout);
+        fwrite(&n_nodes, sizeof(uint32_t), 1, fout);
         fwrite(&size_eval, sizeof(uint64_t), 1, fout);
     }
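For reference, the exported header is five fixed-width fields written in the order of the fwrite() calls above. A hedged sketch of the matching reader (ours, not from the diff; "fin" and the return convention are assumptions):

#include <stdint.h>
#include <stdio.h>

// Reads back the header written by ggml_graph_export(); returns 1 on success.
static int read_graph_header(FILE * fin, uint32_t * magic, uint32_t * version,
                             uint32_t * n_leafs, uint32_t * n_nodes, uint64_t * size_eval) {
    return fread(magic,     sizeof(uint32_t), 1, fin) == 1 &&
           fread(version,   sizeof(uint32_t), 1, fin) == 1 &&
           fread(n_leafs,   sizeof(uint32_t), 1, fin) == 1 &&
           fread(n_nodes,   sizeof(uint32_t), 1, fin) == 1 &&
           fread(size_eval, sizeof(uint64_t), 1, fin) == 1;
}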
@@ -16916,7 +16310,7 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                 if (idx == -1) {
                     for (int k = 0; k < cgraph->n_nodes; ++k) {
                         if (args[j] == cgraph->nodes[k]) {
-                            idx = GGML_MAX_NODES + k;
+                            idx = cgraph->n_leafs + k;
                             break;
                         }
                     }
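The serialized argument index now offsets node references by the graph's actual leaf count rather than a compile-time capacity, so the importer can split an index without knowing any fixed maximum. A hedged decoding sketch ("decode_arg" is our name, not a ggml symbol):

#include <stdint.h>

struct ggml_tensor;

// Splits a serialized argument index: values below n_leafs name a leaf,
// anything else names node (idx - n_leafs).
static struct ggml_tensor * decode_arg(struct ggml_tensor ** leafs, struct ggml_tensor ** nodes,
                                       uint32_t n_leafs, uint32_t idx) {
    if (idx < n_leafs) {
        return leafs[idx];
    }
    return nodes[idx - n_leafs];
}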
@@ -16943,11 +16337,11 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
     }
 }
 
-struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
+struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
     assert(*ctx_data == NULL);
     assert(*ctx_eval == NULL);
 
-    struct ggml_cgraph result = { 0 };
+    struct ggml_cgraph * result = NULL;
 
     struct ggml_tensor * data = NULL;
 
@@ -17019,13 +16413,11 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
     const uint32_t n_leafs   = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
     const uint32_t n_nodes   = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
     const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
-
-    result.n_leafs = n_leafs;
-    result.n_nodes = n_nodes;
+    const int graph_size = MAX(n_leafs, n_nodes);
 
     // create the data context
     {
-        const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
+        const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead() + ggml_graph_overhead_custom(graph_size, false);
 
         struct ggml_init_params params = {
             .mem_size = size_eval + overhead,
@@ -17041,6 +16433,12 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
         }
     }
 
+    result = ggml_new_graph_custom(*ctx_eval, graph_size, false);
+
+    result->n_leafs = n_leafs;
+    result->n_nodes = n_nodes;
+
+
     // leafs
     {
         uint32_t type;
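Because graphs are now dynamically sized objects, the import path reserves room for the graph itself (ggml_graph_overhead_custom) when sizing the context, then materializes it with ggml_new_graph_custom. A hedged stand-alone sketch of that allocation pattern (function name and parameter choices are ours):

#include "ggml.h"
#include <stdint.h>

// Size a context to hold both the tensor data and the graph object, then
// create the graph inside it; mirrors the import hunks above in isolation.
static struct ggml_cgraph * alloc_import_graph(struct ggml_context ** ctx_eval,
                                               uint32_t n_leafs, uint32_t n_nodes, uint64_t size_eval) {
    const int    graph_size = (int) (n_leafs > n_nodes ? n_leafs : n_nodes);
    const size_t overhead   = (n_leafs + n_nodes)*ggml_tensor_overhead()
                            + ggml_graph_overhead_custom(graph_size, false);

    struct ggml_init_params params = {
        .mem_size   = size_eval + overhead,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };

    *ctx_eval = ggml_init(params);

    return ggml_new_graph_custom(*ctx_eval, graph_size, false); // false: no gradient slots
}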
@@ -17079,7 +16477,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                 tensor->nb[j] = nb[j];
             }
 
-            result.leafs[i] = tensor;
+            result->leafs[i] = tensor;
 
             ptr += ggml_nbytes(tensor);
 
@@ -17131,10 +16529,10 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                         continue;
                     }
 
-                    if (arg_idx < GGML_MAX_NODES) {
-                        args[j] = result.leafs[arg_idx];
+                    if (arg_idx < result->n_leafs) {
+                        args[j] = result->leafs[arg_idx];
                     } else {
-                        args[j] = result.nodes[arg_idx - GGML_MAX_NODES];
+                        args[j] = result->nodes[arg_idx - result->n_leafs];
                     }
                 }
 
@@ -17186,7 +16584,7 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                 tensor->src[j] = args[j];
             }
 
-            result.nodes[i] = tensor;
+            result->nodes[i] = tensor;
 
             fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
         }
@@ -18091,10 +17489,11 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
         case GGML_OPT_ADAM:
             {
                 result = (struct ggml_opt_params) {
-                    .type      = GGML_OPT_ADAM,
-                    .n_threads = 1,
-                    .past      = 0,
-                    .delta     = 1e-5f,
+                    .type       = GGML_OPT_ADAM,
+                    .graph_size = GGML_DEFAULT_GRAPH_SIZE,
+                    .n_threads  = 1, // FIXME: GGML_DEFAULT_N_THREADS ?
+                    .past       = 0,
+                    .delta      = 1e-5f,
 
                     .max_no_improvement = 100,
@@ -18121,10 +17520,11 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
         case GGML_OPT_LBFGS:
             {
                 result = (struct ggml_opt_params) {
-                    .type      = GGML_OPT_LBFGS,
-                    .n_threads = 1,
-                    .past      = 0,
-                    .delta     = 1e-5f,
+                    .type       = GGML_OPT_LBFGS,
+                    .graph_size = GGML_DEFAULT_GRAPH_SIZE,
+                    .n_threads  = 1,
+                    .past       = 0,
+                    .delta      = 1e-5f,
 
                     .max_no_improvement = 0,
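Both default-parameter branches gain the new graph_size field, so optimizer callers whose forward+backward graphs exceed GGML_DEFAULT_GRAPH_SIZE can raise it up front. A hedged usage sketch (the 4096 override and helper name are illustrative):

#include "ggml.h"

static struct ggml_opt_params my_opt_params(void) {
    struct ggml_opt_params params = ggml_opt_default_params(GGML_OPT_ADAM);
    params.graph_size = 4096; // raise when the graph has more nodes than the default
    return params;
}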
@@ -18266,14 +17666,11 @@ enum ggml_opt_result ggml_opt_resume(
         struct ggml_tensor * f) {
 
     // build forward + backward compute graphs
-    struct ggml_tensor * gfbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32) + (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0));
-    struct ggml_tensor * gbbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32) + (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0));
-
-    struct ggml_cgraph * gf = (struct ggml_cgraph *) gfbuf->data;
-    struct ggml_cgraph * gb = (struct ggml_cgraph *) gbbuf->data;
+    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, opt->params.graph_size, true);
+    ggml_build_forward_expand(gf, f);
 
-    *gf = ggml_build_forward (f);
-    *gb = ggml_build_backward(ctx, gf, true);
+    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
+    ggml_build_backward_expand(ctx, gf, gb, true);
 
     return ggml_opt_resume_g(ctx, opt, f, gf, gb, NULL, NULL);
 }
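ggml_opt_resume() now builds its graphs through the public graph API instead of casting I32 tensor storage to struct ggml_cgraph. A hedged sketch of the same construction pattern in isolation ("build_training_graphs" is our name):

#include "ggml.h"
#include <stddef.h>

static void build_training_graphs(struct ggml_context * ctx, struct ggml_tensor * loss,
                                  size_t graph_size,
                                  struct ggml_cgraph ** gf_out, struct ggml_cgraph ** gb_out) {
    // forward graph with gradient slots (grads = true)
    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, graph_size, true);
    ggml_build_forward_expand(gf, loss);

    // the backward graph starts as a copy of the forward one
    struct ggml_cgraph * gb = ggml_graph_dup(ctx, gf);
    ggml_build_backward_expand(ctx, gf, gb, true);

    *gf_out = gf;
    *gb_out = gb;
}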
@@ -18729,7 +18126,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     {
         ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
 
-        for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
             struct gguf_kv * kv = &ctx->kv[i];
 
             //fprintf(stderr, "%s: reading kv %d\n", __func__, i);
@@ -18776,7 +18173,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                     case GGUF_TYPE_STRING:
                         {
                             kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
-                            for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
+                            for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                                 ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
                             }
                         } break;
@@ -18804,7 +18201,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     {
         ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
 
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
            struct gguf_tensor_info * info = &ctx->infos[i];
 
            for (int j = 0; j < GGML_MAX_DIMS; ++j) {
@@ -18851,7 +18248,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
     // compute the total size of the data section, taking into account the alignment
     {
         ctx->size = 0;
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
             struct gguf_tensor_info * info = &ctx->infos[i];
 
             const int64_t ne =
@@ -18920,7 +18317,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         ggml_set_no_alloc(ctx_data, true);
 
         // create the tensors
-        for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
+        for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
             const int64_t ne[GGML_MAX_DIMS] = {
                 ctx->infos[i].ne[0],
                 ctx->infos[i].ne[1],
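All of the gguf parsing loops above now use a uint64_t induction variable to match the 64-bit counts in the file header. A hedged illustration (ours) of the failure mode this avoids: a 32-bit counter compared against a 64-bit bound wraps at 2^32 and never reaches it.

#include <stdint.h>

// With 'uint32_t i' this loop would never terminate once n > UINT32_MAX,
// because i wraps to 0 before reaching n; a uint64_t counter is correct.
static uint64_t count_items(uint64_t n) {
    uint64_t seen = 0;
    for (uint64_t i = 0; i < n; ++i) {
        seen++;
    }
    return seen;
}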
@@ -19055,24 +18452,29 @@ int gguf_find_key(const struct gguf_context * ctx, const char * key) {
 }
 
 const char * gguf_get_key(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
     return ctx->kv[key_id].key.data;
 }
 
 enum gguf_type gguf_get_kv_type(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
     return ctx->kv[key_id].type;
 }
 
 enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
     GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
     return ctx->kv[key_id].value.arr.type;
 }
 
 const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
     GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
     return ctx->kv[key_id].value.arr.data;
 }
 
 const char * gguf_get_arr_str(const struct gguf_context * ctx, int key_id, int i) {
+    GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
     GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
     struct gguf_kv * kv = &ctx->kv[key_id];
     struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
|
|
19080
18482
|
}
|
19081
18483
|
|
19082
18484
|
int gguf_get_arr_n(const struct gguf_context * ctx, int key_id) {
|
18485
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19083
18486
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_ARRAY);
|
19084
18487
|
return ctx->kv[key_id].value.arr.n;
|
19085
18488
|
}
|
19086
18489
|
|
19087
18490
|
uint8_t gguf_get_val_u8(const struct gguf_context * ctx, int key_id) {
|
18491
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19088
18492
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT8);
|
19089
18493
|
return ctx->kv[key_id].value.uint8;
|
19090
18494
|
}
|
19091
18495
|
|
19092
18496
|
int8_t gguf_get_val_i8(const struct gguf_context * ctx, int key_id) {
|
18497
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19093
18498
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT8);
|
19094
18499
|
return ctx->kv[key_id].value.int8;
|
19095
18500
|
}
|
19096
18501
|
|
19097
18502
|
uint16_t gguf_get_val_u16(const struct gguf_context * ctx, int key_id) {
|
18503
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19098
18504
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT16);
|
19099
18505
|
return ctx->kv[key_id].value.uint16;
|
19100
18506
|
}
|
19101
18507
|
|
19102
18508
|
int16_t gguf_get_val_i16(const struct gguf_context * ctx, int key_id) {
|
18509
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19103
18510
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT16);
|
19104
18511
|
return ctx->kv[key_id].value.int16;
|
19105
18512
|
}
|
19106
18513
|
|
19107
18514
|
uint32_t gguf_get_val_u32(const struct gguf_context * ctx, int key_id) {
|
18515
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19108
18516
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT32);
|
19109
18517
|
return ctx->kv[key_id].value.uint32;
|
19110
18518
|
}
|
19111
18519
|
|
19112
18520
|
int32_t gguf_get_val_i32(const struct gguf_context * ctx, int key_id) {
|
18521
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19113
18522
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT32);
|
19114
18523
|
return ctx->kv[key_id].value.int32;
|
19115
18524
|
}
|
19116
18525
|
|
19117
18526
|
float gguf_get_val_f32(const struct gguf_context * ctx, int key_id) {
|
18527
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19118
18528
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT32);
|
19119
18529
|
return ctx->kv[key_id].value.float32;
|
19120
18530
|
}
|
19121
18531
|
|
19122
18532
|
uint64_t gguf_get_val_u64(const struct gguf_context * ctx, int key_id) {
|
18533
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19123
18534
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_UINT64);
|
19124
18535
|
return ctx->kv[key_id].value.uint64;
|
19125
18536
|
}
|
19126
18537
|
|
19127
18538
|
int64_t gguf_get_val_i64(const struct gguf_context * ctx, int key_id) {
|
18539
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19128
18540
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_INT64);
|
19129
18541
|
return ctx->kv[key_id].value.int64;
|
19130
18542
|
}
|
19131
18543
|
|
19132
18544
|
double gguf_get_val_f64(const struct gguf_context * ctx, int key_id) {
|
18545
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19133
18546
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_FLOAT64);
|
19134
18547
|
return ctx->kv[key_id].value.float64;
|
19135
18548
|
}
|
19136
18549
|
|
19137
18550
|
bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id) {
|
18551
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19138
18552
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_BOOL);
|
19139
18553
|
return ctx->kv[key_id].value.bool_;
|
19140
18554
|
}
|
19141
18555
|
|
19142
18556
|
const char * gguf_get_val_str(const struct gguf_context * ctx, int key_id) {
|
18557
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
19143
18558
|
GGML_ASSERT(ctx->kv[key_id].type == GGUF_TYPE_STRING);
|
19144
18559
|
return ctx->kv[key_id].value.str.data;
|
19145
18560
|
}
|
19146
18561
|
|
18562
|
+
const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id) {
|
18563
|
+
GGML_ASSERT(key_id >= 0 && key_id < gguf_get_n_kv(ctx));
|
18564
|
+
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_ARRAY);
|
18565
|
+
GGML_ASSERT(ctx->kv[key_id].type != GGUF_TYPE_STRING);
|
18566
|
+
return &ctx->kv[key_id].value;
|
18567
|
+
}
|
18568
|
+
|
19147
18569
|
int gguf_get_n_tensors(const struct gguf_context * ctx) {
|
19148
18570
|
return ctx->header.n_tensors;
|
19149
18571
|
}
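Every gguf getter above now bounds-checks key_id before indexing, and the new gguf_get_val_data() exposes the raw scalar storage for non-array, non-string values. A hedged usage sketch (key name, fallback, and helper names are ours):

#include "ggml.h"
#include <stdint.h>

// Fetch an optional u32 key; out-of-range ids and type mismatches now trip
// GGML_ASSERT inside the getter instead of reading out of bounds.
static uint32_t read_u32_or(const struct gguf_context * ctx, const char * key, uint32_t def) {
    const int key_id = gguf_find_key(ctx, key); // -1 when the key is absent
    if (key_id < 0) {
        return def;
    }
    return gguf_get_val_u32(ctx, key_id);
}

// Generic scalar access: pair the raw pointer with the reported type.
static const void * read_scalar(const struct gguf_context * ctx, int key_id, enum gguf_type * type) {
    *type = gguf_get_kv_type(ctx, key_id);
    return gguf_get_val_data(ctx, key_id); // asserts: in range, not ARRAY, not STRING
}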