llama_cpp 0.3.3 → 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +439 -9
- data/ext/llama_cpp/src/ggml-cuda.cu +759 -136
- data/ext/llama_cpp/src/ggml-metal.h +7 -0
- data/ext/llama_cpp/src/ggml-metal.m +250 -111
- data/ext/llama_cpp/src/ggml-metal.metal +614 -483
- data/ext/llama_cpp/src/ggml.c +793 -1032
- data/ext/llama_cpp/src/ggml.h +95 -18
- data/ext/llama_cpp/src/k_quants.c +327 -3
- data/ext/llama_cpp/src/k_quants.h +8 -0
- data/ext/llama_cpp/src/llama.cpp +626 -166
- data/ext/llama_cpp/src/llama.h +94 -10
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -0
- data/sig/llama_cpp.rbs +36 -1
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -31,11 +31,17 @@
|
|
31
31
|
#include <unistd.h>
|
32
32
|
#endif
|
33
33
|
|
34
|
+
// static_assert should be a #define, but if it's not,
|
35
|
+
// fall back to the _Static_assert C11 keyword.
|
34
36
|
// if C99 - static_assert is noop
|
35
37
|
// ref: https://stackoverflow.com/a/53923785/4039976
|
36
38
|
#ifndef static_assert
|
39
|
+
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
|
40
|
+
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
41
|
+
#else
|
37
42
|
#define static_assert(cond, msg) struct global_scope_noop_trick
|
38
43
|
#endif
|
44
|
+
#endif
|
39
45
|
|
40
46
|
#if defined(_MSC_VER)
|
41
47
|
// disable "possible loss of data" to avoid hundreds of casts
|
@@ -112,10 +118,6 @@ typedef void * thread_ret_t;
|
|
112
118
|
#endif
|
113
119
|
#endif
|
114
120
|
|
115
|
-
#ifdef __HAIKU__
|
116
|
-
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
117
|
-
#endif
|
118
|
-
|
119
121
|
/*#define GGML_PERF*/
|
120
122
|
#define GGML_DEBUG 0
|
121
123
|
#define GGML_GELU_FP16
|
@@ -3438,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
|
|
3438
3440
|
|
3439
3441
|
//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
|
3440
3442
|
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
|
3441
|
-
#if defined(
|
3443
|
+
#if defined(GGML_USE_ACCELERATE)
|
3444
|
+
vDSP_vsmul(y, 1, &v, y, 1, n);
|
3445
|
+
#elif defined(GGML_SIMD)
|
3442
3446
|
const int np = (n & ~(GGML_F32_STEP - 1));
|
3443
3447
|
|
3444
3448
|
GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
|
@@ -3601,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
|
|
3601
3605
|
#endif
|
3602
3606
|
}
|
3603
3607
|
|
3604
|
-
inline static void
|
3608
|
+
inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
|
3605
3609
|
ggml_float sum = 0.0;
|
3606
3610
|
for (int i = 0; i < n; ++i) {
|
3607
3611
|
sum += (ggml_float)x[i];
|
@@ -3609,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x
|
|
3609
3613
|
*s = sum;
|
3610
3614
|
}
|
3611
3615
|
|
3616
|
+
inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) {
|
3617
|
+
float sum = 0.0f;
|
3618
|
+
for (int i = 0; i < n; ++i) {
|
3619
|
+
sum += GGML_FP16_TO_FP32(x[i]);
|
3620
|
+
}
|
3621
|
+
*s = sum;
|
3622
|
+
}
|
3623
|
+
|
3612
3624
|
inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
|
3613
3625
|
#ifndef GGML_USE_ACCELERATE
|
3614
3626
|
float max = -INFINITY;
|
@@ -3748,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
3748
3760
|
"ARGMAX",
|
3749
3761
|
"REPEAT",
|
3750
3762
|
"REPEAT_BACK",
|
3751
|
-
"ABS",
|
3752
|
-
"SGN",
|
3753
|
-
"NEG",
|
3754
|
-
"STEP",
|
3755
|
-
"TANH",
|
3756
|
-
"ELU",
|
3757
|
-
"RELU",
|
3758
|
-
"GELU",
|
3759
|
-
"GELU_QUICK",
|
3760
|
-
"SILU",
|
3761
3763
|
"SILU_BACK",
|
3762
3764
|
"NORM",
|
3763
3765
|
"RMS_NORM",
|
@@ -3796,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
3796
3798
|
"WIN_PART",
|
3797
3799
|
"WIN_UNPART",
|
3798
3800
|
|
3801
|
+
"UNARY",
|
3802
|
+
|
3799
3803
|
"MAP_UNARY",
|
3800
3804
|
"MAP_BINARY",
|
3801
3805
|
|
@@ -3807,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
3807
3811
|
"CROSS_ENTROPY_LOSS_BACK",
|
3808
3812
|
};
|
3809
3813
|
|
3810
|
-
static_assert(GGML_OP_COUNT ==
|
3814
|
+
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
3811
3815
|
|
3812
3816
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
3813
3817
|
"none",
|
@@ -3828,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
3828
3832
|
"argmax(x)",
|
3829
3833
|
"repeat(x)",
|
3830
3834
|
"repeat_back(x)",
|
3831
|
-
"abs(x)",
|
3832
|
-
"sgn(x)",
|
3833
|
-
"-x",
|
3834
|
-
"step(x)",
|
3835
|
-
"tanh(x)",
|
3836
|
-
"elu(x)",
|
3837
|
-
"relu(x)",
|
3838
|
-
"gelu(x)",
|
3839
|
-
"gelu_quick(x)",
|
3840
|
-
"silu(x)",
|
3841
3835
|
"silu_back(x)",
|
3842
3836
|
"norm(x)",
|
3843
3837
|
"rms_norm(x)",
|
@@ -3876,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
3876
3870
|
"win_part(x)",
|
3877
3871
|
"win_unpart(x)",
|
3878
3872
|
|
3873
|
+
"unary(x)",
|
3874
|
+
|
3879
3875
|
"f(x)",
|
3880
3876
|
"f(x,y)",
|
3881
3877
|
|
@@ -3887,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
3887
3883
|
"cross_entropy_loss_back(x,y)",
|
3888
3884
|
};
|
3889
3885
|
|
3890
|
-
static_assert(GGML_OP_COUNT ==
|
3886
|
+
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
3891
3887
|
|
3892
3888
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
3893
3889
|
|
@@ -4075,8 +4071,8 @@ bool ggml_is_numa(void) {
|
|
4075
4071
|
////////////////////////////////////////////////////////////////////////////////
|
4076
4072
|
|
4077
4073
|
void ggml_print_object(const struct ggml_object * obj) {
|
4078
|
-
GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
|
4079
|
-
obj->offs, obj->size, (const void *) obj->next);
|
4074
|
+
GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
|
4075
|
+
obj->type, obj->offs, obj->size, (const void *) obj->next);
|
4080
4076
|
}
|
4081
4077
|
|
4082
4078
|
void ggml_print_objects(const struct ggml_context * ctx) {
|
@@ -4143,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) {
|
|
4143
4139
|
return GGML_OP_NAME[op];
|
4144
4140
|
}
|
4145
4141
|
|
4142
|
+
const char * ggml_op_symbol(enum ggml_op op) {
|
4143
|
+
return GGML_OP_SYMBOL[op];
|
4144
|
+
}
|
4145
|
+
|
4146
4146
|
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
4147
4147
|
return GGML_TYPE_SIZE[tensor->type];
|
4148
4148
|
}
|
@@ -4212,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
4212
4212
|
}
|
4213
4213
|
|
4214
4214
|
size_t ggml_tensor_overhead(void) {
|
4215
|
-
return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE
|
4215
|
+
return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
|
4216
4216
|
}
|
4217
4217
|
|
4218
4218
|
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
|
@@ -4229,6 +4229,15 @@ bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
|
|
4229
4229
|
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
|
4230
4230
|
}
|
4231
4231
|
|
4232
|
+
static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * tensor) {
|
4233
|
+
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
4234
|
+
|
4235
|
+
return
|
4236
|
+
tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
|
4237
|
+
tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
|
4238
|
+
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
|
4239
|
+
}
|
4240
|
+
|
4232
4241
|
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
|
4233
4242
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
4234
4243
|
|
@@ -4374,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
4374
4383
|
return NULL;
|
4375
4384
|
}
|
4376
4385
|
|
4377
|
-
const size_t mem_size =
|
4386
|
+
const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
|
4378
4387
|
|
4379
4388
|
*ctx = (struct ggml_context) {
|
4380
4389
|
/*.mem_size =*/ mem_size,
|
@@ -4410,8 +4419,8 @@ void ggml_free(struct ggml_context * ctx) {
|
|
4410
4419
|
if (&g_state.contexts[i].context == ctx) {
|
4411
4420
|
g_state.contexts[i].used = false;
|
4412
4421
|
|
4413
|
-
GGML_PRINT_DEBUG("%s: context %d
|
4414
|
-
__func__, i, ctx
|
4422
|
+
GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
|
4423
|
+
__func__, i, ggml_used_mem(ctx));
|
4415
4424
|
|
4416
4425
|
if (ctx->mem_buffer_owned) {
|
4417
4426
|
GGML_ALIGNED_FREE(ctx->mem_buffer);
|
@@ -4441,6 +4450,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
|
|
4441
4450
|
return result;
|
4442
4451
|
}
|
4443
4452
|
|
4453
|
+
bool ggml_get_no_alloc(struct ggml_context * ctx) {
|
4454
|
+
return ctx->no_alloc;
|
4455
|
+
}
|
4456
|
+
|
4444
4457
|
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
|
4445
4458
|
ctx->no_alloc = no_alloc;
|
4446
4459
|
}
|
@@ -4459,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
|
|
4459
4472
|
struct ggml_object * obj = ctx->objects_begin;
|
4460
4473
|
|
4461
4474
|
while (obj != NULL) {
|
4462
|
-
|
4475
|
+
if (obj->type == GGML_OBJECT_TENSOR) {
|
4476
|
+
struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
|
4463
4477
|
|
4464
|
-
|
4478
|
+
const size_t size = ggml_nbytes(tensor);
|
4465
4479
|
|
4466
|
-
|
4467
|
-
|
4480
|
+
if (max_size < size) {
|
4481
|
+
max_size = size;
|
4482
|
+
}
|
4468
4483
|
}
|
4469
4484
|
|
4470
4485
|
obj = obj->next;
|
@@ -4478,7 +4493,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
|
|
4478
4493
|
// this is an error prone process, but it is necessary to support inplace
|
4479
4494
|
// operators when using scratch buffers
|
4480
4495
|
// TODO: implement a better way
|
4481
|
-
void ggml_scratch_save(struct ggml_context * ctx) {
|
4496
|
+
static void ggml_scratch_save(struct ggml_context * ctx) {
|
4482
4497
|
// this is needed to allow opt tensors to store their data
|
4483
4498
|
// TODO: again, need to find a better way
|
4484
4499
|
ctx->no_alloc_save = ctx->no_alloc;
|
@@ -4488,7 +4503,7 @@ void ggml_scratch_save(struct ggml_context * ctx) {
|
|
4488
4503
|
ctx->scratch.data = NULL;
|
4489
4504
|
}
|
4490
4505
|
|
4491
|
-
void ggml_scratch_load(struct ggml_context * ctx) {
|
4506
|
+
static void ggml_scratch_load(struct ggml_context * ctx) {
|
4492
4507
|
ctx->no_alloc = ctx->no_alloc_save;
|
4493
4508
|
|
4494
4509
|
ctx->scratch = ctx->scratch_save;
|
@@ -4496,12 +4511,7 @@ void ggml_scratch_load(struct ggml_context * ctx) {
|
|
4496
4511
|
|
4497
4512
|
////////////////////////////////////////////////////////////////////////////////
|
4498
4513
|
|
4499
|
-
struct
|
4500
|
-
struct ggml_context * ctx,
|
4501
|
-
enum ggml_type type,
|
4502
|
-
int n_dims,
|
4503
|
-
const int64_t* ne,
|
4504
|
-
void* data) {
|
4514
|
+
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
|
4505
4515
|
// always insert objects at the end of the context's memory pool
|
4506
4516
|
struct ggml_object * obj_cur = ctx->objects_end;
|
4507
4517
|
|
@@ -4509,77 +4519,79 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4509
4519
|
const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
|
4510
4520
|
const size_t cur_end = cur_offs + cur_size;
|
4511
4521
|
|
4512
|
-
|
4513
|
-
|
4514
|
-
if (data == NULL && !ctx->no_alloc) {
|
4515
|
-
size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
|
4516
|
-
for (int i = 1; i < n_dims; i++) {
|
4517
|
-
size_needed *= ne[i];
|
4518
|
-
}
|
4519
|
-
// align to GGML_MEM_ALIGN
|
4520
|
-
size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
|
4521
|
-
}
|
4522
|
+
// align to GGML_MEM_ALIGN
|
4523
|
+
size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
|
4522
4524
|
|
4523
4525
|
char * const mem_buffer = ctx->mem_buffer;
|
4524
4526
|
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
|
4525
4527
|
|
4526
|
-
if (
|
4527
|
-
|
4528
|
+
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
|
4529
|
+
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
4530
|
+
__func__, cur_end + size_needed, ctx->mem_size);
|
4531
|
+
assert(false);
|
4532
|
+
return NULL;
|
4533
|
+
}
|
4534
|
+
|
4535
|
+
*obj_new = (struct ggml_object) {
|
4536
|
+
.offs = cur_end + GGML_OBJECT_SIZE,
|
4537
|
+
.size = size_needed,
|
4538
|
+
.next = NULL,
|
4539
|
+
.type = type,
|
4540
|
+
};
|
4528
4541
|
|
4529
|
-
|
4530
|
-
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
4531
|
-
__func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
|
4532
|
-
assert(false);
|
4533
|
-
return NULL;
|
4534
|
-
}
|
4542
|
+
ggml_assert_aligned(mem_buffer + obj_new->offs);
|
4535
4543
|
|
4536
|
-
|
4537
|
-
|
4538
|
-
.size = size_needed,
|
4539
|
-
.next = NULL,
|
4540
|
-
};
|
4544
|
+
if (obj_cur != NULL) {
|
4545
|
+
obj_cur->next = obj_new;
|
4541
4546
|
} else {
|
4542
|
-
|
4543
|
-
|
4544
|
-
|
4545
|
-
|
4546
|
-
|
4547
|
+
// this is the first object in this context
|
4548
|
+
ctx->objects_begin = obj_new;
|
4549
|
+
}
|
4550
|
+
|
4551
|
+
ctx->objects_end = obj_new;
|
4552
|
+
|
4553
|
+
//printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
|
4554
|
+
|
4555
|
+
return obj_new;
|
4556
|
+
}
|
4557
|
+
|
4558
|
+
static struct ggml_tensor * ggml_new_tensor_impl(
|
4559
|
+
struct ggml_context * ctx,
|
4560
|
+
enum ggml_type type,
|
4561
|
+
int n_dims,
|
4562
|
+
const int64_t* ne,
|
4563
|
+
void* data) {
|
4564
|
+
|
4565
|
+
size_t data_size = 0;
|
4566
|
+
|
4567
|
+
if (data == NULL && !ctx->no_alloc) {
|
4568
|
+
data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
|
4569
|
+
for (int i = 1; i < n_dims; i++) {
|
4570
|
+
data_size *= ne[i];
|
4547
4571
|
}
|
4572
|
+
}
|
4548
4573
|
|
4549
|
-
|
4550
|
-
|
4551
|
-
|
4574
|
+
if (ctx->scratch.data != NULL && data == NULL) {
|
4575
|
+
// allocate tensor data in the scratch buffer
|
4576
|
+
if (ctx->scratch.offs + data_size > ctx->scratch.size) {
|
4577
|
+
GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
|
4578
|
+
__func__, ctx->scratch.offs + data_size, ctx->scratch.size);
|
4552
4579
|
assert(false);
|
4553
4580
|
return NULL;
|
4554
4581
|
}
|
4555
4582
|
|
4556
4583
|
data = (char * const) ctx->scratch.data + ctx->scratch.offs;
|
4557
4584
|
|
4558
|
-
|
4559
|
-
.offs = cur_end + GGML_OBJECT_SIZE,
|
4560
|
-
.size = GGML_TENSOR_SIZE,
|
4561
|
-
.next = NULL,
|
4562
|
-
};
|
4563
|
-
|
4564
|
-
//printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
|
4565
|
-
|
4566
|
-
ctx->scratch.offs += size_needed;
|
4567
|
-
}
|
4585
|
+
ctx->scratch.offs += data_size;
|
4568
4586
|
|
4569
|
-
|
4570
|
-
obj_cur->next = obj_new;
|
4571
|
-
} else {
|
4572
|
-
// this is the first object in this context
|
4573
|
-
ctx->objects_begin = obj_new;
|
4587
|
+
data_size = 0;
|
4574
4588
|
}
|
4575
4589
|
|
4576
|
-
|
4577
|
-
|
4578
|
-
//printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
|
4590
|
+
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
|
4579
4591
|
|
4580
|
-
|
4592
|
+
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
|
4581
4593
|
|
4582
|
-
|
4594
|
+
struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
|
4583
4595
|
|
4584
4596
|
*result = (struct ggml_tensor) {
|
4585
4597
|
/*.type =*/ type,
|
@@ -4588,6 +4600,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4588
4600
|
/*.ne =*/ { 1, 1, 1, 1 },
|
4589
4601
|
/*.nb =*/ { 0, 0, 0, 0 },
|
4590
4602
|
/*.op =*/ GGML_OP_NONE,
|
4603
|
+
/*.op_params =*/ {0},
|
4591
4604
|
/*.is_param =*/ false,
|
4592
4605
|
/*.grad =*/ NULL,
|
4593
4606
|
/*.src =*/ { NULL },
|
@@ -4618,6 +4631,21 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4618
4631
|
return result;
|
4619
4632
|
}
|
4620
4633
|
|
4634
|
+
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
|
4635
|
+
assert(params_size <= GGML_MAX_OP_PARAMS);
|
4636
|
+
memcpy(tensor->op_params, params, params_size);
|
4637
|
+
}
|
4638
|
+
|
4639
|
+
static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
|
4640
|
+
assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
|
4641
|
+
return ((const int32_t *)(tensor->op_params))[i];
|
4642
|
+
}
|
4643
|
+
|
4644
|
+
static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
|
4645
|
+
assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
|
4646
|
+
((int32_t *)(tensor->op_params))[i] = value;
|
4647
|
+
}
|
4648
|
+
|
4621
4649
|
struct ggml_tensor * ggml_new_tensor(
|
4622
4650
|
struct ggml_context * ctx,
|
4623
4651
|
enum ggml_type type,
|
@@ -4949,6 +4977,11 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
|
|
4949
4977
|
return (float *)(tensor->data);
|
4950
4978
|
}
|
4951
4979
|
|
4980
|
+
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
|
4981
|
+
GGML_ASSERT(tensor->op == GGML_OP_UNARY);
|
4982
|
+
return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
|
4983
|
+
}
|
4984
|
+
|
4952
4985
|
const char * ggml_get_name(const struct ggml_tensor * tensor) {
|
4953
4986
|
return tensor->name;
|
4954
4987
|
}
|
@@ -4987,9 +5020,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
|
|
4987
5020
|
char * const mem_buffer = ctx->mem_buffer;
|
4988
5021
|
|
4989
5022
|
while (obj != NULL) {
|
4990
|
-
|
4991
|
-
|
4992
|
-
|
5023
|
+
if (obj->type == GGML_OBJECT_TENSOR) {
|
5024
|
+
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
|
5025
|
+
if (strcmp(cur->name, name) == 0) {
|
5026
|
+
return cur;
|
5027
|
+
}
|
4993
5028
|
}
|
4994
5029
|
|
4995
5030
|
obj = obj->next;
|
@@ -5002,7 +5037,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
|
|
5002
5037
|
|
5003
5038
|
// ggml_dup
|
5004
5039
|
|
5005
|
-
struct ggml_tensor * ggml_dup_impl(
|
5040
|
+
static struct ggml_tensor * ggml_dup_impl(
|
5006
5041
|
struct ggml_context * ctx,
|
5007
5042
|
struct ggml_tensor * a,
|
5008
5043
|
bool inplace) {
|
@@ -5017,7 +5052,6 @@ struct ggml_tensor * ggml_dup_impl(
|
|
5017
5052
|
result->op = GGML_OP_DUP;
|
5018
5053
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5019
5054
|
result->src[0] = a;
|
5020
|
-
result->src[1] = NULL;
|
5021
5055
|
|
5022
5056
|
return result;
|
5023
5057
|
}
|
@@ -5036,7 +5070,7 @@ struct ggml_tensor * ggml_dup_inplace(
|
|
5036
5070
|
|
5037
5071
|
// ggml_add
|
5038
5072
|
|
5039
|
-
struct ggml_tensor * ggml_add_impl(
|
5073
|
+
static struct ggml_tensor * ggml_add_impl(
|
5040
5074
|
struct ggml_context * ctx,
|
5041
5075
|
struct ggml_tensor * a,
|
5042
5076
|
struct ggml_tensor * b,
|
@@ -5079,7 +5113,7 @@ struct ggml_tensor * ggml_add_inplace(
|
|
5079
5113
|
|
5080
5114
|
// ggml_add1
|
5081
5115
|
|
5082
|
-
struct ggml_tensor * ggml_add1_impl(
|
5116
|
+
static struct ggml_tensor * ggml_add1_impl(
|
5083
5117
|
struct ggml_context * ctx,
|
5084
5118
|
struct ggml_tensor * a,
|
5085
5119
|
struct ggml_tensor * b,
|
@@ -5119,7 +5153,7 @@ struct ggml_tensor * ggml_add1_inplace(
|
|
5119
5153
|
|
5120
5154
|
// ggml_acc
|
5121
5155
|
|
5122
|
-
struct ggml_tensor * ggml_acc_impl(
|
5156
|
+
static struct ggml_tensor * ggml_acc_impl(
|
5123
5157
|
struct ggml_context * ctx,
|
5124
5158
|
struct ggml_tensor * a,
|
5125
5159
|
struct ggml_tensor * b,
|
@@ -5141,23 +5175,13 @@ struct ggml_tensor * ggml_acc_impl(
|
|
5141
5175
|
|
5142
5176
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5143
5177
|
|
5144
|
-
|
5145
|
-
|
5146
|
-
struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
|
5147
|
-
|
5148
|
-
((int32_t *) c->data)[0] = nb1;
|
5149
|
-
((int32_t *) c->data)[1] = nb2;
|
5150
|
-
((int32_t *) c->data)[2] = nb3;
|
5151
|
-
((int32_t *) c->data)[3] = offset;
|
5152
|
-
((int32_t *) c->data)[4] = inplace ? 1 : 0;
|
5153
|
-
|
5154
|
-
ggml_scratch_load(ctx);
|
5178
|
+
int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
|
5179
|
+
ggml_set_op_params(result, params, sizeof(params));
|
5155
5180
|
|
5156
5181
|
result->op = GGML_OP_ACC;
|
5157
5182
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5158
5183
|
result->src[0] = a;
|
5159
5184
|
result->src[1] = b;
|
5160
|
-
result->src[2] = c;
|
5161
5185
|
|
5162
5186
|
return result;
|
5163
5187
|
}
|
@@ -5186,7 +5210,7 @@ struct ggml_tensor * ggml_acc_inplace(
|
|
5186
5210
|
|
5187
5211
|
// ggml_sub
|
5188
5212
|
|
5189
|
-
struct ggml_tensor * ggml_sub_impl(
|
5213
|
+
static struct ggml_tensor * ggml_sub_impl(
|
5190
5214
|
struct ggml_context * ctx,
|
5191
5215
|
struct ggml_tensor * a,
|
5192
5216
|
struct ggml_tensor * b,
|
@@ -5225,7 +5249,7 @@ struct ggml_tensor * ggml_sub_inplace(
|
|
5225
5249
|
|
5226
5250
|
// ggml_mul
|
5227
5251
|
|
5228
|
-
struct ggml_tensor * ggml_mul_impl(
|
5252
|
+
static struct ggml_tensor * ggml_mul_impl(
|
5229
5253
|
struct ggml_context * ctx,
|
5230
5254
|
struct ggml_tensor * a,
|
5231
5255
|
struct ggml_tensor * b,
|
@@ -5272,7 +5296,7 @@ struct ggml_tensor * ggml_mul_inplace(
|
|
5272
5296
|
|
5273
5297
|
// ggml_div
|
5274
5298
|
|
5275
|
-
struct ggml_tensor * ggml_div_impl(
|
5299
|
+
static struct ggml_tensor * ggml_div_impl(
|
5276
5300
|
struct ggml_context * ctx,
|
5277
5301
|
struct ggml_tensor * a,
|
5278
5302
|
struct ggml_tensor * b,
|
@@ -5315,7 +5339,7 @@ struct ggml_tensor * ggml_div_inplace(
|
|
5315
5339
|
|
5316
5340
|
// ggml_sqr
|
5317
5341
|
|
5318
|
-
struct ggml_tensor * ggml_sqr_impl(
|
5342
|
+
static struct ggml_tensor * ggml_sqr_impl(
|
5319
5343
|
struct ggml_context * ctx,
|
5320
5344
|
struct ggml_tensor * a,
|
5321
5345
|
bool inplace) {
|
@@ -5330,7 +5354,6 @@ struct ggml_tensor * ggml_sqr_impl(
|
|
5330
5354
|
result->op = GGML_OP_SQR;
|
5331
5355
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5332
5356
|
result->src[0] = a;
|
5333
|
-
result->src[1] = NULL;
|
5334
5357
|
|
5335
5358
|
return result;
|
5336
5359
|
}
|
@@ -5349,7 +5372,7 @@ struct ggml_tensor * ggml_sqr_inplace(
|
|
5349
5372
|
|
5350
5373
|
// ggml_sqrt
|
5351
5374
|
|
5352
|
-
struct ggml_tensor * ggml_sqrt_impl(
|
5375
|
+
static struct ggml_tensor * ggml_sqrt_impl(
|
5353
5376
|
struct ggml_context * ctx,
|
5354
5377
|
struct ggml_tensor * a,
|
5355
5378
|
bool inplace) {
|
@@ -5364,7 +5387,6 @@ struct ggml_tensor * ggml_sqrt_impl(
|
|
5364
5387
|
result->op = GGML_OP_SQRT;
|
5365
5388
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5366
5389
|
result->src[0] = a;
|
5367
|
-
result->src[1] = NULL;
|
5368
5390
|
|
5369
5391
|
return result;
|
5370
5392
|
}
|
@@ -5384,7 +5406,7 @@ struct ggml_tensor * ggml_sqrt_inplace(
|
|
5384
5406
|
|
5385
5407
|
// ggml_log
|
5386
5408
|
|
5387
|
-
struct ggml_tensor * ggml_log_impl(
|
5409
|
+
static struct ggml_tensor * ggml_log_impl(
|
5388
5410
|
struct ggml_context * ctx,
|
5389
5411
|
struct ggml_tensor * a,
|
5390
5412
|
bool inplace) {
|
@@ -5399,7 +5421,6 @@ struct ggml_tensor * ggml_log_impl(
|
|
5399
5421
|
result->op = GGML_OP_LOG;
|
5400
5422
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5401
5423
|
result->src[0] = a;
|
5402
|
-
result->src[1] = NULL;
|
5403
5424
|
|
5404
5425
|
return result;
|
5405
5426
|
}
|
@@ -5432,7 +5453,6 @@ struct ggml_tensor * ggml_sum(
|
|
5432
5453
|
result->op = GGML_OP_SUM;
|
5433
5454
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5434
5455
|
result->src[0] = a;
|
5435
|
-
result->src[1] = NULL;
|
5436
5456
|
|
5437
5457
|
return result;
|
5438
5458
|
}
|
@@ -5459,7 +5479,6 @@ struct ggml_tensor * ggml_sum_rows(
|
|
5459
5479
|
result->op = GGML_OP_SUM_ROWS;
|
5460
5480
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5461
5481
|
result->src[0] = a;
|
5462
|
-
result->src[1] = NULL;
|
5463
5482
|
|
5464
5483
|
return result;
|
5465
5484
|
}
|
@@ -5482,7 +5501,6 @@ struct ggml_tensor * ggml_mean(
|
|
5482
5501
|
result->op = GGML_OP_MEAN;
|
5483
5502
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5484
5503
|
result->src[0] = a;
|
5485
|
-
result->src[1] = NULL;
|
5486
5504
|
|
5487
5505
|
return result;
|
5488
5506
|
}
|
@@ -5506,7 +5524,6 @@ struct ggml_tensor * ggml_argmax(
|
|
5506
5524
|
result->op = GGML_OP_ARGMAX;
|
5507
5525
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5508
5526
|
result->src[0] = a;
|
5509
|
-
result->src[1] = NULL;
|
5510
5527
|
|
5511
5528
|
return result;
|
5512
5529
|
}
|
@@ -5569,343 +5586,142 @@ struct ggml_tensor * ggml_repeat_back(
|
|
5569
5586
|
|
5570
5587
|
// ggml_abs
|
5571
5588
|
|
5572
|
-
struct ggml_tensor * ggml_abs_impl(
|
5573
|
-
struct ggml_context * ctx,
|
5574
|
-
struct ggml_tensor * a,
|
5575
|
-
bool inplace) {
|
5576
|
-
bool is_node = false;
|
5577
|
-
|
5578
|
-
if (!inplace && (a->grad)) {
|
5579
|
-
is_node = true;
|
5580
|
-
}
|
5581
|
-
|
5582
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5583
|
-
|
5584
|
-
result->op = GGML_OP_ABS;
|
5585
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5586
|
-
result->src[0] = a;
|
5587
|
-
result->src[1] = NULL;
|
5588
|
-
|
5589
|
-
return result;
|
5590
|
-
}
|
5591
|
-
|
5592
5589
|
struct ggml_tensor * ggml_abs(
|
5593
5590
|
struct ggml_context * ctx,
|
5594
5591
|
struct ggml_tensor * a) {
|
5595
|
-
return
|
5592
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
|
5596
5593
|
}
|
5597
5594
|
|
5598
5595
|
struct ggml_tensor * ggml_abs_inplace(
|
5599
5596
|
struct ggml_context * ctx,
|
5600
5597
|
struct ggml_tensor * a) {
|
5601
|
-
return
|
5598
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
|
5602
5599
|
}
|
5603
5600
|
|
5604
|
-
|
5605
5601
|
// ggml_sgn
|
5606
5602
|
|
5607
|
-
struct ggml_tensor * ggml_sgn_impl(
|
5608
|
-
struct ggml_context * ctx,
|
5609
|
-
struct ggml_tensor * a,
|
5610
|
-
bool inplace) {
|
5611
|
-
bool is_node = false;
|
5612
|
-
|
5613
|
-
if (!inplace && (a->grad)) {
|
5614
|
-
is_node = true;
|
5615
|
-
}
|
5616
|
-
|
5617
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5618
|
-
|
5619
|
-
result->op = GGML_OP_SGN;
|
5620
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5621
|
-
result->src[0] = a;
|
5622
|
-
result->src[1] = NULL;
|
5623
|
-
|
5624
|
-
return result;
|
5625
|
-
}
|
5626
|
-
|
5627
5603
|
struct ggml_tensor * ggml_sgn(
|
5628
5604
|
struct ggml_context * ctx,
|
5629
5605
|
struct ggml_tensor * a) {
|
5630
|
-
return
|
5606
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
|
5631
5607
|
}
|
5632
5608
|
|
5633
5609
|
struct ggml_tensor * ggml_sgn_inplace(
|
5634
5610
|
struct ggml_context * ctx,
|
5635
5611
|
struct ggml_tensor * a) {
|
5636
|
-
return
|
5612
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
|
5637
5613
|
}
|
5638
5614
|
|
5639
5615
|
// ggml_neg
|
5640
5616
|
|
5641
|
-
struct ggml_tensor * ggml_neg_impl(
|
5642
|
-
struct ggml_context * ctx,
|
5643
|
-
struct ggml_tensor * a,
|
5644
|
-
bool inplace) {
|
5645
|
-
bool is_node = false;
|
5646
|
-
|
5647
|
-
if (!inplace && (a->grad)) {
|
5648
|
-
is_node = true;
|
5649
|
-
}
|
5650
|
-
|
5651
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5652
|
-
|
5653
|
-
result->op = GGML_OP_NEG;
|
5654
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5655
|
-
result->src[0] = a;
|
5656
|
-
result->src[1] = NULL;
|
5657
|
-
|
5658
|
-
return result;
|
5659
|
-
}
|
5660
|
-
|
5661
5617
|
struct ggml_tensor * ggml_neg(
|
5662
5618
|
struct ggml_context * ctx,
|
5663
5619
|
struct ggml_tensor * a) {
|
5664
|
-
return
|
5620
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
|
5665
5621
|
}
|
5666
5622
|
|
5667
5623
|
struct ggml_tensor * ggml_neg_inplace(
|
5668
5624
|
struct ggml_context * ctx,
|
5669
5625
|
struct ggml_tensor * a) {
|
5670
|
-
return
|
5626
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
|
5671
5627
|
}
|
5672
5628
|
|
5673
5629
|
// ggml_step
|
5674
5630
|
|
5675
|
-
struct ggml_tensor * ggml_step_impl(
|
5676
|
-
struct ggml_context * ctx,
|
5677
|
-
struct ggml_tensor * a,
|
5678
|
-
bool inplace) {
|
5679
|
-
bool is_node = false;
|
5680
|
-
|
5681
|
-
if (!inplace && (a->grad)) {
|
5682
|
-
is_node = true;
|
5683
|
-
}
|
5684
|
-
|
5685
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5686
|
-
|
5687
|
-
result->op = GGML_OP_STEP;
|
5688
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5689
|
-
result->src[0] = a;
|
5690
|
-
result->src[1] = NULL;
|
5691
|
-
|
5692
|
-
return result;
|
5693
|
-
}
|
5694
|
-
|
5695
5631
|
struct ggml_tensor * ggml_step(
|
5696
5632
|
struct ggml_context * ctx,
|
5697
5633
|
struct ggml_tensor * a) {
|
5698
|
-
return
|
5634
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
|
5699
5635
|
}
|
5700
5636
|
|
5701
5637
|
struct ggml_tensor * ggml_step_inplace(
|
5702
5638
|
struct ggml_context * ctx,
|
5703
5639
|
struct ggml_tensor * a) {
|
5704
|
-
return
|
5640
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
|
5705
5641
|
}
|
5706
5642
|
|
5707
5643
|
// ggml_tanh
|
5708
5644
|
|
5709
|
-
struct ggml_tensor * ggml_tanh_impl(
|
5710
|
-
struct ggml_context * ctx,
|
5711
|
-
struct ggml_tensor * a,
|
5712
|
-
bool inplace) {
|
5713
|
-
bool is_node = false;
|
5714
|
-
|
5715
|
-
if (!inplace && (a->grad)) {
|
5716
|
-
is_node = true;
|
5717
|
-
}
|
5718
|
-
|
5719
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5720
|
-
|
5721
|
-
result->op = GGML_OP_TANH;
|
5722
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5723
|
-
result->src[0] = a;
|
5724
|
-
result->src[1] = NULL;
|
5725
|
-
|
5726
|
-
return result;
|
5727
|
-
}
|
5728
|
-
|
5729
5645
|
struct ggml_tensor * ggml_tanh(
|
5730
5646
|
struct ggml_context * ctx,
|
5731
5647
|
struct ggml_tensor * a) {
|
5732
|
-
return
|
5648
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
|
5733
5649
|
}
|
5734
5650
|
|
5735
5651
|
struct ggml_tensor * ggml_tanh_inplace(
|
5736
5652
|
struct ggml_context * ctx,
|
5737
5653
|
struct ggml_tensor * a) {
|
5738
|
-
return
|
5654
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
|
5739
5655
|
}
|
5740
5656
|
|
5741
5657
|
// ggml_elu
|
5742
5658
|
|
5743
|
-
struct ggml_tensor * ggml_elu_impl(
|
5744
|
-
struct ggml_context * ctx,
|
5745
|
-
struct ggml_tensor * a,
|
5746
|
-
bool inplace) {
|
5747
|
-
bool is_node = false;
|
5748
|
-
|
5749
|
-
if (!inplace && (a->grad)) {
|
5750
|
-
is_node = true;
|
5751
|
-
}
|
5752
|
-
|
5753
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5754
|
-
|
5755
|
-
result->op = GGML_OP_ELU;
|
5756
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5757
|
-
result->src[0] = a;
|
5758
|
-
result->src[1] = NULL;
|
5759
|
-
|
5760
|
-
return result;
|
5761
|
-
}
|
5762
|
-
|
5763
5659
|
struct ggml_tensor * ggml_elu(
|
5764
5660
|
struct ggml_context * ctx,
|
5765
5661
|
struct ggml_tensor * a) {
|
5766
|
-
return
|
5662
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
|
5767
5663
|
}
|
5768
5664
|
|
5769
5665
|
struct ggml_tensor * ggml_elu_inplace(
|
5770
5666
|
struct ggml_context * ctx,
|
5771
5667
|
struct ggml_tensor * a) {
|
5772
|
-
return
|
5668
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
|
5773
5669
|
}
|
5774
5670
|
|
5775
5671
|
// ggml_relu
|
5776
5672
|
|
5777
|
-
struct ggml_tensor * ggml_relu_impl(
|
5778
|
-
struct ggml_context * ctx,
|
5779
|
-
struct ggml_tensor * a,
|
5780
|
-
bool inplace) {
|
5781
|
-
bool is_node = false;
|
5782
|
-
|
5783
|
-
if (!inplace && (a->grad)) {
|
5784
|
-
is_node = true;
|
5785
|
-
}
|
5786
|
-
|
5787
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5788
|
-
|
5789
|
-
result->op = GGML_OP_RELU;
|
5790
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5791
|
-
result->src[0] = a;
|
5792
|
-
result->src[1] = NULL;
|
5793
|
-
|
5794
|
-
return result;
|
5795
|
-
}
|
5796
|
-
|
5797
5673
|
struct ggml_tensor * ggml_relu(
|
5798
5674
|
struct ggml_context * ctx,
|
5799
5675
|
struct ggml_tensor * a) {
|
5800
|
-
return
|
5676
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
|
5801
5677
|
}
|
5802
5678
|
|
5803
5679
|
struct ggml_tensor * ggml_relu_inplace(
|
5804
5680
|
struct ggml_context * ctx,
|
5805
5681
|
struct ggml_tensor * a) {
|
5806
|
-
return
|
5682
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
|
5807
5683
|
}
|
5808
5684
|
|
5809
5685
|
// ggml_gelu
|
5810
5686
|
|
5811
|
-
struct ggml_tensor * ggml_gelu_impl(
|
5812
|
-
struct ggml_context * ctx,
|
5813
|
-
struct ggml_tensor * a,
|
5814
|
-
bool inplace) {
|
5815
|
-
bool is_node = false;
|
5816
|
-
|
5817
|
-
if (!inplace && (a->grad)) {
|
5818
|
-
is_node = true;
|
5819
|
-
}
|
5820
|
-
|
5821
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5822
|
-
|
5823
|
-
result->op = GGML_OP_GELU;
|
5824
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5825
|
-
result->src[0] = a;
|
5826
|
-
result->src[1] = NULL;
|
5827
|
-
|
5828
|
-
return result;
|
5829
|
-
}
|
5830
|
-
|
5831
5687
|
struct ggml_tensor * ggml_gelu(
|
5832
5688
|
struct ggml_context * ctx,
|
5833
5689
|
struct ggml_tensor * a) {
|
5834
|
-
return
|
5690
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
|
5835
5691
|
}
|
5836
5692
|
|
5837
5693
|
struct ggml_tensor * ggml_gelu_inplace(
|
5838
5694
|
struct ggml_context * ctx,
|
5839
5695
|
struct ggml_tensor * a) {
|
5840
|
-
return
|
5696
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
|
5841
5697
|
}
|
5842
5698
|
|
5843
5699
|
// ggml_gelu_quick
|
5844
5700
|
|
5845
|
-
struct ggml_tensor * ggml_gelu_quick_impl(
|
5846
|
-
struct ggml_context * ctx,
|
5847
|
-
struct ggml_tensor * a,
|
5848
|
-
bool inplace) {
|
5849
|
-
bool is_node = false;
|
5850
|
-
|
5851
|
-
if (!inplace && (a->grad)) {
|
5852
|
-
is_node = true;
|
5853
|
-
}
|
5854
|
-
|
5855
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5856
|
-
|
5857
|
-
result->op = GGML_OP_GELU_QUICK;
|
5858
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5859
|
-
result->src[0] = a;
|
5860
|
-
result->src[1] = NULL;
|
5861
|
-
|
5862
|
-
return result;
|
5863
|
-
}
|
5864
|
-
|
5865
5701
|
struct ggml_tensor * ggml_gelu_quick(
|
5866
5702
|
struct ggml_context * ctx,
|
5867
5703
|
struct ggml_tensor * a) {
|
5868
|
-
return
|
5704
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
|
5869
5705
|
}
|
5870
5706
|
|
5871
5707
|
struct ggml_tensor * ggml_gelu_quick_inplace(
|
5872
5708
|
struct ggml_context * ctx,
|
5873
5709
|
struct ggml_tensor * a) {
|
5874
|
-
return
|
5710
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
|
5875
5711
|
}
|
5876
5712
|
|
5877
5713
|
// ggml_silu
|
5878
5714
|
|
5879
|
-
struct ggml_tensor *
|
5880
|
-
struct ggml_context * ctx,
|
5881
|
-
struct ggml_tensor * a,
|
5882
|
-
bool inplace) {
|
5883
|
-
bool is_node = false;
|
5884
|
-
|
5885
|
-
if (!inplace && (a->grad)) {
|
5886
|
-
is_node = true;
|
5887
|
-
}
|
5888
|
-
|
5889
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5890
|
-
|
5891
|
-
result->op = GGML_OP_SILU;
|
5892
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5893
|
-
result->src[0] = a;
|
5894
|
-
result->src[1] = NULL;
|
5895
|
-
|
5896
|
-
return result;
|
5897
|
-
}
|
5898
|
-
|
5899
|
-
struct ggml_tensor * ggml_silu(
|
5715
|
+
struct ggml_tensor * ggml_silu(
|
5900
5716
|
struct ggml_context * ctx,
|
5901
5717
|
struct ggml_tensor * a) {
|
5902
|
-
return
|
5718
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
|
5903
5719
|
}
|
5904
5720
|
|
5905
5721
|
struct ggml_tensor * ggml_silu_inplace(
|
5906
5722
|
struct ggml_context * ctx,
|
5907
5723
|
struct ggml_tensor * a) {
|
5908
|
-
return
|
5724
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
|
5909
5725
|
}
|
5910
5726
|
|
5911
5727
|
// ggml_silu_back
|
@@ -5933,7 +5749,7 @@ struct ggml_tensor * ggml_silu_back(
|
|
5933
5749
|
|
5934
5750
|
// ggml_norm
|
5935
5751
|
|
5936
|
-
struct ggml_tensor * ggml_norm_impl(
|
5752
|
+
static struct ggml_tensor * ggml_norm_impl(
|
5937
5753
|
struct ggml_context * ctx,
|
5938
5754
|
struct ggml_tensor * a,
|
5939
5755
|
bool inplace) {
|
@@ -5946,10 +5762,11 @@ struct ggml_tensor * ggml_norm_impl(
|
|
5946
5762
|
|
5947
5763
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5948
5764
|
|
5765
|
+
// TODO: maybe store epsilon here?
|
5766
|
+
|
5949
5767
|
result->op = GGML_OP_NORM;
|
5950
5768
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5951
5769
|
result->src[0] = a;
|
5952
|
-
result->src[1] = NULL; // TODO: maybe store epsilon here?
|
5953
5770
|
|
5954
5771
|
return result;
|
5955
5772
|
}
|
@@ -5966,9 +5783,10 @@ struct ggml_tensor * ggml_norm_inplace(
|
|
5966
5783
|
return ggml_norm_impl(ctx, a, true);
|
5967
5784
|
}
|
5968
5785
|
|
5969
|
-
struct ggml_tensor * ggml_rms_norm_impl(
|
5786
|
+
static struct ggml_tensor * ggml_rms_norm_impl(
|
5970
5787
|
struct ggml_context * ctx,
|
5971
5788
|
struct ggml_tensor * a,
|
5789
|
+
float eps,
|
5972
5790
|
bool inplace) {
|
5973
5791
|
bool is_node = false;
|
5974
5792
|
|
@@ -5978,24 +5796,27 @@ struct ggml_tensor * ggml_rms_norm_impl(
|
|
5978
5796
|
|
5979
5797
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5980
5798
|
|
5799
|
+
ggml_set_op_params(result, &eps, sizeof(eps));
|
5800
|
+
|
5981
5801
|
result->op = GGML_OP_RMS_NORM;
|
5982
5802
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5983
5803
|
result->src[0] = a;
|
5984
|
-
result->src[1] = NULL; // TODO: maybe store epsilon here?
|
5985
5804
|
|
5986
5805
|
return result;
|
5987
5806
|
}
|
5988
5807
|
|
5989
5808
|
struct ggml_tensor * ggml_rms_norm(
|
5990
5809
|
struct ggml_context * ctx,
|
5991
|
-
struct ggml_tensor * a
|
5992
|
-
|
5810
|
+
struct ggml_tensor * a,
|
5811
|
+
float eps) {
|
5812
|
+
return ggml_rms_norm_impl(ctx, a, eps, false);
|
5993
5813
|
}
|
5994
5814
|
|
5995
5815
|
struct ggml_tensor * ggml_rms_norm_inplace(
|
5996
5816
|
struct ggml_context * ctx,
|
5997
|
-
struct ggml_tensor * a
|
5998
|
-
|
5817
|
+
struct ggml_tensor * a,
|
5818
|
+
float eps) {
|
5819
|
+
return ggml_rms_norm_impl(ctx, a, eps, true);
|
5999
5820
|
}
|
6000
5821
|
|
6001
5822
|
struct ggml_tensor * ggml_rms_norm_back(
|
@@ -6074,7 +5895,7 @@ struct ggml_tensor * ggml_out_prod(
|
|
6074
5895
|
|
6075
5896
|
// ggml_scale
|
6076
5897
|
|
6077
|
-
struct ggml_tensor * ggml_scale_impl(
|
5898
|
+
static struct ggml_tensor * ggml_scale_impl(
|
6078
5899
|
struct ggml_context * ctx,
|
6079
5900
|
struct ggml_tensor * a,
|
6080
5901
|
struct ggml_tensor * b,
|
@@ -6114,7 +5935,7 @@ struct ggml_tensor * ggml_scale_inplace(
|
|
6114
5935
|
|
6115
5936
|
// ggml_set
|
6116
5937
|
|
6117
|
-
struct ggml_tensor * ggml_set_impl(
|
5938
|
+
static struct ggml_tensor * ggml_set_impl(
|
6118
5939
|
struct ggml_context * ctx,
|
6119
5940
|
struct ggml_tensor * a,
|
6120
5941
|
struct ggml_tensor * b,
|
@@ -6134,23 +5955,13 @@ struct ggml_tensor * ggml_set_impl(
|
|
6134
5955
|
// make a view of the destination
|
6135
5956
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6136
5957
|
|
6137
|
-
|
6138
|
-
|
6139
|
-
struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
|
6140
|
-
|
6141
|
-
(( int32_t * ) c->data)[0] = nb1;
|
6142
|
-
(( int32_t * ) c->data)[1] = nb2;
|
6143
|
-
(( int32_t * ) c->data)[2] = nb3;
|
6144
|
-
(( int32_t * ) c->data)[3] = offset;
|
6145
|
-
(( int32_t * ) c->data)[4] = inplace ? 1 : 0;
|
6146
|
-
|
6147
|
-
ggml_scratch_load(ctx);
|
5958
|
+
int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
|
5959
|
+
ggml_set_op_params(result, params, sizeof(params));
|
6148
5960
|
|
6149
5961
|
result->op = GGML_OP_SET;
|
6150
5962
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6151
5963
|
result->src[0] = a;
|
6152
5964
|
result->src[1] = b;
|
6153
|
-
result->src[2] = c;
|
6154
5965
|
|
6155
5966
|
return result;
|
6156
5967
|
}
|
@@ -6214,7 +6025,7 @@ struct ggml_tensor * ggml_set_2d_inplace(
|
|
6214
6025
|
|
6215
6026
|
// ggml_cpy
|
6216
6027
|
|
6217
|
-
struct ggml_tensor * ggml_cpy_impl(
|
6028
|
+
static struct ggml_tensor * ggml_cpy_impl(
|
6218
6029
|
struct ggml_context * ctx,
|
6219
6030
|
struct ggml_tensor * a,
|
6220
6031
|
struct ggml_tensor * b,
|
@@ -6259,7 +6070,7 @@ struct ggml_tensor * ggml_cpy_inplace(
|
|
6259
6070
|
|
6260
6071
|
// ggml_cont
|
6261
6072
|
|
6262
|
-
struct ggml_tensor * ggml_cont_impl(
|
6073
|
+
static struct ggml_tensor * ggml_cont_impl(
|
6263
6074
|
struct ggml_context * ctx,
|
6264
6075
|
struct ggml_tensor * a,
|
6265
6076
|
bool inplace) {
|
@@ -6275,7 +6086,6 @@ struct ggml_tensor * ggml_cont_impl(
|
|
6275
6086
|
result->op = GGML_OP_CONT;
|
6276
6087
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6277
6088
|
result->src[0] = a;
|
6278
|
-
result->src[1] = NULL;
|
6279
6089
|
|
6280
6090
|
return result;
|
6281
6091
|
}
|
@@ -6319,7 +6129,6 @@ struct ggml_tensor * ggml_reshape(
|
|
6319
6129
|
result->op = GGML_OP_RESHAPE;
|
6320
6130
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6321
6131
|
result->src[0] = a;
|
6322
|
-
result->src[1] = NULL;
|
6323
6132
|
|
6324
6133
|
return result;
|
6325
6134
|
}
|
@@ -6344,7 +6153,6 @@ struct ggml_tensor * ggml_reshape_1d(
|
|
6344
6153
|
result->op = GGML_OP_RESHAPE;
|
6345
6154
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6346
6155
|
result->src[0] = a;
|
6347
|
-
result->src[1] = NULL;
|
6348
6156
|
|
6349
6157
|
return result;
|
6350
6158
|
}
|
@@ -6370,7 +6178,6 @@ struct ggml_tensor * ggml_reshape_2d(
|
|
6370
6178
|
result->op = GGML_OP_RESHAPE;
|
6371
6179
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6372
6180
|
result->src[0] = a;
|
6373
|
-
result->src[1] = NULL;
|
6374
6181
|
|
6375
6182
|
return result;
|
6376
6183
|
}
|
@@ -6397,7 +6204,6 @@ struct ggml_tensor * ggml_reshape_3d(
|
|
6397
6204
|
result->op = GGML_OP_RESHAPE;
|
6398
6205
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6399
6206
|
result->src[0] = a;
|
6400
|
-
result->src[1] = NULL;
|
6401
6207
|
|
6402
6208
|
return result;
|
6403
6209
|
}
|
@@ -6426,7 +6232,6 @@ struct ggml_tensor * ggml_reshape_4d(
|
|
6426
6232
|
result->op = GGML_OP_RESHAPE;
|
6427
6233
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6428
6234
|
result->src[0] = a;
|
6429
|
-
result->src[1] = NULL;
|
6430
6235
|
|
6431
6236
|
return result;
|
6432
6237
|
}
|
@@ -6448,19 +6253,11 @@ struct ggml_tensor * ggml_view_1d(
|
|
6448
6253
|
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
|
6449
6254
|
ggml_format_name(result, "%s (view)", a->name);
|
6450
6255
|
|
6451
|
-
|
6452
|
-
|
6453
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6454
|
-
ggml_set_name(offs, "offset");
|
6455
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6456
|
-
|
6457
|
-
ggml_scratch_load(ctx);
|
6256
|
+
ggml_set_op_params(result, &offset, sizeof(offset));
|
6458
6257
|
|
6459
6258
|
result->op = GGML_OP_VIEW;
|
6460
6259
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6461
6260
|
result->src[0] = a;
|
6462
|
-
result->src[1] = NULL;
|
6463
|
-
result->src[2] = offs;
|
6464
6261
|
|
6465
6262
|
return result;
|
6466
6263
|
}
|
@@ -6486,13 +6283,7 @@ struct ggml_tensor * ggml_view_2d(
|
|
6486
6283
|
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
|
6487
6284
|
ggml_format_name(result, "%s (view)", a->name);
|
6488
6285
|
|
6489
|
-
|
6490
|
-
|
6491
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6492
|
-
ggml_set_name(offs, "offset");
|
6493
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6494
|
-
|
6495
|
-
ggml_scratch_load(ctx);
|
6286
|
+
ggml_set_op_params(result, &offset, sizeof(offset));
|
6496
6287
|
|
6497
6288
|
result->nb[1] = nb1;
|
6498
6289
|
result->nb[2] = result->nb[1]*ne1;
|
@@ -6501,8 +6292,6 @@ struct ggml_tensor * ggml_view_2d(
|
|
6501
6292
|
result->op = GGML_OP_VIEW;
|
6502
6293
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6503
6294
|
result->src[0] = a;
|
6504
|
-
result->src[1] = NULL;
|
6505
|
-
result->src[2] = offs;
|
6506
6295
|
|
6507
6296
|
return result;
|
6508
6297
|
}
|
@@ -6530,13 +6319,7 @@ struct ggml_tensor * ggml_view_3d(
|
|
6530
6319
|
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
|
6531
6320
|
ggml_format_name(result, "%s (view)", a->name);
|
6532
6321
|
|
6533
|
-
|
6534
|
-
|
6535
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6536
|
-
ggml_set_name(offs, "offset");
|
6537
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6538
|
-
|
6539
|
-
ggml_scratch_load(ctx);
|
6322
|
+
ggml_set_op_params(result, &offset, sizeof(offset));
|
6540
6323
|
|
6541
6324
|
result->nb[1] = nb1;
|
6542
6325
|
result->nb[2] = nb2;
|
@@ -6545,8 +6328,6 @@ struct ggml_tensor * ggml_view_3d(
|
|
6545
6328
|
result->op = GGML_OP_VIEW;
|
6546
6329
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6547
6330
|
result->src[0] = a;
|
6548
|
-
result->src[1] = NULL;
|
6549
|
-
result->src[2] = offs;
|
6550
6331
|
|
6551
6332
|
return result;
|
6552
6333
|
}
|
@@ -6576,13 +6357,7 @@ struct ggml_tensor * ggml_view_4d(
|
|
6576
6357
|
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
|
6577
6358
|
ggml_format_name(result, "%s (view)", a->name);
|
6578
6359
|
|
6579
|
-
|
6580
|
-
|
6581
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6582
|
-
ggml_set_name(offs, "offset");
|
6583
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6584
|
-
|
6585
|
-
ggml_scratch_load(ctx);
|
6360
|
+
ggml_set_op_params(result, &offset, sizeof(offset));
|
6586
6361
|
|
6587
6362
|
result->nb[1] = nb1;
|
6588
6363
|
result->nb[2] = nb2;
|
@@ -6591,8 +6366,6 @@ struct ggml_tensor * ggml_view_4d(
|
|
6591
6366
|
result->op = GGML_OP_VIEW;
|
6592
6367
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6593
6368
|
result->src[0] = a;
|
6594
|
-
result->src[1] = NULL;
|
6595
|
-
result->src[2] = offs;
|
6596
6369
|
|
6597
6370
|
return result;
|
6598
6371
|
}
|
@@ -6653,22 +6426,9 @@ struct ggml_tensor * ggml_permute(
|
|
6653
6426
|
result->op = GGML_OP_PERMUTE;
|
6654
6427
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6655
6428
|
result->src[0] = a;
|
6656
|
-
result->src[1] = NULL;
|
6657
|
-
|
6658
|
-
if (is_node) {
|
6659
|
-
ggml_scratch_save(ctx);
|
6660
|
-
|
6661
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
|
6662
|
-
|
6663
|
-
((int32_t *) b->data)[0] = axis0;
|
6664
|
-
((int32_t *) b->data)[1] = axis1;
|
6665
|
-
((int32_t *) b->data)[2] = axis2;
|
6666
|
-
((int32_t *) b->data)[3] = axis3;
|
6667
6429
|
|
6668
|
-
|
6669
|
-
|
6670
|
-
result->src[2] = b;
|
6671
|
-
}
|
6430
|
+
int32_t params[] = { axis0, axis1, axis2, axis3 };
|
6431
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6672
6432
|
|
6673
6433
|
return result;
|
6674
6434
|
}
|
@@ -6696,7 +6456,6 @@ struct ggml_tensor * ggml_transpose(
|
|
6696
6456
|
result->op = GGML_OP_TRANSPOSE;
|
6697
6457
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6698
6458
|
result->src[0] = a;
|
6699
|
-
result->src[1] = NULL;
|
6700
6459
|
|
6701
6460
|
return result;
|
6702
6461
|
}
|
@@ -6774,7 +6533,6 @@ struct ggml_tensor * ggml_diag(
|
|
6774
6533
|
result->op = GGML_OP_DIAG;
|
6775
6534
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6776
6535
|
result->src[0] = a;
|
6777
|
-
result->src[1] = NULL;
|
6778
6536
|
|
6779
6537
|
return result;
|
6780
6538
|
}
|
@@ -6782,7 +6540,7 @@ struct ggml_tensor * ggml_diag(
|
|
6782
6540
|
|
6783
6541
|
// ggml_diag_mask_inf
|
6784
6542
|
|
6785
|
-
struct ggml_tensor * ggml_diag_mask_inf_impl(
|
6543
|
+
static struct ggml_tensor * ggml_diag_mask_inf_impl(
|
6786
6544
|
struct ggml_context * ctx,
|
6787
6545
|
struct ggml_tensor * a,
|
6788
6546
|
int n_past,
|
@@ -6795,19 +6553,12 @@ struct ggml_tensor * ggml_diag_mask_inf_impl(
|
|
6795
6553
|
|
6796
6554
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6797
6555
|
|
6798
|
-
|
6799
|
-
|
6800
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6801
|
-
|
6802
|
-
((int32_t *) b->data)[0] = n_past;
|
6803
|
-
((int32_t *) b->data)[1] = inplace ? 1 : 0;
|
6804
|
-
|
6805
|
-
ggml_scratch_load(ctx);
|
6556
|
+
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
6557
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6806
6558
|
|
6807
6559
|
result->op = GGML_OP_DIAG_MASK_INF;
|
6808
6560
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6809
6561
|
result->src[0] = a;
|
6810
|
-
result->src[1] = b;
|
6811
6562
|
|
6812
6563
|
return result;
|
6813
6564
|
}
|
@@ -6829,7 +6580,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace(
|
|
6829
6580
|
|
6830
6581
|
// ggml_diag_mask_zero
|
6831
6582
|
|
6832
|
-
struct ggml_tensor * ggml_diag_mask_zero_impl(
|
6583
|
+
static struct ggml_tensor * ggml_diag_mask_zero_impl(
|
6833
6584
|
struct ggml_context * ctx,
|
6834
6585
|
struct ggml_tensor * a,
|
6835
6586
|
int n_past,
|
@@ -6842,20 +6593,12 @@ struct ggml_tensor * ggml_diag_mask_zero_impl(
|
|
6842
6593
|
|
6843
6594
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6844
6595
|
|
6845
|
-
|
6846
|
-
|
6847
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6848
|
-
ggml_set_name(b, "n_past, inplace");
|
6849
|
-
|
6850
|
-
((int32_t *) b->data)[0] = n_past;
|
6851
|
-
((int32_t *) b->data)[1] = inplace ? 1 : 0;
|
6852
|
-
|
6853
|
-
ggml_scratch_load(ctx);
|
6596
|
+
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
6597
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6854
6598
|
|
6855
6599
|
result->op = GGML_OP_DIAG_MASK_ZERO;
|
6856
6600
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6857
6601
|
result->src[0] = a;
|
6858
|
-
result->src[1] = b;
|
6859
6602
|
|
6860
6603
|
return result;
|
6861
6604
|
}
|
@@ -6876,7 +6619,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace(
|
|
6876
6619
|
|
6877
6620
|
// ggml_soft_max
|
6878
6621
|
|
6879
|
-
struct ggml_tensor * ggml_soft_max_impl(
|
6622
|
+
static struct ggml_tensor * ggml_soft_max_impl(
|
6880
6623
|
struct ggml_context * ctx,
|
6881
6624
|
struct ggml_tensor * a,
|
6882
6625
|
bool inplace) {
|
@@ -6891,7 +6634,6 @@ struct ggml_tensor * ggml_soft_max_impl(
|
|
6891
6634
|
result->op = GGML_OP_SOFT_MAX;
|
6892
6635
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6893
6636
|
result->src[0] = a;
|
6894
|
-
result->src[1] = NULL;
|
6895
6637
|
|
6896
6638
|
return result;
|
6897
6639
|
}
|
@@ -6911,7 +6653,7 @@ struct ggml_tensor * ggml_soft_max_inplace(
|
|
6911
6653
|
|
6912
6654
|
// ggml_soft_max_back
|
6913
6655
|
|
6914
|
-
struct ggml_tensor * ggml_soft_max_back_impl(
|
6656
|
+
static struct ggml_tensor * ggml_soft_max_back_impl(
|
6915
6657
|
struct ggml_context * ctx,
|
6916
6658
|
struct ggml_tensor * a,
|
6917
6659
|
struct ggml_tensor * b,
|
@@ -6948,13 +6690,15 @@ struct ggml_tensor * ggml_soft_max_back_inplace(
|
|
6948
6690
|
|
6949
6691
|
// ggml_rope
|
6950
6692
|
|
6951
|
-
struct ggml_tensor * ggml_rope_impl(
|
6693
|
+
static struct ggml_tensor * ggml_rope_impl(
|
6952
6694
|
struct ggml_context * ctx,
|
6953
6695
|
struct ggml_tensor * a,
|
6954
6696
|
int n_past,
|
6955
6697
|
int n_dims,
|
6956
6698
|
int mode,
|
6957
6699
|
int n_ctx,
|
6700
|
+
float freq_base,
|
6701
|
+
float freq_scale,
|
6958
6702
|
bool inplace) {
|
6959
6703
|
GGML_ASSERT(n_past >= 0);
|
6960
6704
|
bool is_node = false;
|
@@ -6965,21 +6709,14 @@ struct ggml_tensor * ggml_rope_impl(
|
|
6965
6709
|
|
6966
6710
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6967
6711
|
|
6968
|
-
|
6969
|
-
|
6970
|
-
|
6971
|
-
|
6972
|
-
((int32_t *) b->data)[0] = n_past;
|
6973
|
-
((int32_t *) b->data)[1] = n_dims;
|
6974
|
-
((int32_t *) b->data)[2] = mode;
|
6975
|
-
((int32_t *) b->data)[3] = n_ctx;
|
6976
|
-
|
6977
|
-
ggml_scratch_load(ctx);
|
6712
|
+
int32_t params[6] = { n_past, n_dims, mode, n_ctx };
|
6713
|
+
memcpy(params + 4, &freq_base, sizeof(float));
|
6714
|
+
memcpy(params + 5, &freq_scale, sizeof(float));
|
6715
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6978
6716
|
|
6979
6717
|
result->op = GGML_OP_ROPE;
|
6980
6718
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6981
6719
|
result->src[0] = a;
|
6982
|
-
result->src[1] = b;
|
6983
6720
|
|
6984
6721
|
return result;
|
6985
6722
|
}
|
@@ -6991,7 +6728,7 @@ struct ggml_tensor * ggml_rope(
|
|
6991
6728
|
int n_dims,
|
6992
6729
|
int mode,
|
6993
6730
|
int n_ctx) {
|
6994
|
-
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, false);
|
6731
|
+
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, false);
|
6995
6732
|
}
|
6996
6733
|
|
6997
6734
|
struct ggml_tensor * ggml_rope_inplace(
|
@@ -7001,7 +6738,19 @@ struct ggml_tensor * ggml_rope_inplace(
|
|
7001
6738
|
int n_dims,
|
7002
6739
|
int mode,
|
7003
6740
|
int n_ctx) {
|
7004
|
-
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, true);
|
6741
|
+
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
|
6742
|
+
}
|
6743
|
+
|
6744
|
+
struct ggml_tensor * ggml_rope_custom_inplace(
|
6745
|
+
struct ggml_context * ctx,
|
6746
|
+
struct ggml_tensor * a,
|
6747
|
+
int n_past,
|
6748
|
+
int n_dims,
|
6749
|
+
int mode,
|
6750
|
+
int n_ctx,
|
6751
|
+
float freq_base,
|
6752
|
+
float freq_scale) {
|
6753
|
+
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, true);
|
7005
6754
|
}
|
7006
6755
|
|
7007
6756
|
// ggml_rope_back
|
@@ -7011,7 +6760,8 @@ struct ggml_tensor * ggml_rope_back(
|
|
7011
6760
|
struct ggml_tensor * a,
|
7012
6761
|
int n_past,
|
7013
6762
|
int n_dims,
|
7014
|
-
int mode
|
6763
|
+
int mode,
|
6764
|
+
int n_ctx) {
|
7015
6765
|
GGML_ASSERT(n_past >= 0);
|
7016
6766
|
GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
|
7017
6767
|
|
@@ -7023,21 +6773,12 @@ struct ggml_tensor * ggml_rope_back(
|
|
7023
6773
|
|
7024
6774
|
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7025
6775
|
|
7026
|
-
|
7027
|
-
|
7028
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
7029
|
-
ggml_set_name(b, "n_past, n_dims, mode");
|
7030
|
-
|
7031
|
-
((int32_t *) b->data)[0] = n_past;
|
7032
|
-
((int32_t *) b->data)[1] = n_dims;
|
7033
|
-
((int32_t *) b->data)[2] = mode;
|
7034
|
-
|
7035
|
-
ggml_scratch_load(ctx);
|
6776
|
+
int32_t params[] = { n_past, n_dims, mode, n_ctx };
|
6777
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7036
6778
|
|
7037
6779
|
result->op = GGML_OP_ROPE_BACK;
|
7038
6780
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7039
6781
|
result->src[0] = a;
|
7040
|
-
result->src[1] = b;
|
7041
6782
|
|
7042
6783
|
return result;
|
7043
6784
|
}
|
@@ -7062,21 +6803,13 @@ struct ggml_tensor * ggml_alibi(
|
|
7062
6803
|
//struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7063
6804
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7064
6805
|
|
7065
|
-
|
7066
|
-
|
7067
|
-
|
7068
|
-
|
7069
|
-
((int32_t *) b->data)[0] = n_past;
|
7070
|
-
((int32_t *) b->data)[1] = n_head;
|
7071
|
-
GGML_ASSERT(sizeof(float) == sizeof(int32_t));
|
7072
|
-
(((float *) b->data)[2]) = bias_max;
|
7073
|
-
|
7074
|
-
ggml_scratch_load(ctx);
|
6806
|
+
int32_t op_params[3] = { n_past, n_head };
|
6807
|
+
memcpy(op_params + 2, &bias_max, sizeof(float));
|
6808
|
+
ggml_set_op_params(result, &op_params, sizeof(op_params));
|
7075
6809
|
|
7076
6810
|
result->op = GGML_OP_ALIBI;
|
7077
6811
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7078
6812
|
result->src[0] = a;
|
7079
|
-
result->src[1] = b;
|
7080
6813
|
|
7081
6814
|
return result;
|
7082
6815
|
}
|
@@ -7098,19 +6831,12 @@ struct ggml_tensor * ggml_clamp(
|
|
7098
6831
|
// TODO: when implement backward, fix this:
|
7099
6832
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7100
6833
|
|
7101
|
-
|
7102
|
-
|
7103
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
|
7104
|
-
|
7105
|
-
((float *) b->data)[0] = min;
|
7106
|
-
((float *) b->data)[1] = max;
|
7107
|
-
|
7108
|
-
ggml_scratch_load(ctx);
|
6834
|
+
float params[] = { min, max };
|
6835
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7109
6836
|
|
7110
6837
|
result->op = GGML_OP_CLAMP;
|
7111
6838
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7112
6839
|
result->src[0] = a;
|
7113
|
-
result->src[1] = b;
|
7114
6840
|
|
7115
6841
|
return result;
|
7116
6842
|
}
|
@@ -7143,18 +6869,13 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|
7143
6869
|
};
|
7144
6870
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7145
6871
|
|
7146
|
-
|
7147
|
-
|
7148
|
-
((int32_t*)c->data)[0] = s0;
|
7149
|
-
((int32_t*)c->data)[1] = p0;
|
7150
|
-
((int32_t*)c->data)[2] = d0;
|
7151
|
-
ggml_scratch_load(ctx);
|
6872
|
+
int32_t params[] = { s0, p0, d0 };
|
6873
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7152
6874
|
|
7153
6875
|
result->op = GGML_OP_CONV_1D;
|
7154
6876
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7155
6877
|
result->src[0] = a;
|
7156
6878
|
result->src[1] = b;
|
7157
|
-
result->src[2] = c;
|
7158
6879
|
|
7159
6880
|
return result;
|
7160
6881
|
}
|
@@ -7187,21 +6908,13 @@ struct ggml_tensor* ggml_conv_2d(
|
|
7187
6908
|
};
|
7188
6909
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7189
6910
|
|
7190
|
-
|
7191
|
-
|
7192
|
-
((int32_t*)c->data)[0] = s0;
|
7193
|
-
((int32_t*)c->data)[1] = s1;
|
7194
|
-
((int32_t*)c->data)[2] = p0;
|
7195
|
-
((int32_t*)c->data)[3] = p1;
|
7196
|
-
((int32_t*)c->data)[4] = d0;
|
7197
|
-
((int32_t*)c->data)[5] = d1;
|
7198
|
-
ggml_scratch_load(ctx);
|
6911
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
|
6912
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7199
6913
|
|
7200
6914
|
result->op = GGML_OP_CONV_2D;
|
7201
6915
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7202
6916
|
result->src[0] = a;
|
7203
6917
|
result->src[1] = b;
|
7204
|
-
result->src[2] = c;
|
7205
6918
|
|
7206
6919
|
return result;
|
7207
6920
|
|
@@ -7225,7 +6938,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
|
7225
6938
|
return (ins + 2 * p - ks) / s + 1;
|
7226
6939
|
}
|
7227
6940
|
|
7228
|
-
//
|
6941
|
+
// ggml_pool_1d
|
7229
6942
|
|
7230
6943
|
struct ggml_tensor* ggml_pool_1d(
|
7231
6944
|
struct ggml_context * ctx,
|
@@ -7248,18 +6961,12 @@ struct ggml_tensor* ggml_pool_1d(
|
|
7248
6961
|
};
|
7249
6962
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7250
6963
|
|
7251
|
-
|
7252
|
-
|
7253
|
-
((int32_t*)c->data)[0] = op;
|
7254
|
-
((int32_t*)c->data)[1] = k0;
|
7255
|
-
((int32_t*)c->data)[2] = s0;
|
7256
|
-
((int32_t*)c->data)[3] = p0;
|
7257
|
-
ggml_scratch_load(ctx);
|
6964
|
+
int32_t params[] = { op, k0, s0, p0 };
|
6965
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7258
6966
|
|
7259
6967
|
result->op = GGML_OP_POOL_1D;
|
7260
6968
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7261
6969
|
result->src[0] = a;
|
7262
|
-
result->src[1] = c;
|
7263
6970
|
|
7264
6971
|
return result;
|
7265
6972
|
}
|
@@ -7291,21 +6998,12 @@ struct ggml_tensor* ggml_pool_2d(
|
|
7291
6998
|
};
|
7292
6999
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7293
7000
|
|
7294
|
-
|
7295
|
-
|
7296
|
-
((int32_t*)c->data)[0] = op;
|
7297
|
-
((int32_t*)c->data)[1] = k0;
|
7298
|
-
((int32_t*)c->data)[2] = k1;
|
7299
|
-
((int32_t*)c->data)[3] = s0;
|
7300
|
-
((int32_t*)c->data)[4] = s1;
|
7301
|
-
((int32_t*)c->data)[5] = p0;
|
7302
|
-
((int32_t*)c->data)[6] = p1;
|
7303
|
-
ggml_scratch_load(ctx);
|
7001
|
+
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
7002
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7304
7003
|
|
7305
7004
|
result->op = GGML_OP_POOL_2D;
|
7306
7005
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7307
7006
|
result->src[0] = a;
|
7308
|
-
result->src[1] = c;
|
7309
7007
|
|
7310
7008
|
return result;
|
7311
7009
|
}
|
@@ -7328,14 +7026,16 @@ struct ggml_tensor * ggml_flash_attn(
|
|
7328
7026
|
}
|
7329
7027
|
|
7330
7028
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
7331
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7029
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
|
7030
|
+
|
7031
|
+
int32_t t = masked ? 1 : 0;
|
7032
|
+
ggml_set_op_params(result, &t, sizeof(t));
|
7332
7033
|
|
7333
7034
|
result->op = GGML_OP_FLASH_ATTN;
|
7334
7035
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7335
7036
|
result->src[0] = q;
|
7336
7037
|
result->src[1] = k;
|
7337
7038
|
result->src[2] = v;
|
7338
|
-
result->src[3] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7339
7039
|
|
7340
7040
|
return result;
|
7341
7041
|
}
|
@@ -7359,7 +7059,7 @@ struct ggml_tensor * ggml_flash_ff(
|
|
7359
7059
|
}
|
7360
7060
|
|
7361
7061
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7362
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7062
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
|
7363
7063
|
|
7364
7064
|
result->op = GGML_OP_FLASH_FF;
|
7365
7065
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
@@ -7425,13 +7125,15 @@ struct ggml_tensor * ggml_flash_attn_back(
|
|
7425
7125
|
|
7426
7126
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7427
7127
|
|
7128
|
+
int32_t masked_i = masked ? 1 : 0;
|
7129
|
+
ggml_set_op_params(result, &masked_i, sizeof(masked_i));
|
7130
|
+
|
7428
7131
|
result->op = GGML_OP_FLASH_ATTN_BACK;
|
7429
7132
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7430
7133
|
result->src[0] = q;
|
7431
7134
|
result->src[1] = k;
|
7432
7135
|
result->src[2] = v;
|
7433
7136
|
result->src[3] = d;
|
7434
|
-
result->src[4] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7435
7137
|
|
7436
7138
|
return result;
|
7437
7139
|
}
|
@@ -7464,21 +7166,12 @@ struct ggml_tensor * ggml_win_part(
|
|
7464
7166
|
|
7465
7167
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7466
7168
|
|
7467
|
-
|
7468
|
-
|
7469
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
7470
|
-
|
7471
|
-
((int32_t *) b->data)[0] = npx;
|
7472
|
-
((int32_t *) b->data)[1] = npy;
|
7473
|
-
((int32_t *) b->data)[2] = w;
|
7474
|
-
|
7475
|
-
ggml_scratch_load(ctx);
|
7169
|
+
int32_t params[] = { npx, npy, w };
|
7170
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7476
7171
|
|
7477
7172
|
result->op = GGML_OP_WIN_PART;
|
7478
7173
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7479
7174
|
result->src[0] = a;
|
7480
|
-
result->src[1] = NULL;
|
7481
|
-
result->src[2] = b;
|
7482
7175
|
|
7483
7176
|
return result;
|
7484
7177
|
}
|
@@ -7503,26 +7196,57 @@ struct ggml_tensor * ggml_win_unpart(
|
|
7503
7196
|
const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
|
7504
7197
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7505
7198
|
|
7506
|
-
|
7199
|
+
int32_t params[] = { w };
|
7200
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7507
7201
|
|
7508
|
-
|
7202
|
+
result->op = GGML_OP_WIN_UNPART;
|
7203
|
+
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7204
|
+
result->src[0] = a;
|
7509
7205
|
|
7510
|
-
|
7206
|
+
return result;
|
7207
|
+
}
|
7511
7208
|
|
7512
|
-
|
7209
|
+
// gmml_unary
|
7513
7210
|
|
7514
|
-
|
7211
|
+
static struct ggml_tensor * ggml_unary_impl(
|
7212
|
+
struct ggml_context * ctx,
|
7213
|
+
struct ggml_tensor * a,
|
7214
|
+
enum ggml_unary_op op,
|
7215
|
+
bool inplace) {
|
7216
|
+
bool is_node = false;
|
7217
|
+
|
7218
|
+
if (!inplace && (a->grad)) {
|
7219
|
+
is_node = true;
|
7220
|
+
}
|
7221
|
+
|
7222
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7223
|
+
|
7224
|
+
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
7225
|
+
|
7226
|
+
result->op = GGML_OP_UNARY;
|
7515
7227
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7516
7228
|
result->src[0] = a;
|
7517
|
-
result->src[1] = NULL;
|
7518
|
-
result->src[2] = b;
|
7519
7229
|
|
7520
7230
|
return result;
|
7521
7231
|
}
|
7522
7232
|
|
7233
|
+
struct ggml_tensor * ggml_unary(
|
7234
|
+
struct ggml_context * ctx,
|
7235
|
+
struct ggml_tensor * a,
|
7236
|
+
enum ggml_unary_op op) {
|
7237
|
+
return ggml_unary_impl(ctx, a, op, false);
|
7238
|
+
}
|
7239
|
+
|
7240
|
+
struct ggml_tensor * ggml_unary_inplace(
|
7241
|
+
struct ggml_context * ctx,
|
7242
|
+
struct ggml_tensor * a,
|
7243
|
+
enum ggml_unary_op op) {
|
7244
|
+
return ggml_unary_impl(ctx, a, op, true);
|
7245
|
+
}
|
7246
|
+
|
7523
7247
|
// ggml_map_unary
|
7524
7248
|
|
7525
|
-
struct ggml_tensor * ggml_map_unary_impl_f32(
|
7249
|
+
static struct ggml_tensor * ggml_map_unary_impl_f32(
|
7526
7250
|
struct ggml_context * ctx,
|
7527
7251
|
struct ggml_tensor * a,
|
7528
7252
|
const ggml_unary_op_f32_t fun,
|
@@ -7533,19 +7257,13 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
|
|
7533
7257
|
is_node = true;
|
7534
7258
|
}
|
7535
7259
|
|
7536
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7537
|
-
|
7538
|
-
ggml_scratch_save(ctx);
|
7539
|
-
|
7540
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7541
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7260
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7542
7261
|
|
7543
|
-
|
7262
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7544
7263
|
|
7545
7264
|
result->op = GGML_OP_MAP_UNARY;
|
7546
7265
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7547
7266
|
result->src[0] = a;
|
7548
|
-
result->src[2] = addr_tensor;
|
7549
7267
|
|
7550
7268
|
return result;
|
7551
7269
|
}
|
@@ -7566,7 +7284,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
7566
7284
|
|
7567
7285
|
// ggml_map_binary
|
7568
7286
|
|
7569
|
-
struct ggml_tensor * ggml_map_binary_impl_f32(
|
7287
|
+
static struct ggml_tensor * ggml_map_binary_impl_f32(
|
7570
7288
|
struct ggml_context * ctx,
|
7571
7289
|
struct ggml_tensor * a,
|
7572
7290
|
struct ggml_tensor * b,
|
@@ -7580,20 +7298,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
|
|
7580
7298
|
is_node = true;
|
7581
7299
|
}
|
7582
7300
|
|
7583
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7584
|
-
|
7585
|
-
ggml_scratch_save(ctx);
|
7586
|
-
|
7587
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7588
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7301
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7589
7302
|
|
7590
|
-
|
7303
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7591
7304
|
|
7592
7305
|
result->op = GGML_OP_MAP_BINARY;
|
7593
7306
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7594
7307
|
result->src[0] = a;
|
7595
7308
|
result->src[1] = b;
|
7596
|
-
result->src[2] = addr_tensor;
|
7597
7309
|
|
7598
7310
|
return result;
|
7599
7311
|
}
|
@@ -7616,7 +7328,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
7616
7328
|
|
7617
7329
|
// ggml_map_custom1
|
7618
7330
|
|
7619
|
-
struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7331
|
+
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7620
7332
|
struct ggml_context * ctx,
|
7621
7333
|
struct ggml_tensor * a,
|
7622
7334
|
const ggml_custom1_op_f32_t fun,
|
@@ -7627,19 +7339,13 @@ struct ggml_tensor * ggml_map_custom1_impl_f32(
|
|
7627
7339
|
is_node = true;
|
7628
7340
|
}
|
7629
7341
|
|
7630
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7631
|
-
|
7632
|
-
ggml_scratch_save(ctx);
|
7633
|
-
|
7634
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7635
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7342
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7636
7343
|
|
7637
|
-
|
7344
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7638
7345
|
|
7639
7346
|
result->op = GGML_OP_MAP_CUSTOM1;
|
7640
7347
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7641
7348
|
result->src[0] = a;
|
7642
|
-
result->src[2] = addr_tensor;
|
7643
7349
|
|
7644
7350
|
return result;
|
7645
7351
|
}
|
@@ -7660,7 +7366,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
7660
7366
|
|
7661
7367
|
// ggml_map_custom2
|
7662
7368
|
|
7663
|
-
struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7369
|
+
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7664
7370
|
struct ggml_context * ctx,
|
7665
7371
|
struct ggml_tensor * a,
|
7666
7372
|
struct ggml_tensor * b,
|
@@ -7672,20 +7378,14 @@ struct ggml_tensor * ggml_map_custom2_impl_f32(
|
|
7672
7378
|
is_node = true;
|
7673
7379
|
}
|
7674
7380
|
|
7675
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7676
|
-
|
7677
|
-
ggml_scratch_save(ctx);
|
7678
|
-
|
7679
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7680
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7381
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7681
7382
|
|
7682
|
-
|
7383
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7683
7384
|
|
7684
7385
|
result->op = GGML_OP_MAP_CUSTOM2;
|
7685
7386
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7686
7387
|
result->src[0] = a;
|
7687
7388
|
result->src[1] = b;
|
7688
|
-
result->src[2] = addr_tensor;
|
7689
7389
|
|
7690
7390
|
return result;
|
7691
7391
|
}
|
@@ -7708,7 +7408,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
7708
7408
|
|
7709
7409
|
// ggml_map_custom3
|
7710
7410
|
|
7711
|
-
struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7411
|
+
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7712
7412
|
struct ggml_context * ctx,
|
7713
7413
|
struct ggml_tensor * a,
|
7714
7414
|
struct ggml_tensor * b,
|
@@ -7721,21 +7421,15 @@ struct ggml_tensor * ggml_map_custom3_impl_f32(
|
|
7721
7421
|
is_node = true;
|
7722
7422
|
}
|
7723
7423
|
|
7724
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7725
|
-
|
7726
|
-
ggml_scratch_save(ctx);
|
7727
|
-
|
7728
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7729
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7424
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7730
7425
|
|
7731
|
-
|
7426
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7732
7427
|
|
7733
7428
|
result->op = GGML_OP_MAP_CUSTOM3;
|
7734
7429
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7735
7430
|
result->src[0] = a;
|
7736
7431
|
result->src[1] = b;
|
7737
|
-
result->src[2] =
|
7738
|
-
result->src[3] = c;
|
7432
|
+
result->src[2] = c;
|
7739
7433
|
|
7740
7434
|
return result;
|
7741
7435
|
}
|
@@ -8963,21 +8657,17 @@ static void ggml_compute_forward_acc_f32(
|
|
8963
8657
|
const struct ggml_compute_params * params,
|
8964
8658
|
const struct ggml_tensor * src0,
|
8965
8659
|
const struct ggml_tensor * src1,
|
8966
|
-
const struct ggml_tensor * opt0,
|
8967
8660
|
struct ggml_tensor * dst) {
|
8968
8661
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8969
8662
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
8970
8663
|
|
8971
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
8972
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
8973
|
-
|
8974
8664
|
// view src0 and dst with these strides and data offset inbytes during acc
|
8975
8665
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
8976
|
-
size_t nb1 = ((int32_t *)
|
8977
|
-
size_t nb2 = ((int32_t *)
|
8978
|
-
size_t nb3 = ((int32_t *)
|
8979
|
-
size_t offset = ((int32_t *)
|
8980
|
-
bool inplace = (bool) ((int32_t *)
|
8666
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
8667
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
8668
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
8669
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
8670
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
8981
8671
|
|
8982
8672
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
8983
8673
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -9046,13 +8736,12 @@ static void ggml_compute_forward_acc(
|
|
9046
8736
|
const struct ggml_compute_params * params,
|
9047
8737
|
const struct ggml_tensor * src0,
|
9048
8738
|
const struct ggml_tensor * src1,
|
9049
|
-
const struct ggml_tensor * opt0,
|
9050
8739
|
struct ggml_tensor * dst) {
|
9051
8740
|
|
9052
8741
|
switch (src0->type) {
|
9053
8742
|
case GGML_TYPE_F32:
|
9054
8743
|
{
|
9055
|
-
ggml_compute_forward_acc_f32(params, src0, src1,
|
8744
|
+
ggml_compute_forward_acc_f32(params, src0, src1, dst);
|
9056
8745
|
} break;
|
9057
8746
|
case GGML_TYPE_F16:
|
9058
8747
|
case GGML_TYPE_Q4_0:
|
@@ -9484,7 +9173,7 @@ static void ggml_compute_forward_sum_f32(
|
|
9484
9173
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9485
9174
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9486
9175
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9487
|
-
|
9176
|
+
ggml_vec_sum_f32_ggf(ne00,
|
9488
9177
|
&row_sum,
|
9489
9178
|
(float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
|
9490
9179
|
sum += row_sum;
|
@@ -9494,6 +9183,38 @@ static void ggml_compute_forward_sum_f32(
|
|
9494
9183
|
((float *) dst->data)[0] = sum;
|
9495
9184
|
}
|
9496
9185
|
|
9186
|
+
static void ggml_compute_forward_sum_f16(
|
9187
|
+
const struct ggml_compute_params * params,
|
9188
|
+
const struct ggml_tensor * src0,
|
9189
|
+
struct ggml_tensor * dst) {
|
9190
|
+
assert(params->ith == 0);
|
9191
|
+
assert(ggml_is_scalar(dst));
|
9192
|
+
|
9193
|
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
9194
|
+
return;
|
9195
|
+
}
|
9196
|
+
|
9197
|
+
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
9198
|
+
|
9199
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
9200
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
9201
|
+
|
9202
|
+
float sum = 0;
|
9203
|
+
float row_sum = 0;
|
9204
|
+
|
9205
|
+
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9206
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9207
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9208
|
+
ggml_vec_sum_f16_ggf(ne00,
|
9209
|
+
&row_sum,
|
9210
|
+
(ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03));
|
9211
|
+
sum += row_sum;
|
9212
|
+
}
|
9213
|
+
}
|
9214
|
+
}
|
9215
|
+
((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum);
|
9216
|
+
}
|
9217
|
+
|
9497
9218
|
static void ggml_compute_forward_sum(
|
9498
9219
|
const struct ggml_compute_params * params,
|
9499
9220
|
const struct ggml_tensor * src0,
|
@@ -9503,6 +9224,10 @@ static void ggml_compute_forward_sum(
|
|
9503
9224
|
{
|
9504
9225
|
ggml_compute_forward_sum_f32(params, src0, dst);
|
9505
9226
|
} break;
|
9227
|
+
case GGML_TYPE_F16:
|
9228
|
+
{
|
9229
|
+
ggml_compute_forward_sum_f16(params, src0, dst);
|
9230
|
+
} break;
|
9506
9231
|
default:
|
9507
9232
|
{
|
9508
9233
|
GGML_ASSERT(false);
|
@@ -10098,8 +9823,8 @@ static void ggml_compute_forward_gelu_f32(
|
|
10098
9823
|
const struct ggml_compute_params * params,
|
10099
9824
|
const struct ggml_tensor * src0,
|
10100
9825
|
struct ggml_tensor * dst) {
|
10101
|
-
GGML_ASSERT(
|
10102
|
-
GGML_ASSERT(
|
9826
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9827
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10103
9828
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10104
9829
|
|
10105
9830
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10157,8 +9882,8 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
10157
9882
|
const struct ggml_compute_params * params,
|
10158
9883
|
const struct ggml_tensor * src0,
|
10159
9884
|
struct ggml_tensor * dst) {
|
10160
|
-
GGML_ASSERT(
|
10161
|
-
GGML_ASSERT(
|
9885
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9886
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10162
9887
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10163
9888
|
|
10164
9889
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10216,8 +9941,8 @@ static void ggml_compute_forward_silu_f32(
|
|
10216
9941
|
const struct ggml_compute_params * params,
|
10217
9942
|
const struct ggml_tensor * src0,
|
10218
9943
|
struct ggml_tensor * dst) {
|
10219
|
-
GGML_ASSERT(
|
10220
|
-
GGML_ASSERT(
|
9944
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9945
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10221
9946
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10222
9947
|
|
10223
9948
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10269,7 +9994,6 @@ static void ggml_compute_forward_silu(
|
|
10269
9994
|
}
|
10270
9995
|
}
|
10271
9996
|
|
10272
|
-
|
10273
9997
|
// ggml_compute_forward_silu_back
|
10274
9998
|
|
10275
9999
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -10277,9 +10001,9 @@ static void ggml_compute_forward_silu_back_f32(
|
|
10277
10001
|
const struct ggml_tensor * src0,
|
10278
10002
|
const struct ggml_tensor * grad,
|
10279
10003
|
struct ggml_tensor * dst) {
|
10280
|
-
GGML_ASSERT(
|
10281
|
-
GGML_ASSERT(
|
10282
|
-
GGML_ASSERT(
|
10004
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
10005
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
10006
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10283
10007
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10284
10008
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
10285
10009
|
|
@@ -10419,7 +10143,8 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
10419
10143
|
|
10420
10144
|
GGML_TENSOR_UNARY_OP_LOCALS;
|
10421
10145
|
|
10422
|
-
|
10146
|
+
float eps;
|
10147
|
+
memcpy(&eps, dst->op_params, sizeof(float));
|
10423
10148
|
|
10424
10149
|
// TODO: optimize
|
10425
10150
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
@@ -10684,6 +10409,8 @@ static void ggml_compute_forward_mul_mat(
|
|
10684
10409
|
|
10685
10410
|
const enum ggml_type type = src0->type;
|
10686
10411
|
|
10412
|
+
const bool src1_cont = ggml_is_contiguous(src1);
|
10413
|
+
|
10687
10414
|
ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
|
10688
10415
|
enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
|
10689
10416
|
ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
|
@@ -10747,7 +10474,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10747
10474
|
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
10748
10475
|
|
10749
10476
|
if (type != GGML_TYPE_F32) {
|
10750
|
-
|
10477
|
+
float * const wdata = params->wdata;
|
10751
10478
|
ggml_to_float_t const to_float = type_traits[type].to_float;
|
10752
10479
|
|
10753
10480
|
size_t id = 0;
|
@@ -10805,7 +10532,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10805
10532
|
// src1 rows
|
10806
10533
|
const int64_t nr1 = ne11*ne12*ne13;
|
10807
10534
|
|
10808
|
-
void * wdata
|
10535
|
+
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
10809
10536
|
const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
|
10810
10537
|
|
10811
10538
|
for (int64_t ir1 = 0; ir1 < nr1; ++ir1) {
|
@@ -10828,7 +10555,15 @@ static void ggml_compute_forward_mul_mat(
|
|
10828
10555
|
const int64_t i3 = i13;
|
10829
10556
|
|
10830
10557
|
const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 );
|
10831
|
-
|
10558
|
+
|
10559
|
+
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
10560
|
+
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
10561
|
+
// the original src1 data pointer, so we should index using the indices directly
|
10562
|
+
// TODO: this is a bit of a hack, we should probably have a better way to handle this
|
10563
|
+
const char * src1_col = (const char *) wdata +
|
10564
|
+
(src1_cont || src1->type != vec_dot_type
|
10565
|
+
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
10566
|
+
: (i11*nb11 + i12*nb12 + i13*nb13));
|
10832
10567
|
|
10833
10568
|
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
10834
10569
|
|
@@ -11062,21 +10797,17 @@ static void ggml_compute_forward_set_f32(
|
|
11062
10797
|
const struct ggml_compute_params * params,
|
11063
10798
|
const struct ggml_tensor * src0,
|
11064
10799
|
const struct ggml_tensor * src1,
|
11065
|
-
const struct ggml_tensor * opt0,
|
11066
10800
|
struct ggml_tensor * dst) {
|
11067
10801
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11068
10802
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
11069
10803
|
|
11070
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
11071
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
11072
|
-
|
11073
10804
|
// view src0 and dst with these strides and data offset inbytes during set
|
11074
10805
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
11075
|
-
size_t nb1 = ((int32_t *)
|
11076
|
-
size_t nb2 = ((int32_t *)
|
11077
|
-
size_t nb3 = ((int32_t *)
|
11078
|
-
size_t offset = ((int32_t *)
|
11079
|
-
bool inplace = (bool) ((int32_t *)
|
10806
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
10807
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
10808
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
10809
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
10810
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
11080
10811
|
|
11081
10812
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
11082
10813
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -11136,13 +10867,12 @@ static void ggml_compute_forward_set(
|
|
11136
10867
|
const struct ggml_compute_params * params,
|
11137
10868
|
const struct ggml_tensor * src0,
|
11138
10869
|
const struct ggml_tensor * src1,
|
11139
|
-
const struct ggml_tensor * opt0,
|
11140
10870
|
struct ggml_tensor * dst) {
|
11141
10871
|
|
11142
10872
|
switch (src0->type) {
|
11143
10873
|
case GGML_TYPE_F32:
|
11144
10874
|
{
|
11145
|
-
ggml_compute_forward_set_f32(params, src0, src1,
|
10875
|
+
ggml_compute_forward_set_f32(params, src0, src1, dst);
|
11146
10876
|
} break;
|
11147
10877
|
case GGML_TYPE_F16:
|
11148
10878
|
case GGML_TYPE_Q4_0:
|
@@ -11538,17 +11268,14 @@ static void ggml_compute_forward_diag(
|
|
11538
11268
|
static void ggml_compute_forward_diag_mask_f32(
|
11539
11269
|
const struct ggml_compute_params * params,
|
11540
11270
|
const struct ggml_tensor * src0,
|
11541
|
-
const struct ggml_tensor * src1,
|
11542
11271
|
struct ggml_tensor * dst,
|
11543
11272
|
const float value) {
|
11544
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11545
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11546
11273
|
|
11547
11274
|
const int ith = params->ith;
|
11548
11275
|
const int nth = params->nth;
|
11549
11276
|
|
11550
|
-
const int n_past = ((int32_t *)
|
11551
|
-
const bool inplace = (bool)((int32_t *)
|
11277
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11278
|
+
const bool inplace = (bool)((int32_t *) dst->op_params)[1];
|
11552
11279
|
|
11553
11280
|
GGML_ASSERT(n_past >= 0);
|
11554
11281
|
|
@@ -11591,12 +11318,11 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11591
11318
|
static void ggml_compute_forward_diag_mask_inf(
|
11592
11319
|
const struct ggml_compute_params * params,
|
11593
11320
|
const struct ggml_tensor * src0,
|
11594
|
-
const struct ggml_tensor * src1,
|
11595
11321
|
struct ggml_tensor * dst) {
|
11596
11322
|
switch (src0->type) {
|
11597
11323
|
case GGML_TYPE_F32:
|
11598
11324
|
{
|
11599
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11325
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
|
11600
11326
|
} break;
|
11601
11327
|
default:
|
11602
11328
|
{
|
@@ -11608,12 +11334,11 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
11608
11334
|
static void ggml_compute_forward_diag_mask_zero(
|
11609
11335
|
const struct ggml_compute_params * params,
|
11610
11336
|
const struct ggml_tensor * src0,
|
11611
|
-
const struct ggml_tensor * src1,
|
11612
11337
|
struct ggml_tensor * dst) {
|
11613
11338
|
switch (src0->type) {
|
11614
11339
|
case GGML_TYPE_F32:
|
11615
11340
|
{
|
11616
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11341
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
|
11617
11342
|
} break;
|
11618
11343
|
default:
|
11619
11344
|
{
|
@@ -11811,20 +11536,17 @@ static void ggml_compute_forward_soft_max_back(
|
|
11811
11536
|
static void ggml_compute_forward_alibi_f32(
|
11812
11537
|
const struct ggml_compute_params * params,
|
11813
11538
|
const struct ggml_tensor * src0,
|
11814
|
-
const struct ggml_tensor * src1,
|
11815
11539
|
struct ggml_tensor * dst) {
|
11816
11540
|
assert(params->ith == 0);
|
11817
11541
|
|
11818
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11819
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11820
|
-
|
11821
11542
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11822
11543
|
return;
|
11823
11544
|
}
|
11824
11545
|
|
11825
|
-
const int
|
11826
|
-
const int
|
11827
|
-
|
11546
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11547
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11548
|
+
float max_bias;
|
11549
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11828
11550
|
|
11829
11551
|
assert(n_past >= 0);
|
11830
11552
|
|
@@ -11877,20 +11599,17 @@ static void ggml_compute_forward_alibi_f32(
|
|
11877
11599
|
static void ggml_compute_forward_alibi_f16(
|
11878
11600
|
const struct ggml_compute_params * params,
|
11879
11601
|
const struct ggml_tensor * src0,
|
11880
|
-
const struct ggml_tensor * src1,
|
11881
11602
|
struct ggml_tensor * dst) {
|
11882
11603
|
assert(params->ith == 0);
|
11883
11604
|
|
11884
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11885
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11886
|
-
|
11887
11605
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11888
11606
|
return;
|
11889
11607
|
}
|
11890
11608
|
|
11891
|
-
const int
|
11892
|
-
const int
|
11893
|
-
|
11609
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11610
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11611
|
+
float max_bias;
|
11612
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11894
11613
|
|
11895
11614
|
assert(n_past >= 0);
|
11896
11615
|
|
@@ -11943,16 +11662,15 @@ static void ggml_compute_forward_alibi_f16(
|
|
11943
11662
|
static void ggml_compute_forward_alibi(
|
11944
11663
|
const struct ggml_compute_params * params,
|
11945
11664
|
const struct ggml_tensor * src0,
|
11946
|
-
const struct ggml_tensor * src1,
|
11947
11665
|
struct ggml_tensor * dst) {
|
11948
11666
|
switch (src0->type) {
|
11949
11667
|
case GGML_TYPE_F16:
|
11950
11668
|
{
|
11951
|
-
ggml_compute_forward_alibi_f16(params, src0,
|
11669
|
+
ggml_compute_forward_alibi_f16(params, src0, dst);
|
11952
11670
|
} break;
|
11953
11671
|
case GGML_TYPE_F32:
|
11954
11672
|
{
|
11955
|
-
ggml_compute_forward_alibi_f32(params, src0,
|
11673
|
+
ggml_compute_forward_alibi_f32(params, src0, dst);
|
11956
11674
|
} break;
|
11957
11675
|
case GGML_TYPE_Q4_0:
|
11958
11676
|
case GGML_TYPE_Q4_1:
|
@@ -11982,19 +11700,17 @@ static void ggml_compute_forward_alibi(
|
|
11982
11700
|
static void ggml_compute_forward_clamp_f32(
|
11983
11701
|
const struct ggml_compute_params * params,
|
11984
11702
|
const struct ggml_tensor * src0,
|
11985
|
-
const struct ggml_tensor * src1,
|
11986
11703
|
struct ggml_tensor * dst) {
|
11987
11704
|
assert(params->ith == 0);
|
11988
11705
|
|
11989
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11990
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11991
|
-
|
11992
11706
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11993
11707
|
return;
|
11994
11708
|
}
|
11995
11709
|
|
11996
|
-
|
11997
|
-
|
11710
|
+
float min;
|
11711
|
+
float max;
|
11712
|
+
memcpy(&min, (float *) dst->op_params + 0, sizeof(float));
|
11713
|
+
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
11998
11714
|
|
11999
11715
|
const int ith = params->ith;
|
12000
11716
|
const int nth = params->nth;
|
@@ -12024,12 +11740,11 @@ static void ggml_compute_forward_clamp_f32(
|
|
12024
11740
|
static void ggml_compute_forward_clamp(
|
12025
11741
|
const struct ggml_compute_params * params,
|
12026
11742
|
const struct ggml_tensor * src0,
|
12027
|
-
const struct ggml_tensor * src1,
|
12028
11743
|
struct ggml_tensor * dst) {
|
12029
11744
|
switch (src0->type) {
|
12030
11745
|
case GGML_TYPE_F32:
|
12031
11746
|
{
|
12032
|
-
ggml_compute_forward_clamp_f32(params, src0,
|
11747
|
+
ggml_compute_forward_clamp_f32(params, src0, dst);
|
12033
11748
|
} break;
|
12034
11749
|
case GGML_TYPE_F16:
|
12035
11750
|
case GGML_TYPE_Q4_0:
|
@@ -12059,19 +11774,21 @@ static void ggml_compute_forward_clamp(
|
|
12059
11774
|
static void ggml_compute_forward_rope_f32(
|
12060
11775
|
const struct ggml_compute_params * params,
|
12061
11776
|
const struct ggml_tensor * src0,
|
12062
|
-
const struct ggml_tensor * src1,
|
12063
11777
|
struct ggml_tensor * dst) {
|
12064
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12065
|
-
GGML_ASSERT(ggml_nelements(src1) == 4);
|
12066
11778
|
|
12067
11779
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12068
11780
|
return;
|
12069
11781
|
}
|
12070
11782
|
|
12071
|
-
|
12072
|
-
|
12073
|
-
|
12074
|
-
const int
|
11783
|
+
float freq_base;
|
11784
|
+
float freq_scale;
|
11785
|
+
|
11786
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11787
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11788
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11789
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11790
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11791
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12075
11792
|
|
12076
11793
|
assert(n_past >= 0);
|
12077
11794
|
|
@@ -12100,7 +11817,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12100
11817
|
// row index used to determine which thread to use
|
12101
11818
|
int ir = 0;
|
12102
11819
|
|
12103
|
-
const float theta_scale = powf(
|
11820
|
+
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
12104
11821
|
|
12105
11822
|
const bool is_neox = mode & 2;
|
12106
11823
|
const bool is_glm = mode & 4;
|
@@ -12112,7 +11829,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12112
11829
|
if (ir++ < ir0) continue;
|
12113
11830
|
if (ir > ir1) break;
|
12114
11831
|
|
12115
|
-
float theta = (float)p;
|
11832
|
+
float theta = freq_scale * (float)p;
|
12116
11833
|
|
12117
11834
|
if (is_glm) {
|
12118
11835
|
theta = MIN(p, n_ctx - 2);
|
@@ -12186,19 +11903,21 @@ static void ggml_compute_forward_rope_f32(
|
|
12186
11903
|
static void ggml_compute_forward_rope_f16(
|
12187
11904
|
const struct ggml_compute_params * params,
|
12188
11905
|
const struct ggml_tensor * src0,
|
12189
|
-
const struct ggml_tensor * src1,
|
12190
11906
|
struct ggml_tensor * dst) {
|
12191
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12192
|
-
GGML_ASSERT(ggml_nelements(src1) == 4);
|
12193
11907
|
|
12194
11908
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12195
11909
|
return;
|
12196
11910
|
}
|
12197
11911
|
|
12198
|
-
|
12199
|
-
|
12200
|
-
|
12201
|
-
const int
|
11912
|
+
float freq_base;
|
11913
|
+
float freq_scale;
|
11914
|
+
|
11915
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11916
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11917
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11918
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11919
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11920
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12202
11921
|
|
12203
11922
|
assert(n_past >= 0);
|
12204
11923
|
|
@@ -12227,7 +11946,7 @@ static void ggml_compute_forward_rope_f16(
|
|
12227
11946
|
// row index used to determine which thread to use
|
12228
11947
|
int ir = 0;
|
12229
11948
|
|
12230
|
-
const float theta_scale = powf(
|
11949
|
+
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
12231
11950
|
|
12232
11951
|
const bool is_neox = mode & 2;
|
12233
11952
|
const bool is_glm = mode & 4;
|
@@ -12239,7 +11958,7 @@ static void ggml_compute_forward_rope_f16(
|
|
12239
11958
|
if (ir++ < ir0) continue;
|
12240
11959
|
if (ir > ir1) break;
|
12241
11960
|
|
12242
|
-
float theta = (float)p;
|
11961
|
+
float theta = freq_scale * (float)p;
|
12243
11962
|
|
12244
11963
|
if (is_glm) {
|
12245
11964
|
theta = MIN(p, n_ctx - 2);
|
@@ -12300,7 +12019,7 @@ static void ggml_compute_forward_rope_f16(
|
|
12300
12019
|
const float x0 = GGML_FP16_TO_FP32(src[0]);
|
12301
12020
|
const float x1 = GGML_FP16_TO_FP32(src[n_dims/2]);
|
12302
12021
|
|
12303
|
-
dst_data[0]
|
12022
|
+
dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
12304
12023
|
dst_data[n_dims/2] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
12305
12024
|
}
|
12306
12025
|
}
|
@@ -12313,16 +12032,15 @@ static void ggml_compute_forward_rope_f16(
|
|
12313
12032
|
static void ggml_compute_forward_rope(
|
12314
12033
|
const struct ggml_compute_params * params,
|
12315
12034
|
const struct ggml_tensor * src0,
|
12316
|
-
const struct ggml_tensor * src1,
|
12317
12035
|
struct ggml_tensor * dst) {
|
12318
12036
|
switch (src0->type) {
|
12319
12037
|
case GGML_TYPE_F16:
|
12320
12038
|
{
|
12321
|
-
ggml_compute_forward_rope_f16(params, src0,
|
12039
|
+
ggml_compute_forward_rope_f16(params, src0, dst);
|
12322
12040
|
} break;
|
12323
12041
|
case GGML_TYPE_F32:
|
12324
12042
|
{
|
12325
|
-
ggml_compute_forward_rope_f32(params, src0,
|
12043
|
+
ggml_compute_forward_rope_f32(params, src0, dst);
|
12326
12044
|
} break;
|
12327
12045
|
default:
|
12328
12046
|
{
|
@@ -12336,10 +12054,7 @@ static void ggml_compute_forward_rope(
|
|
12336
12054
|
static void ggml_compute_forward_rope_back_f32(
|
12337
12055
|
const struct ggml_compute_params * params,
|
12338
12056
|
const struct ggml_tensor * src0,
|
12339
|
-
const struct ggml_tensor * src1,
|
12340
12057
|
struct ggml_tensor * dst) {
|
12341
|
-
assert(src1->type == GGML_TYPE_I32);
|
12342
|
-
assert(ggml_nelements(src1) == 3);
|
12343
12058
|
|
12344
12059
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12345
12060
|
return;
|
@@ -12349,9 +12064,9 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12349
12064
|
// dx = rope_back(dy, op_params)
|
12350
12065
|
// src0 is dy, dst->op_params contains options
|
12351
12066
|
|
12352
|
-
const int n_past = ((int32_t *)
|
12353
|
-
const int n_dims = ((int32_t *)
|
12354
|
-
const int mode = ((int32_t *)
|
12067
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12068
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12069
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12355
12070
|
|
12356
12071
|
assert(n_past >= 0);
|
12357
12072
|
|
@@ -12435,10 +12150,7 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12435
12150
|
static void ggml_compute_forward_rope_back_f16(
|
12436
12151
|
const struct ggml_compute_params * params,
|
12437
12152
|
const struct ggml_tensor * src0,
|
12438
|
-
const struct ggml_tensor * src1,
|
12439
12153
|
struct ggml_tensor * dst) {
|
12440
|
-
assert(src1->type == GGML_TYPE_I32);
|
12441
|
-
assert(ggml_nelements(src1) == 3);
|
12442
12154
|
|
12443
12155
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12444
12156
|
return;
|
@@ -12448,9 +12160,9 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12448
12160
|
// dx = rope_back(dy, op_params)
|
12449
12161
|
// src0 is dy, dst->op_params contains options
|
12450
12162
|
|
12451
|
-
const int n_past = ((int32_t *)
|
12452
|
-
const int n_dims = ((int32_t *)
|
12453
|
-
const int mode = ((int32_t *)
|
12163
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12164
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12165
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12454
12166
|
|
12455
12167
|
assert(n_past >= 0);
|
12456
12168
|
|
@@ -12534,16 +12246,15 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12534
12246
|
static void ggml_compute_forward_rope_back(
|
12535
12247
|
const struct ggml_compute_params * params,
|
12536
12248
|
const struct ggml_tensor * src0,
|
12537
|
-
const struct ggml_tensor * src1,
|
12538
12249
|
struct ggml_tensor * dst) {
|
12539
12250
|
switch (src0->type) {
|
12540
12251
|
case GGML_TYPE_F16:
|
12541
12252
|
{
|
12542
|
-
ggml_compute_forward_rope_back_f16(params, src0,
|
12253
|
+
ggml_compute_forward_rope_back_f16(params, src0, dst);
|
12543
12254
|
} break;
|
12544
12255
|
case GGML_TYPE_F32:
|
12545
12256
|
{
|
12546
|
-
ggml_compute_forward_rope_back_f32(params, src0,
|
12257
|
+
ggml_compute_forward_rope_back_f32(params, src0, dst);
|
12547
12258
|
} break;
|
12548
12259
|
default:
|
12549
12260
|
{
|
@@ -12740,7 +12451,7 @@ static void ggml_compute_forward_conv_1d_s1_ph(
|
|
12740
12451
|
const struct ggml_compute_params * params,
|
12741
12452
|
const struct ggml_tensor * src0,
|
12742
12453
|
const struct ggml_tensor * src1,
|
12743
|
-
|
12454
|
+
struct ggml_tensor * dst) {
|
12744
12455
|
switch (src0->type) {
|
12745
12456
|
case GGML_TYPE_F16:
|
12746
12457
|
{
|
@@ -12943,7 +12654,7 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12943
12654
|
const struct ggml_compute_params * params,
|
12944
12655
|
const struct ggml_tensor * src0,
|
12945
12656
|
const struct ggml_tensor * src1,
|
12946
|
-
|
12657
|
+
struct ggml_tensor * dst) {
|
12947
12658
|
switch (src0->type) {
|
12948
12659
|
case GGML_TYPE_F16:
|
12949
12660
|
{
|
@@ -12963,14 +12674,13 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12963
12674
|
// ggml_compute_forward_conv_1d
|
12964
12675
|
|
12965
12676
|
static void ggml_compute_forward_conv_1d(
|
12966
|
-
|
12967
|
-
|
12968
|
-
|
12969
|
-
|
12970
|
-
|
12971
|
-
const int32_t
|
12972
|
-
const int32_t
|
12973
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[2];
|
12677
|
+
const struct ggml_compute_params * params,
|
12678
|
+
const struct ggml_tensor * src0,
|
12679
|
+
const struct ggml_tensor * src1,
|
12680
|
+
struct ggml_tensor * dst) {
|
12681
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12682
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
12683
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
12974
12684
|
GGML_ASSERT(d0 == 1); // dilation not supported
|
12975
12685
|
GGML_ASSERT(p0 == src0->ne[0]/2); // only half padding supported
|
12976
12686
|
if (s0 == 1) {
|
@@ -12982,9 +12692,9 @@ static void ggml_compute_forward_conv_1d(
|
|
12982
12692
|
};
|
12983
12693
|
}
|
12984
12694
|
|
12985
|
-
//
|
12695
|
+
// ggml_compute_forward_conv_2d
|
12986
12696
|
|
12987
|
-
static void
|
12697
|
+
static void ggml_compute_forward_conv_2d_f16_f32(
|
12988
12698
|
const struct ggml_compute_params * params,
|
12989
12699
|
const struct ggml_tensor * src0,
|
12990
12700
|
const struct ggml_tensor * src1,
|
@@ -13007,28 +12717,37 @@ static void ggml_compute_forward_conv_2d_sk_p0_f16_f32(
|
|
13007
12717
|
// size of the convolution row - the kernel size unrolled across all channels
|
13008
12718
|
const int ew0 = nk0*nk1*ne02;
|
13009
12719
|
|
12720
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12721
|
+
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12722
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12723
|
+
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12724
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12725
|
+
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
12726
|
+
|
13010
12727
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13011
12728
|
GGML_ASSERT(nb10 == sizeof(float));
|
13012
12729
|
|
13013
12730
|
if (params->type == GGML_TASK_INIT) {
|
13014
|
-
// TODO: fix this memset (wsize is overestimated)
|
13015
12731
|
memset(params->wdata, 0, params->wsize);
|
13016
12732
|
|
13017
12733
|
// prepare source data (src1)
|
13018
12734
|
{
|
13019
12735
|
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
13020
12736
|
|
13021
|
-
for (int
|
13022
|
-
|
13023
|
-
|
13024
|
-
|
12737
|
+
for (int i12 = 0; i12 < ne12; i12++) {
|
12738
|
+
const float * const src = (float *)((char *) src1->data + i12*nb12);
|
12739
|
+
ggml_fp16_t * dst_data = wdata;
|
12740
|
+
|
12741
|
+
for (int i1 = 0; i1 < ne1; i1++) {
|
12742
|
+
for (int i0 = 0; i0 < ne0; i0++) {
|
12743
|
+
for (int ik1 = 0; ik1 < nk1; ik1++) {
|
12744
|
+
for (int ik0 = 0; ik0 < nk0; ik0++) {
|
12745
|
+
const int idx0 = i0*s0 + ik0*d0 - p0;
|
12746
|
+
const int idx1 = i1*s1 + ik1*d1 - p1;
|
13025
12747
|
|
13026
|
-
|
13027
|
-
for (int i0 = 0; i0 < ne0; i0++) {
|
13028
|
-
for (int ik1 = 0; ik1 < nk1; ik1++) {
|
13029
|
-
for (int ik0 = 0; ik0 < nk0; ik0++) {
|
12748
|
+
if (!(idx1 < 0 || idx1 >= ne11 || idx0 < 0 || idx0 >= ne10)) {
|
13030
12749
|
dst_data[(i1*ne0 + i0)*ew0 + i12*(nk0*nk1) + ik1*nk0 + ik0] =
|
13031
|
-
GGML_FP32_TO_FP16(src[
|
12750
|
+
GGML_FP32_TO_FP16(src[idx1*ne10 + idx0]);
|
13032
12751
|
}
|
13033
12752
|
}
|
13034
12753
|
}
|
@@ -13071,19 +12790,19 @@ static void ggml_compute_forward_conv_2d_sk_p0_f16_f32(
|
|
13071
12790
|
}
|
13072
12791
|
}
|
13073
12792
|
|
13074
|
-
static void
|
12793
|
+
static void ggml_compute_forward_conv_2d(
|
13075
12794
|
const struct ggml_compute_params * params,
|
13076
12795
|
const struct ggml_tensor * src0,
|
13077
12796
|
const struct ggml_tensor * src1,
|
13078
|
-
|
12797
|
+
struct ggml_tensor * dst) {
|
13079
12798
|
switch (src0->type) {
|
13080
12799
|
case GGML_TYPE_F16:
|
13081
12800
|
{
|
13082
|
-
|
12801
|
+
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
|
13083
12802
|
} break;
|
13084
12803
|
case GGML_TYPE_F32:
|
13085
12804
|
{
|
13086
|
-
//
|
12805
|
+
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
13087
12806
|
GGML_ASSERT(false);
|
13088
12807
|
} break;
|
13089
12808
|
default:
|
@@ -13093,32 +12812,6 @@ static void ggml_compute_forward_conv_2d_sk_p0(
|
|
13093
12812
|
}
|
13094
12813
|
}
|
13095
12814
|
|
13096
|
-
// ggml_compute_forward_conv_2d
|
13097
|
-
|
13098
|
-
static void ggml_compute_forward_conv_2d(
|
13099
|
-
const struct ggml_compute_params* params,
|
13100
|
-
const struct ggml_tensor* src0,
|
13101
|
-
const struct ggml_tensor* src1,
|
13102
|
-
const struct ggml_tensor* opt0,
|
13103
|
-
struct ggml_tensor* dst) {
|
13104
|
-
const int32_t s0 = ((const int32_t*)(opt0->data))[0];
|
13105
|
-
const int32_t s1 = ((const int32_t*)(opt0->data))[1];
|
13106
|
-
const int32_t p0 = ((const int32_t*)(opt0->data))[2];
|
13107
|
-
const int32_t p1 = ((const int32_t*)(opt0->data))[3];
|
13108
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[4];
|
13109
|
-
const int32_t d1 = ((const int32_t*)(opt0->data))[5];
|
13110
|
-
GGML_ASSERT(d0 == 1); // dilation not supported
|
13111
|
-
GGML_ASSERT(d1 == 1);
|
13112
|
-
GGML_ASSERT(p0 == 0); // padding not supported
|
13113
|
-
GGML_ASSERT(p1 == 0);
|
13114
|
-
|
13115
|
-
if (s0 == src0->ne[0] && s1 == src0->ne[1]) {
|
13116
|
-
ggml_compute_forward_conv_2d_sk_p0(params, src0, src1, dst);
|
13117
|
-
} else {
|
13118
|
-
GGML_ASSERT(false); // only stride equal to kernel size is supported
|
13119
|
-
}
|
13120
|
-
}
|
13121
|
-
|
13122
12815
|
// ggml_compute_forward_pool_1d_sk_p0
|
13123
12816
|
|
13124
12817
|
static void ggml_compute_forward_pool_1d_sk_p0(
|
@@ -13174,12 +12867,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
13174
12867
|
// ggml_compute_forward_pool_1d
|
13175
12868
|
|
13176
12869
|
static void ggml_compute_forward_pool_1d(
|
13177
|
-
|
13178
|
-
|
13179
|
-
|
13180
|
-
|
13181
|
-
|
13182
|
-
const int* opts = (const int*)opt0->data;
|
12870
|
+
const struct ggml_compute_params * params,
|
12871
|
+
const struct ggml_tensor * src0,
|
12872
|
+
struct ggml_tensor * dst) {
|
12873
|
+
|
12874
|
+
const int32_t* opts = (const int32_t*)dst->op_params;
|
13183
12875
|
enum ggml_op_pool op = opts[0];
|
13184
12876
|
const int k0 = opts[1];
|
13185
12877
|
const int s0 = opts[2];
|
@@ -13193,12 +12885,12 @@ static void ggml_compute_forward_pool_1d(
|
|
13193
12885
|
// ggml_compute_forward_pool_2d_sk_p0
|
13194
12886
|
|
13195
12887
|
static void ggml_compute_forward_pool_2d_sk_p0(
|
13196
|
-
|
13197
|
-
|
13198
|
-
|
13199
|
-
|
13200
|
-
|
13201
|
-
|
12888
|
+
const struct ggml_compute_params * params,
|
12889
|
+
const enum ggml_op_pool op,
|
12890
|
+
const struct ggml_tensor * src,
|
12891
|
+
const int k0,
|
12892
|
+
const int k1,
|
12893
|
+
struct ggml_tensor * dst) {
|
13202
12894
|
assert(src->type == GGML_TYPE_F32);
|
13203
12895
|
assert(params->ith == 0);
|
13204
12896
|
|
@@ -13258,12 +12950,11 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
13258
12950
|
// ggml_compute_forward_pool_2d
|
13259
12951
|
|
13260
12952
|
static void ggml_compute_forward_pool_2d(
|
13261
|
-
|
13262
|
-
|
13263
|
-
|
13264
|
-
|
13265
|
-
|
13266
|
-
const int* opts = (const int*)opt0->data;
|
12953
|
+
const struct ggml_compute_params * params,
|
12954
|
+
const struct ggml_tensor * src0,
|
12955
|
+
struct ggml_tensor * dst) {
|
12956
|
+
|
12957
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
13267
12958
|
enum ggml_op_pool op = opts[0];
|
13268
12959
|
const int k0 = opts[1];
|
13269
12960
|
const int k1 = opts[2];
|
@@ -13288,7 +12979,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13288
12979
|
const struct ggml_tensor * k,
|
13289
12980
|
const struct ggml_tensor * v,
|
13290
12981
|
const bool masked,
|
13291
|
-
|
12982
|
+
struct ggml_tensor * dst) {
|
13292
12983
|
int64_t t0 = ggml_perf_time_us();
|
13293
12984
|
UNUSED(t0);
|
13294
12985
|
|
@@ -13466,7 +13157,7 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13466
13157
|
const struct ggml_tensor * k,
|
13467
13158
|
const struct ggml_tensor * v,
|
13468
13159
|
const bool masked,
|
13469
|
-
|
13160
|
+
struct ggml_tensor * dst) {
|
13470
13161
|
int64_t t0 = ggml_perf_time_us();
|
13471
13162
|
UNUSED(t0);
|
13472
13163
|
|
@@ -14231,7 +13922,6 @@ static void ggml_compute_forward_flash_attn_back(
|
|
14231
13922
|
static void ggml_compute_forward_win_part_f32(
|
14232
13923
|
const struct ggml_compute_params * params,
|
14233
13924
|
const struct ggml_tensor * src0,
|
14234
|
-
const struct ggml_tensor * opt0,
|
14235
13925
|
struct ggml_tensor * dst) {
|
14236
13926
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14237
13927
|
return;
|
@@ -14240,9 +13930,9 @@ static void ggml_compute_forward_win_part_f32(
|
|
14240
13930
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14241
13931
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14242
13932
|
|
14243
|
-
const int32_t nep0 = ((const int32_t *)(
|
14244
|
-
const int32_t nep1 = ((const int32_t *)(
|
14245
|
-
const int32_t w = ((const int32_t *)(
|
13933
|
+
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
13934
|
+
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
13935
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[2];
|
14246
13936
|
|
14247
13937
|
assert(ne00 == ne0);
|
14248
13938
|
assert(ne3 == nep0*nep1);
|
@@ -14276,12 +13966,11 @@ static void ggml_compute_forward_win_part_f32(
|
|
14276
13966
|
static void ggml_compute_forward_win_part(
|
14277
13967
|
const struct ggml_compute_params * params,
|
14278
13968
|
const struct ggml_tensor * src0,
|
14279
|
-
const struct ggml_tensor * opt0,
|
14280
13969
|
struct ggml_tensor * dst) {
|
14281
13970
|
switch (src0->type) {
|
14282
13971
|
case GGML_TYPE_F32:
|
14283
13972
|
{
|
14284
|
-
ggml_compute_forward_win_part_f32(params, src0,
|
13973
|
+
ggml_compute_forward_win_part_f32(params, src0, dst);
|
14285
13974
|
} break;
|
14286
13975
|
default:
|
14287
13976
|
{
|
@@ -14295,7 +13984,6 @@ static void ggml_compute_forward_win_part(
|
|
14295
13984
|
static void ggml_compute_forward_win_unpart_f32(
|
14296
13985
|
const struct ggml_compute_params * params,
|
14297
13986
|
const struct ggml_tensor * src0,
|
14298
|
-
const struct ggml_tensor * opt0,
|
14299
13987
|
struct ggml_tensor * dst) {
|
14300
13988
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14301
13989
|
return;
|
@@ -14304,7 +13992,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14304
13992
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14305
13993
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14306
13994
|
|
14307
|
-
const int32_t w = ((const int32_t *)(
|
13995
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
14308
13996
|
|
14309
13997
|
// padding
|
14310
13998
|
const int px = (w - ne1%w)%w;
|
@@ -14338,12 +14026,67 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14338
14026
|
static void ggml_compute_forward_win_unpart(
|
14339
14027
|
const struct ggml_compute_params * params,
|
14340
14028
|
const struct ggml_tensor * src0,
|
14341
|
-
const struct ggml_tensor * opt0,
|
14342
14029
|
struct ggml_tensor * dst) {
|
14343
14030
|
switch (src0->type) {
|
14344
14031
|
case GGML_TYPE_F32:
|
14345
14032
|
{
|
14346
|
-
ggml_compute_forward_win_unpart_f32(params, src0,
|
14033
|
+
ggml_compute_forward_win_unpart_f32(params, src0, dst);
|
14034
|
+
} break;
|
14035
|
+
default:
|
14036
|
+
{
|
14037
|
+
GGML_ASSERT(false);
|
14038
|
+
} break;
|
14039
|
+
}
|
14040
|
+
}
|
14041
|
+
|
14042
|
+
// ggml_compute_forward_unary
|
14043
|
+
|
14044
|
+
static void ggml_compute_forward_unary(
|
14045
|
+
const struct ggml_compute_params * params,
|
14046
|
+
const struct ggml_tensor * src0,
|
14047
|
+
struct ggml_tensor * dst) {
|
14048
|
+
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
14049
|
+
|
14050
|
+
switch (op) {
|
14051
|
+
case GGML_UNARY_OP_ABS:
|
14052
|
+
{
|
14053
|
+
ggml_compute_forward_abs(params, src0, dst);
|
14054
|
+
} break;
|
14055
|
+
case GGML_UNARY_OP_SGN:
|
14056
|
+
{
|
14057
|
+
ggml_compute_forward_sgn(params, src0, dst);
|
14058
|
+
} break;
|
14059
|
+
case GGML_UNARY_OP_NEG:
|
14060
|
+
{
|
14061
|
+
ggml_compute_forward_neg(params, src0, dst);
|
14062
|
+
} break;
|
14063
|
+
case GGML_UNARY_OP_STEP:
|
14064
|
+
{
|
14065
|
+
ggml_compute_forward_step(params, src0, dst);
|
14066
|
+
} break;
|
14067
|
+
case GGML_UNARY_OP_TANH:
|
14068
|
+
{
|
14069
|
+
ggml_compute_forward_tanh(params, src0, dst);
|
14070
|
+
} break;
|
14071
|
+
case GGML_UNARY_OP_ELU:
|
14072
|
+
{
|
14073
|
+
ggml_compute_forward_elu(params, src0, dst);
|
14074
|
+
} break;
|
14075
|
+
case GGML_UNARY_OP_RELU:
|
14076
|
+
{
|
14077
|
+
ggml_compute_forward_relu(params, src0, dst);
|
14078
|
+
} break;
|
14079
|
+
case GGML_UNARY_OP_GELU:
|
14080
|
+
{
|
14081
|
+
ggml_compute_forward_gelu(params, src0, dst);
|
14082
|
+
} break;
|
14083
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
14084
|
+
{
|
14085
|
+
ggml_compute_forward_gelu_quick(params, src0, dst);
|
14086
|
+
} break;
|
14087
|
+
case GGML_UNARY_OP_SILU:
|
14088
|
+
{
|
14089
|
+
ggml_compute_forward_silu(params, src0, dst);
|
14347
14090
|
} break;
|
14348
14091
|
default:
|
14349
14092
|
{
|
@@ -14862,7 +14605,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14862
14605
|
} break;
|
14863
14606
|
case GGML_OP_ACC:
|
14864
14607
|
{
|
14865
|
-
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor
|
14608
|
+
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
|
14866
14609
|
} break;
|
14867
14610
|
case GGML_OP_SUB:
|
14868
14611
|
{
|
@@ -14912,46 +14655,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14912
14655
|
{
|
14913
14656
|
ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
|
14914
14657
|
} break;
|
14915
|
-
case GGML_OP_ABS:
|
14916
|
-
{
|
14917
|
-
ggml_compute_forward_abs(params, tensor->src[0], tensor);
|
14918
|
-
} break;
|
14919
|
-
case GGML_OP_SGN:
|
14920
|
-
{
|
14921
|
-
ggml_compute_forward_sgn(params, tensor->src[0], tensor);
|
14922
|
-
} break;
|
14923
|
-
case GGML_OP_NEG:
|
14924
|
-
{
|
14925
|
-
ggml_compute_forward_neg(params, tensor->src[0], tensor);
|
14926
|
-
} break;
|
14927
|
-
case GGML_OP_STEP:
|
14928
|
-
{
|
14929
|
-
ggml_compute_forward_step(params, tensor->src[0], tensor);
|
14930
|
-
} break;
|
14931
|
-
case GGML_OP_TANH:
|
14932
|
-
{
|
14933
|
-
ggml_compute_forward_tanh(params, tensor->src[0], tensor);
|
14934
|
-
} break;
|
14935
|
-
case GGML_OP_ELU:
|
14936
|
-
{
|
14937
|
-
ggml_compute_forward_elu(params, tensor->src[0], tensor);
|
14938
|
-
} break;
|
14939
|
-
case GGML_OP_RELU:
|
14940
|
-
{
|
14941
|
-
ggml_compute_forward_relu(params, tensor->src[0], tensor);
|
14942
|
-
} break;
|
14943
|
-
case GGML_OP_GELU:
|
14944
|
-
{
|
14945
|
-
ggml_compute_forward_gelu(params, tensor->src[0], tensor);
|
14946
|
-
} break;
|
14947
|
-
case GGML_OP_GELU_QUICK:
|
14948
|
-
{
|
14949
|
-
ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor);
|
14950
|
-
} break;
|
14951
|
-
case GGML_OP_SILU:
|
14952
|
-
{
|
14953
|
-
ggml_compute_forward_silu(params, tensor->src[0], tensor);
|
14954
|
-
} break;
|
14955
14658
|
case GGML_OP_SILU_BACK:
|
14956
14659
|
{
|
14957
14660
|
ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
|
@@ -14982,7 +14685,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14982
14685
|
} break;
|
14983
14686
|
case GGML_OP_SET:
|
14984
14687
|
{
|
14985
|
-
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor
|
14688
|
+
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
|
14986
14689
|
} break;
|
14987
14690
|
case GGML_OP_CPY:
|
14988
14691
|
{
|
@@ -15022,11 +14725,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15022
14725
|
} break;
|
15023
14726
|
case GGML_OP_DIAG_MASK_INF:
|
15024
14727
|
{
|
15025
|
-
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor
|
14728
|
+
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
|
15026
14729
|
} break;
|
15027
14730
|
case GGML_OP_DIAG_MASK_ZERO:
|
15028
14731
|
{
|
15029
|
-
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor
|
14732
|
+
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
|
15030
14733
|
} break;
|
15031
14734
|
case GGML_OP_SOFT_MAX:
|
15032
14735
|
{
|
@@ -15038,39 +14741,39 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15038
14741
|
} break;
|
15039
14742
|
case GGML_OP_ROPE:
|
15040
14743
|
{
|
15041
|
-
ggml_compute_forward_rope(params, tensor->src[0], tensor
|
14744
|
+
ggml_compute_forward_rope(params, tensor->src[0], tensor);
|
15042
14745
|
} break;
|
15043
14746
|
case GGML_OP_ROPE_BACK:
|
15044
14747
|
{
|
15045
|
-
ggml_compute_forward_rope_back(params, tensor->src[0], tensor
|
14748
|
+
ggml_compute_forward_rope_back(params, tensor->src[0], tensor);
|
15046
14749
|
} break;
|
15047
14750
|
case GGML_OP_ALIBI:
|
15048
14751
|
{
|
15049
|
-
ggml_compute_forward_alibi(params, tensor->src[0], tensor
|
14752
|
+
ggml_compute_forward_alibi(params, tensor->src[0], tensor);
|
15050
14753
|
} break;
|
15051
14754
|
case GGML_OP_CLAMP:
|
15052
14755
|
{
|
15053
|
-
ggml_compute_forward_clamp(params, tensor->src[0], tensor
|
14756
|
+
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
15054
14757
|
} break;
|
15055
14758
|
case GGML_OP_CONV_1D:
|
15056
14759
|
{
|
15057
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor
|
14760
|
+
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
15058
14761
|
} break;
|
15059
14762
|
case GGML_OP_CONV_2D:
|
15060
14763
|
{
|
15061
|
-
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor
|
14764
|
+
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
|
15062
14765
|
} break;
|
15063
14766
|
case GGML_OP_POOL_1D:
|
15064
14767
|
{
|
15065
|
-
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor
|
14768
|
+
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
|
15066
14769
|
} break;
|
15067
14770
|
case GGML_OP_POOL_2D:
|
15068
14771
|
{
|
15069
|
-
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor
|
14772
|
+
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
|
15070
14773
|
} break;
|
15071
14774
|
case GGML_OP_FLASH_ATTN:
|
15072
14775
|
{
|
15073
|
-
const int32_t t =
|
14776
|
+
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15074
14777
|
GGML_ASSERT(t == 0 || t == 1);
|
15075
14778
|
const bool masked = t != 0;
|
15076
14779
|
ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
|
@@ -15081,47 +14784,56 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15081
14784
|
} break;
|
15082
14785
|
case GGML_OP_FLASH_ATTN_BACK:
|
15083
14786
|
{
|
15084
|
-
int32_t t =
|
14787
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15085
14788
|
GGML_ASSERT(t == 0 || t == 1);
|
15086
14789
|
bool masked = t != 0;
|
15087
14790
|
ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
|
15088
14791
|
} break;
|
15089
14792
|
case GGML_OP_WIN_PART:
|
15090
14793
|
{
|
15091
|
-
ggml_compute_forward_win_part(params, tensor->src[0], tensor
|
14794
|
+
ggml_compute_forward_win_part(params, tensor->src[0], tensor);
|
15092
14795
|
} break;
|
15093
14796
|
case GGML_OP_WIN_UNPART:
|
15094
14797
|
{
|
15095
|
-
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor
|
14798
|
+
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
|
14799
|
+
} break;
|
14800
|
+
case GGML_OP_UNARY:
|
14801
|
+
{
|
14802
|
+
ggml_compute_forward_unary(params, tensor->src[0], tensor);
|
15096
14803
|
} break;
|
15097
14804
|
case GGML_OP_MAP_UNARY:
|
15098
14805
|
{
|
15099
|
-
|
14806
|
+
ggml_unary_op_f32_t fun;
|
14807
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15100
14808
|
ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
|
15101
14809
|
}
|
15102
14810
|
break;
|
15103
14811
|
case GGML_OP_MAP_BINARY:
|
15104
14812
|
{
|
15105
|
-
|
14813
|
+
ggml_binary_op_f32_t fun;
|
14814
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15106
14815
|
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15107
14816
|
}
|
15108
14817
|
break;
|
15109
14818
|
case GGML_OP_MAP_CUSTOM1:
|
15110
14819
|
{
|
15111
|
-
|
14820
|
+
ggml_custom1_op_f32_t fun;
|
14821
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15112
14822
|
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
15113
14823
|
}
|
15114
14824
|
break;
|
15115
14825
|
case GGML_OP_MAP_CUSTOM2:
|
15116
14826
|
{
|
15117
|
-
|
14827
|
+
ggml_custom2_op_f32_t fun;
|
14828
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15118
14829
|
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15119
14830
|
}
|
15120
14831
|
break;
|
15121
14832
|
case GGML_OP_MAP_CUSTOM3:
|
15122
14833
|
{
|
15123
|
-
|
15124
|
-
|
14834
|
+
ggml_custom3_op_f32_t fun;
|
14835
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
14836
|
+
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
15125
14837
|
}
|
15126
14838
|
break;
|
15127
14839
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
@@ -15185,12 +14897,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15185
14897
|
src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
|
15186
14898
|
}
|
15187
14899
|
if (src1->grad) {
|
15188
|
-
|
15189
|
-
|
15190
|
-
const size_t
|
15191
|
-
const size_t
|
15192
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15193
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
14900
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
14901
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
14902
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
14903
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15194
14904
|
|
15195
14905
|
struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
|
15196
14906
|
tensor->grad,
|
@@ -15339,73 +15049,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15339
15049
|
inplace);
|
15340
15050
|
}
|
15341
15051
|
} break;
|
15342
|
-
case GGML_OP_ABS:
|
15343
|
-
{
|
15344
|
-
if (src0->grad) {
|
15345
|
-
src0->grad =
|
15346
|
-
ggml_add_impl(ctx,
|
15347
|
-
src0->grad,
|
15348
|
-
ggml_mul(ctx,
|
15349
|
-
ggml_sgn(ctx, src0),
|
15350
|
-
tensor->grad),
|
15351
|
-
inplace);
|
15352
|
-
}
|
15353
|
-
} break;
|
15354
|
-
case GGML_OP_SGN:
|
15355
|
-
{
|
15356
|
-
if (src0->grad) {
|
15357
|
-
// noop
|
15358
|
-
}
|
15359
|
-
} break;
|
15360
|
-
case GGML_OP_NEG:
|
15361
|
-
{
|
15362
|
-
if (src0->grad) {
|
15363
|
-
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15364
|
-
}
|
15365
|
-
} break;
|
15366
|
-
case GGML_OP_STEP:
|
15367
|
-
{
|
15368
|
-
if (src0->grad) {
|
15369
|
-
// noop
|
15370
|
-
}
|
15371
|
-
} break;
|
15372
|
-
case GGML_OP_TANH:
|
15373
|
-
{
|
15374
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15375
|
-
} break;
|
15376
|
-
case GGML_OP_ELU:
|
15377
|
-
{
|
15378
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15379
|
-
} break;
|
15380
|
-
case GGML_OP_RELU:
|
15381
|
-
{
|
15382
|
-
if (src0->grad) {
|
15383
|
-
src0->grad = ggml_sub_impl(ctx,
|
15384
|
-
src0->grad,
|
15385
|
-
ggml_mul(ctx,
|
15386
|
-
ggml_step(ctx, src0),
|
15387
|
-
tensor->grad),
|
15388
|
-
inplace);
|
15389
|
-
}
|
15390
|
-
} break;
|
15391
|
-
case GGML_OP_GELU:
|
15392
|
-
{
|
15393
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15394
|
-
} break;
|
15395
|
-
case GGML_OP_GELU_QUICK:
|
15396
|
-
{
|
15397
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15398
|
-
} break;
|
15399
|
-
case GGML_OP_SILU:
|
15400
|
-
{
|
15401
|
-
// necessary for llama
|
15402
|
-
if (src0->grad) {
|
15403
|
-
src0->grad = ggml_add_impl(ctx,
|
15404
|
-
src0->grad,
|
15405
|
-
ggml_silu_back(ctx, src0, tensor->grad),
|
15406
|
-
inplace);
|
15407
|
-
}
|
15408
|
-
} break;
|
15409
15052
|
case GGML_OP_SILU_BACK:
|
15410
15053
|
{
|
15411
15054
|
GGML_ASSERT(false); // TODO: not implemented
|
@@ -15498,12 +15141,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15498
15141
|
} break;
|
15499
15142
|
case GGML_OP_SET:
|
15500
15143
|
{
|
15501
|
-
|
15502
|
-
|
15503
|
-
const size_t
|
15504
|
-
const size_t
|
15505
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15506
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
15144
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
15145
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
15146
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
15147
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15507
15148
|
|
15508
15149
|
struct ggml_tensor * tensor_grad_view = NULL;
|
15509
15150
|
|
@@ -15580,8 +15221,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15580
15221
|
if (src0->grad) {
|
15581
15222
|
size_t offset;
|
15582
15223
|
|
15583
|
-
|
15584
|
-
memcpy(&offset, tensor->src[2]->data, sizeof(offset));
|
15224
|
+
memcpy(&offset, tensor->op_params, sizeof(offset));
|
15585
15225
|
|
15586
15226
|
size_t nb1 = tensor->nb[1];
|
15587
15227
|
size_t nb2 = tensor->nb[2];
|
@@ -15608,7 +15248,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15608
15248
|
{
|
15609
15249
|
// necessary for llama
|
15610
15250
|
if (src0->grad) {
|
15611
|
-
int32_t * axes = (int32_t *) tensor->
|
15251
|
+
int32_t * axes = (int32_t *) tensor->op_params;
|
15612
15252
|
int axis0 = axes[0] & 0x3;
|
15613
15253
|
int axis1 = axes[1] & 0x3;
|
15614
15254
|
int axis2 = axes[2] & 0x3;
|
@@ -15664,33 +15304,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15664
15304
|
{
|
15665
15305
|
// necessary for llama
|
15666
15306
|
if (src0->grad) {
|
15667
|
-
|
15668
|
-
assert(ggml_nelements(src1) == 2);
|
15669
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15307
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15670
15308
|
src0->grad =
|
15671
15309
|
ggml_add_impl(ctx, src0->grad,
|
15672
15310
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15673
15311
|
inplace);
|
15674
15312
|
}
|
15675
|
-
if (src1->grad) {
|
15676
|
-
// noop
|
15677
|
-
}
|
15678
15313
|
} break;
|
15679
15314
|
case GGML_OP_DIAG_MASK_ZERO:
|
15680
15315
|
{
|
15681
15316
|
// necessary for llama
|
15682
15317
|
if (src0->grad) {
|
15683
|
-
|
15684
|
-
assert(ggml_nelements(src1) == 2);
|
15685
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15318
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15686
15319
|
src0->grad =
|
15687
15320
|
ggml_add_impl(ctx, src0->grad,
|
15688
15321
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15689
15322
|
inplace);
|
15690
15323
|
}
|
15691
|
-
if (src1->grad) {
|
15692
|
-
// noop
|
15693
|
-
}
|
15694
15324
|
} break;
|
15695
15325
|
case GGML_OP_SOFT_MAX:
|
15696
15326
|
{
|
@@ -15711,33 +15341,28 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15711
15341
|
{
|
15712
15342
|
// necessary for llama
|
15713
15343
|
if (src0->grad) {
|
15714
|
-
|
15715
|
-
|
15716
|
-
const int
|
15717
|
-
const int
|
15718
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15344
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15345
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15346
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15347
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15719
15348
|
src0->grad = ggml_add_impl(ctx,
|
15720
15349
|
src0->grad,
|
15721
15350
|
ggml_rope_back(ctx,
|
15722
15351
|
tensor->grad,
|
15723
15352
|
n_past,
|
15724
15353
|
n_dims,
|
15725
|
-
mode
|
15354
|
+
mode,
|
15355
|
+
n_ctx),
|
15726
15356
|
inplace);
|
15727
15357
|
}
|
15728
|
-
if (src1->grad) {
|
15729
|
-
// noop
|
15730
|
-
}
|
15731
15358
|
} break;
|
15732
15359
|
case GGML_OP_ROPE_BACK:
|
15733
15360
|
{
|
15734
15361
|
if (src0->grad) {
|
15735
|
-
|
15736
|
-
|
15737
|
-
const int
|
15738
|
-
const int
|
15739
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15740
|
-
const int n_ctx = ((int32_t *) src1->data)[3];
|
15362
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15363
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15364
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15365
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15741
15366
|
src0->grad = ggml_add_impl(ctx,
|
15742
15367
|
src0->grad,
|
15743
15368
|
ggml_rope(ctx,
|
@@ -15748,9 +15373,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15748
15373
|
n_ctx),
|
15749
15374
|
inplace);
|
15750
15375
|
}
|
15751
|
-
if (src1->grad) {
|
15752
|
-
// noop
|
15753
|
-
}
|
15754
15376
|
} break;
|
15755
15377
|
case GGML_OP_ALIBI:
|
15756
15378
|
{
|
@@ -15780,7 +15402,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15780
15402
|
{
|
15781
15403
|
struct ggml_tensor * flash_grad = NULL;
|
15782
15404
|
if (src0->grad || src1->grad || tensor->src[2]->grad) {
|
15783
|
-
int32_t t =
|
15405
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15784
15406
|
GGML_ASSERT(t == 0 || t == 1);
|
15785
15407
|
bool masked = t != 0;
|
15786
15408
|
flash_grad =
|
@@ -15943,6 +15565,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15943
15565
|
} break;
|
15944
15566
|
case GGML_OP_WIN_PART:
|
15945
15567
|
case GGML_OP_WIN_UNPART:
|
15568
|
+
case GGML_OP_UNARY:
|
15569
|
+
{
|
15570
|
+
switch (ggml_get_unary_op(tensor)) {
|
15571
|
+
case GGML_UNARY_OP_ABS:
|
15572
|
+
{
|
15573
|
+
if (src0->grad) {
|
15574
|
+
src0->grad =
|
15575
|
+
ggml_add_impl(ctx,
|
15576
|
+
src0->grad,
|
15577
|
+
ggml_mul(ctx,
|
15578
|
+
ggml_sgn(ctx, src0),
|
15579
|
+
tensor->grad),
|
15580
|
+
inplace);
|
15581
|
+
}
|
15582
|
+
} break;
|
15583
|
+
case GGML_UNARY_OP_SGN:
|
15584
|
+
{
|
15585
|
+
if (src0->grad) {
|
15586
|
+
// noop
|
15587
|
+
}
|
15588
|
+
} break;
|
15589
|
+
case GGML_UNARY_OP_NEG:
|
15590
|
+
{
|
15591
|
+
if (src0->grad) {
|
15592
|
+
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15593
|
+
}
|
15594
|
+
} break;
|
15595
|
+
case GGML_UNARY_OP_STEP:
|
15596
|
+
{
|
15597
|
+
if (src0->grad) {
|
15598
|
+
// noop
|
15599
|
+
}
|
15600
|
+
} break;
|
15601
|
+
case GGML_UNARY_OP_TANH:
|
15602
|
+
{
|
15603
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15604
|
+
} break;
|
15605
|
+
case GGML_UNARY_OP_ELU:
|
15606
|
+
{
|
15607
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15608
|
+
} break;
|
15609
|
+
case GGML_UNARY_OP_RELU:
|
15610
|
+
{
|
15611
|
+
if (src0->grad) {
|
15612
|
+
src0->grad = ggml_add_impl(ctx,
|
15613
|
+
src0->grad,
|
15614
|
+
ggml_mul(ctx,
|
15615
|
+
ggml_step(ctx, src0),
|
15616
|
+
tensor->grad),
|
15617
|
+
inplace);
|
15618
|
+
}
|
15619
|
+
} break;
|
15620
|
+
case GGML_UNARY_OP_GELU:
|
15621
|
+
{
|
15622
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15623
|
+
} break;
|
15624
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
15625
|
+
{
|
15626
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15627
|
+
} break;
|
15628
|
+
case GGML_UNARY_OP_SILU:
|
15629
|
+
{
|
15630
|
+
// necessary for llama
|
15631
|
+
if (src0->grad) {
|
15632
|
+
src0->grad = ggml_add_impl(ctx,
|
15633
|
+
src0->grad,
|
15634
|
+
ggml_silu_back(ctx, src0, tensor->grad),
|
15635
|
+
inplace);
|
15636
|
+
}
|
15637
|
+
} break;
|
15638
|
+
default:
|
15639
|
+
GGML_ASSERT(false);
|
15640
|
+
}
|
15641
|
+
} break;
|
15946
15642
|
case GGML_OP_MAP_UNARY:
|
15947
15643
|
case GGML_OP_MAP_BINARY:
|
15948
15644
|
case GGML_OP_MAP_CUSTOM1:
|
@@ -15978,6 +15674,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15978
15674
|
}
|
15979
15675
|
}
|
15980
15676
|
|
15677
|
+
static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
|
15678
|
+
|
15679
|
+
static size_t hash(void * p) {
|
15680
|
+
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
15681
|
+
}
|
15682
|
+
|
15683
|
+
static bool hash_insert(void * hash_table[], void * p) {
|
15684
|
+
size_t h = hash(p);
|
15685
|
+
|
15686
|
+
// linear probing
|
15687
|
+
size_t i = h;
|
15688
|
+
while (hash_table[i] != NULL && hash_table[i] != p) {
|
15689
|
+
i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
|
15690
|
+
if (i == h) {
|
15691
|
+
// hash table is full
|
15692
|
+
GGML_ASSERT(false);
|
15693
|
+
}
|
15694
|
+
}
|
15695
|
+
|
15696
|
+
if (hash_table[i] == p) {
|
15697
|
+
return true;
|
15698
|
+
}
|
15699
|
+
|
15700
|
+
// insert
|
15701
|
+
hash_table[i] = p;
|
15702
|
+
return false;
|
15703
|
+
}
|
15704
|
+
|
15981
15705
|
static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
|
15982
15706
|
if (node->grad == NULL) {
|
15983
15707
|
// this usually happens when we generate intermediate nodes from constants in the backward pass
|
@@ -15988,16 +15712,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15988
15712
|
}
|
15989
15713
|
|
15990
15714
|
// check if already visited
|
15991
|
-
|
15992
|
-
|
15993
|
-
return;
|
15994
|
-
}
|
15995
|
-
}
|
15996
|
-
|
15997
|
-
for (int i = 0; i < cgraph->n_leafs; i++) {
|
15998
|
-
if (cgraph->leafs[i] == node) {
|
15999
|
-
return;
|
16000
|
-
}
|
15715
|
+
if (hash_insert(cgraph->visited_hash_table, node)) {
|
15716
|
+
return;
|
16001
15717
|
}
|
16002
15718
|
|
16003
15719
|
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
@@ -16060,6 +15776,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
|
|
16060
15776
|
/*.nodes =*/ { NULL },
|
16061
15777
|
/*.grads =*/ { NULL },
|
16062
15778
|
/*.leafs =*/ { NULL },
|
15779
|
+
/*.hash_table =*/ { NULL },
|
16063
15780
|
/*.perf_runs =*/ 0,
|
16064
15781
|
/*.perf_cycles =*/ 0,
|
16065
15782
|
/*.perf_time_us =*/ 0,
|
@@ -16101,13 +15818,42 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
|
|
16101
15818
|
|
16102
15819
|
if (node->is_param) {
|
16103
15820
|
GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
|
16104
|
-
|
15821
|
+
ggml_build_forward_expand(&result, node->grad);
|
16105
15822
|
}
|
16106
15823
|
}
|
16107
15824
|
|
16108
15825
|
return result;
|
16109
15826
|
}
|
16110
15827
|
|
15828
|
+
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
15829
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
|
15830
|
+
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15831
|
+
|
15832
|
+
*cgraph = (struct ggml_cgraph) {
|
15833
|
+
/*.n_nodes =*/ 0,
|
15834
|
+
/*.n_leafs =*/ 0,
|
15835
|
+
/*.nodes =*/ { NULL },
|
15836
|
+
/*.grads =*/ { NULL },
|
15837
|
+
/*.leafs =*/ { NULL },
|
15838
|
+
/*.hash_table =*/ { NULL },
|
15839
|
+
/*.perf_runs =*/ 0,
|
15840
|
+
/*.perf_cycles =*/ 0,
|
15841
|
+
/*.perf_time_us =*/ 0,
|
15842
|
+
};
|
15843
|
+
|
15844
|
+
return cgraph;
|
15845
|
+
}
|
15846
|
+
|
15847
|
+
struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
|
15848
|
+
struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
|
15849
|
+
ggml_build_forward_impl(cgraph, tensor, false);
|
15850
|
+
return cgraph;
|
15851
|
+
}
|
15852
|
+
|
15853
|
+
size_t ggml_graph_overhead(void) {
|
15854
|
+
return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
|
15855
|
+
}
|
15856
|
+
|
16111
15857
|
//
|
16112
15858
|
// thread data
|
16113
15859
|
//
|
@@ -16173,7 +15919,7 @@ typedef pthread_t ggml_thread_t;
|
|
16173
15919
|
|
16174
15920
|
// Android's libc implementation "bionic" does not support setting affinity
|
16175
15921
|
#if defined(__linux__) && !defined(__BIONIC__)
|
16176
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) {
|
15922
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) {
|
16177
15923
|
if (!ggml_is_numa()) {
|
16178
15924
|
return;
|
16179
15925
|
}
|
@@ -16198,7 +15944,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) {
|
|
16198
15944
|
CPU_FREE(cpus);
|
16199
15945
|
}
|
16200
15946
|
|
16201
|
-
void clear_numa_thread_affinity(void) {
|
15947
|
+
static void clear_numa_thread_affinity(void) {
|
16202
15948
|
if (!ggml_is_numa()) {
|
16203
15949
|
return;
|
16204
15950
|
}
|
@@ -16222,8 +15968,8 @@ void clear_numa_thread_affinity(void) {
|
|
16222
15968
|
#else
|
16223
15969
|
// TODO: Windows etc.
|
16224
15970
|
// (the linux implementation may also work on BSD, someone should test)
|
16225
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
16226
|
-
void clear_numa_thread_affinity(void) {}
|
15971
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
15972
|
+
static void clear_numa_thread_affinity(void) {}
|
16227
15973
|
#endif
|
16228
15974
|
|
16229
15975
|
struct ggml_compute_state_shared {
|
@@ -16293,8 +16039,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16293
16039
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
16294
16040
|
params.nth = n_tasks_arr[node_n];
|
16295
16041
|
ggml_compute_forward(&params, node);
|
16296
|
-
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16297
16042
|
}
|
16043
|
+
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16298
16044
|
}
|
16299
16045
|
|
16300
16046
|
// distribute new work or execute it direct if 1T
|
@@ -16324,8 +16070,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16324
16070
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
16325
16071
|
params.type = GGML_TASK_FINALIZE;
|
16326
16072
|
ggml_compute_forward(&params, node);
|
16327
|
-
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16328
16073
|
}
|
16074
|
+
|
16075
|
+
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16329
16076
|
} else {
|
16330
16077
|
break;
|
16331
16078
|
}
|
@@ -16434,21 +16181,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16434
16181
|
case GGML_OP_ARGMAX:
|
16435
16182
|
case GGML_OP_REPEAT:
|
16436
16183
|
case GGML_OP_REPEAT_BACK:
|
16437
|
-
|
16438
|
-
case GGML_OP_SGN:
|
16439
|
-
case GGML_OP_NEG:
|
16440
|
-
case GGML_OP_STEP:
|
16441
|
-
case GGML_OP_TANH:
|
16442
|
-
case GGML_OP_ELU:
|
16443
|
-
case GGML_OP_RELU:
|
16444
|
-
{
|
16184
|
+
{
|
16445
16185
|
n_tasks = 1;
|
16446
16186
|
} break;
|
16447
|
-
|
16448
|
-
case
|
16449
|
-
|
16450
|
-
|
16187
|
+
|
16188
|
+
case GGML_OP_UNARY:
|
16189
|
+
{
|
16190
|
+
switch (ggml_get_unary_op(node)) {
|
16191
|
+
case GGML_UNARY_OP_ABS:
|
16192
|
+
case GGML_UNARY_OP_SGN:
|
16193
|
+
case GGML_UNARY_OP_NEG:
|
16194
|
+
case GGML_UNARY_OP_STEP:
|
16195
|
+
case GGML_UNARY_OP_TANH:
|
16196
|
+
case GGML_UNARY_OP_ELU:
|
16197
|
+
case GGML_UNARY_OP_RELU:
|
16198
|
+
{
|
16199
|
+
n_tasks = 1;
|
16200
|
+
} break;
|
16201
|
+
|
16202
|
+
case GGML_UNARY_OP_GELU:
|
16203
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
16204
|
+
case GGML_UNARY_OP_SILU:
|
16205
|
+
{
|
16206
|
+
n_tasks = n_threads;
|
16207
|
+
} break;
|
16208
|
+
}
|
16209
|
+
} break;
|
16451
16210
|
case GGML_OP_SILU_BACK:
|
16211
|
+
case GGML_OP_MUL:
|
16452
16212
|
case GGML_OP_NORM:
|
16453
16213
|
case GGML_OP_RMS_NORM:
|
16454
16214
|
case GGML_OP_RMS_NORM_BACK:
|
@@ -16513,10 +16273,10 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16513
16273
|
case GGML_OP_GET_ROWS:
|
16514
16274
|
case GGML_OP_GET_ROWS_BACK:
|
16515
16275
|
case GGML_OP_DIAG:
|
16516
|
-
case GGML_OP_DIAG_MASK_ZERO:
|
16517
16276
|
{
|
16518
16277
|
n_tasks = 1;
|
16519
16278
|
} break;
|
16279
|
+
case GGML_OP_DIAG_MASK_ZERO:
|
16520
16280
|
case GGML_OP_DIAG_MASK_INF:
|
16521
16281
|
case GGML_OP_SOFT_MAX:
|
16522
16282
|
case GGML_OP_SOFT_MAX_BACK:
|
@@ -16575,19 +16335,22 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16575
16335
|
const int64_t ne11 = node->src[1]->ne[1]; // H
|
16576
16336
|
const int64_t ne12 = node->src[1]->ne[2]; // C
|
16577
16337
|
|
16338
|
+
const int64_t ne0 = node->ne[0];
|
16339
|
+
const int64_t ne1 = node->ne[1];
|
16340
|
+
const int64_t ne2 = node->ne[2];
|
16578
16341
|
const int64_t nk = ne00*ne01;
|
16342
|
+
const int64_t ew0 = nk * ne02;
|
16579
16343
|
|
16580
|
-
UNUSED(ne02);
|
16581
16344
|
UNUSED(ne03);
|
16582
|
-
UNUSED(
|
16345
|
+
UNUSED(ne2);
|
16583
16346
|
|
16584
16347
|
size_t cur = 0;
|
16585
16348
|
|
16586
16349
|
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16587
|
-
|
16588
|
-
cur = sizeof(ggml_fp16_t)*(
|
16350
|
+
node->src[1]->type == GGML_TYPE_F32) {
|
16351
|
+
cur = sizeof(ggml_fp16_t)*(ne0*ne1*ew0);
|
16589
16352
|
} else if (node->src[0]->type == GGML_TYPE_F32 &&
|
16590
|
-
|
16353
|
+
node->src[1]->type == GGML_TYPE_F32) {
|
16591
16354
|
cur = sizeof(float)* (ne10*ne11*ne12);
|
16592
16355
|
} else {
|
16593
16356
|
GGML_ASSERT(false);
|
@@ -16806,10 +16569,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
16806
16569
|
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
16807
16570
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
16808
16571
|
|
16809
|
-
struct
|
16810
|
-
GGML_ASSERT(buf);
|
16572
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
|
16811
16573
|
|
16812
|
-
cplan.work_data =
|
16574
|
+
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
16813
16575
|
|
16814
16576
|
ggml_graph_compute(cgraph, &cplan);
|
16815
16577
|
}
|
@@ -16864,9 +16626,6 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
|
|
16864
16626
|
}
|
16865
16627
|
|
16866
16628
|
void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
16867
|
-
//assert(cgraph->work == NULL);
|
16868
|
-
//assert(cgraph->work_size == 0);
|
16869
|
-
|
16870
16629
|
uint64_t size_eval = 0;
|
16871
16630
|
|
16872
16631
|
// compute size of intermediate results
|
@@ -16963,7 +16722,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16963
16722
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
16964
16723
|
}
|
16965
16724
|
|
16966
|
-
fwrite(tensor->name,
|
16725
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16726
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
16967
16727
|
|
16968
16728
|
// dump the data
|
16969
16729
|
// TODO: pad this to 32 byte boundary
|
@@ -16996,7 +16756,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16996
16756
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
16997
16757
|
}
|
16998
16758
|
|
16999
|
-
fwrite(tensor->name,
|
16759
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16760
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
17000
16761
|
|
17001
16762
|
// output the op arguments
|
17002
16763
|
{
|
@@ -17177,7 +16938,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17177
16938
|
|
17178
16939
|
tensor->op = (enum ggml_op) op;
|
17179
16940
|
|
17180
|
-
memcpy(tensor->name,
|
16941
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
16942
|
+
memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
|
17181
16943
|
|
17182
16944
|
tensor->data = (void *) ptr;
|
17183
16945
|
|
@@ -17222,7 +16984,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17222
16984
|
nb[j] = nb_cur;
|
17223
16985
|
}
|
17224
16986
|
|
17225
|
-
const char * ptr_name
|
16987
|
+
const char * ptr_name = ptr; ptr += GGML_MAX_NAME;
|
16988
|
+
const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS;
|
17226
16989
|
|
17227
16990
|
const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t);
|
17228
16991
|
|
@@ -17259,8 +17022,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17259
17022
|
{
|
17260
17023
|
tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0);
|
17261
17024
|
|
17262
|
-
|
17263
|
-
memcpy(&offs,
|
17025
|
+
size_t offs;
|
17026
|
+
memcpy(&offs, ptr_op_params, sizeof(offs));
|
17264
17027
|
|
17265
17028
|
tensor->data = ((char *) tensor->data) + offs;
|
17266
17029
|
} break;
|
@@ -17280,7 +17043,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17280
17043
|
} break;
|
17281
17044
|
}
|
17282
17045
|
|
17283
|
-
memcpy(tensor->name,
|
17046
|
+
memcpy(tensor->name, ptr_name, GGML_MAX_NAME);
|
17047
|
+
memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS);
|
17284
17048
|
|
17285
17049
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
17286
17050
|
tensor->nb[j] = nb[j];
|
@@ -17305,9 +17069,6 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17305
17069
|
|
17306
17070
|
GGML_PRINT("=== GRAPH ===\n");
|
17307
17071
|
|
17308
|
-
GGML_PRINT_DEBUG("n_threads = %d\n", cgraph->n_threads);
|
17309
|
-
GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size);
|
17310
|
-
|
17311
17072
|
GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
|
17312
17073
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
17313
17074
|
struct ggml_tensor * node = cgraph->nodes[i];
|
@@ -17317,7 +17078,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17317
17078
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
17318
17079
|
i,
|
17319
17080
|
node->ne[0], node->ne[1], node->ne[2],
|
17320
|
-
|
17081
|
+
ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
17321
17082
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
17322
17083
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
17323
17084
|
(double) node->perf_time_us / 1000.0,
|
@@ -17331,7 +17092,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17331
17092
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
17332
17093
|
i,
|
17333
17094
|
node->ne[0], node->ne[1],
|
17334
|
-
|
17095
|
+
ggml_op_name(node->op));
|
17335
17096
|
}
|
17336
17097
|
|
17337
17098
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
@@ -17339,7 +17100,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17339
17100
|
continue;
|
17340
17101
|
}
|
17341
17102
|
|
17342
|
-
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",
|
17103
|
+
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0);
|
17343
17104
|
}
|
17344
17105
|
|
17345
17106
|
GGML_PRINT("========================================\n");
|
@@ -17433,13 +17194,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|
17433
17194
|
}
|
17434
17195
|
|
17435
17196
|
if (node->n_dims == 2) {
|
17436
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1],
|
17197
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
|
17437
17198
|
} else {
|
17438
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2],
|
17199
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
|
17439
17200
|
}
|
17440
17201
|
|
17441
17202
|
if (node->grad) {
|
17442
|
-
fprintf(fp, " | <g>%s\"; ]\n",
|
17203
|
+
fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(node->grad->op));
|
17443
17204
|
} else {
|
17444
17205
|
fprintf(fp, "\"; ]\n");
|
17445
17206
|
}
|