llama_cpp 0.3.4 → 0.3.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +293 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +304 -99
- data/ext/llama_cpp/src/ggml-metal.h +7 -0
- data/ext/llama_cpp/src/ggml-metal.m +201 -71
- data/ext/llama_cpp/src/ggml-metal.metal +68 -54
- data/ext/llama_cpp/src/ggml.c +713 -978
- data/ext/llama_cpp/src/ggml.h +82 -17
- data/ext/llama_cpp/src/k_quants.c +327 -3
- data/ext/llama_cpp/src/llama.cpp +524 -121
- data/ext/llama_cpp/src/llama.h +60 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +24 -0
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -3440,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
 
 //inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
 inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
-#if defined(GGML_SIMD)
+#if defined(GGML_USE_ACCELERATE)
+    vDSP_vsmul(y, 1, &v, y, 1, n);
+#elif defined(GGML_SIMD)
     const int np = (n & ~(GGML_F32_STEP - 1));
 
     GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
@@ -3603,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
 #endif
 }
 
-inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x) {
+inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
     ggml_float sum = 0.0;
     for (int i = 0; i < n; ++i) {
         sum += (ggml_float)x[i];
@@ -3611,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x
     *s = sum;
 }
 
+inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) {
+    float sum = 0.0f;
+    for (int i = 0; i < n; ++i) {
+        sum += GGML_FP16_TO_FP32(x[i]);
+    }
+    *s = sum;
+}
+
 inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
 #ifndef GGML_USE_ACCELERATE
     float max = -INFINITY;
@@ -3750,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "ARGMAX",
     "REPEAT",
     "REPEAT_BACK",
-    "ABS",
-    "SGN",
-    "NEG",
-    "STEP",
-    "TANH",
-    "ELU",
-    "RELU",
-    "GELU",
-    "GELU_QUICK",
-    "SILU",
     "SILU_BACK",
     "NORM",
     "RMS_NORM",
@@ -3798,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "WIN_PART",
     "WIN_UNPART",
 
+    "UNARY",
+
     "MAP_UNARY",
     "MAP_BINARY",
 
@@ -3809,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -3830,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "argmax(x)",
     "repeat(x)",
    "repeat_back(x)",
-    "abs(x)",
-    "sgn(x)",
-    "-x",
-    "step(x)",
-    "tanh(x)",
-    "elu(x)",
-    "relu(x)",
-    "gelu(x)",
-    "gelu_quick(x)",
-    "silu(x)",
     "silu_back(x)",
     "norm(x)",
     "rms_norm(x)",
@@ -3878,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "win_part(x)",
     "win_unpart(x)",
 
+    "unary(x)",
+
     "f(x)",
     "f(x,y)",
 
@@ -3889,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -4077,8 +4071,8 @@ bool ggml_is_numa(void) {
 ////////////////////////////////////////////////////////////////////////////////
 
 void ggml_print_object(const struct ggml_object * obj) {
-    GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offs, obj->size, (const void *) obj->next);
+    GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+            obj->type, obj->offs, obj->size, (const void *) obj->next);
 }
 
 void ggml_print_objects(const struct ggml_context * ctx) {
@@ -4145,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) {
     return GGML_OP_NAME[op];
 }
 
+const char * ggml_op_symbol(enum ggml_op op) {
+    return GGML_OP_SYMBOL[op];
+}
+
 size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return GGML_TYPE_SIZE[tensor->type];
 }
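A small usage sketch for the new ggml_op_symbol() accessor added above, pairing it with the existing ggml_op_name(); the helper name and the printf formatting are illustrative, not part of the diff.

#include <stdio.h>
#include "ggml.h"

// Hypothetical debug helper: ggml_op_name() yields the enum-style name and
// ggml_op_symbol() the symbolic form from GGML_OP_SYMBOL for the same op.
static void print_op_info(const struct ggml_tensor * t) {
    printf("op = %s, symbol = %s\n", ggml_op_name(t->op), ggml_op_symbol(t->op));
}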
@@ -4214,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
 }
 
 size_t ggml_tensor_overhead(void) {
-    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
 }
 
 bool ggml_is_transposed(const struct ggml_tensor * tensor) {
@@ -4231,6 +4229,15 @@ bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
         tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
 }
 
+static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * tensor) {
+    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+
+    return
+        tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
+        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
+        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+}
+
 bool ggml_is_permuted(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
@@ -4376,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         return NULL;
     }
 
-    const size_t mem_size =
+    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
 
     *ctx = (struct ggml_context) {
         /*.mem_size =*/ mem_size,
@@ -4443,6 +4450,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
     return result;
 }
 
+bool ggml_get_no_alloc(struct ggml_context * ctx) {
+    return ctx->no_alloc;
+}
+
 void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
     ctx->no_alloc = no_alloc;
 }
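A minimal sketch of how the new ggml_get_no_alloc() getter can pair with the existing setter; the wrapper function and its name are assumptions for illustration.

#include "ggml.h"

// Save and restore the context's no_alloc flag around a block of tensor creation.
static void create_metadata_only_tensors(struct ggml_context * ctx) {
    const bool prev = ggml_get_no_alloc(ctx);  // new getter from this hunk
    ggml_set_no_alloc(ctx, true);              // tensors created here get no data buffer
    // ... build tensors whose data is allocated elsewhere ...
    ggml_set_no_alloc(ctx, prev);              // restore the previous setting
}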
@@ -4461,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     struct ggml_object * obj = ctx->objects_begin;
 
     while (obj != NULL) {
-        struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
 
-        const size_t size = ggml_nbytes(tensor);
+            const size_t size = ggml_nbytes(tensor);
 
-        if (max_size < size) {
-            max_size = size;
+            if (max_size < size) {
+                max_size = size;
+            }
         }
 
         obj = obj->next;
@@ -4480,7 +4493,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
 // this is an error prone process, but it is necessary to support inplace
 // operators when using scratch buffers
 // TODO: implement a better way
-void ggml_scratch_save(struct ggml_context * ctx) {
+static void ggml_scratch_save(struct ggml_context * ctx) {
     // this is needed to allow opt tensors to store their data
     // TODO: again, need to find a better way
     ctx->no_alloc_save = ctx->no_alloc;
@@ -4490,7 +4503,7 @@ void ggml_scratch_save(struct ggml_context * ctx) {
     ctx->scratch.data = NULL;
 }
 
-void ggml_scratch_load(struct ggml_context * ctx) {
+static void ggml_scratch_load(struct ggml_context * ctx) {
     ctx->no_alloc = ctx->no_alloc_save;
 
     ctx->scratch = ctx->scratch_save;
@@ -4498,12 +4511,7 @@ void ggml_scratch_load(struct ggml_context * ctx) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-struct ggml_tensor * ggml_new_tensor_impl(
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int n_dims,
-        const int64_t* ne,
-        void* data) {
+static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;
 
@@ -4511,77 +4519,79 @@ struct ggml_tensor * ggml_new_tensor_impl(
     const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
     const size_t cur_end = cur_offs + cur_size;
 
-
-
-    if (data == NULL && !ctx->no_alloc) {
-        size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
-        for (int i = 1; i < n_dims; i++) {
-            size_needed *= ne[i];
-        }
-        // align to GGML_MEM_ALIGN
-        size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-    }
+    // align to GGML_MEM_ALIGN
+    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
 
     char * const mem_buffer = ctx->mem_buffer;
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
-    if (
-
+    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                __func__, cur_end + size_needed, ctx->mem_size);
+        assert(false);
+        return NULL;
+    }
 
-
-
-
-
-
-
+    *obj_new = (struct ggml_object) {
+        .offs = cur_end + GGML_OBJECT_SIZE,
+        .size = size_needed,
+        .next = NULL,
+        .type = type,
+    };
 
-
-
-
-
-    };
+    ggml_assert_aligned(mem_buffer + obj_new->offs);
+
+    if (obj_cur != NULL) {
+        obj_cur->next = obj_new;
     } else {
-
-
-
-
-
+        // this is the first object in this context
+        ctx->objects_begin = obj_new;
+    }
+
+    ctx->objects_end = obj_new;
+
+    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+
+    return obj_new;
+}
+
+static struct ggml_tensor * ggml_new_tensor_impl(
+        struct ggml_context * ctx,
+        enum ggml_type type,
+        int n_dims,
+        const int64_t* ne,
+        void* data) {
+
+    size_t data_size = 0;
+
+    if (data == NULL && !ctx->no_alloc) {
+        data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
+        for (int i = 1; i < n_dims; i++) {
+            data_size *= ne[i];
         }
+    }
 
-
-
-
+    if (ctx->scratch.data != NULL && data == NULL) {
+        // allocate tensor data in the scratch buffer
+        if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
             assert(false);
             return NULL;
         }
 
         data = (char * const) ctx->scratch.data + ctx->scratch.offs;
 
-
-        .offs = cur_end + GGML_OBJECT_SIZE,
-        .size = GGML_TENSOR_SIZE,
-        .next = NULL,
-    };
-
-        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
-
-        ctx->scratch.offs += size_needed;
-    }
+        ctx->scratch.offs += data_size;
 
-
-        obj_cur->next = obj_new;
-    } else {
-        // this is the first object in this context
-        ctx->objects_begin = obj_new;
+        data_size = 0;
     }
 
-
-
-    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
 
-
+    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
-
+    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
     *result = (struct ggml_tensor) {
         /*.type =*/ type,
@@ -4590,6 +4600,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
         /*.ne =*/ { 1, 1, 1, 1 },
         /*.nb =*/ { 0, 0, 0, 0 },
         /*.op =*/ GGML_OP_NONE,
+        /*.op_params =*/ {0},
         /*.is_param =*/ false,
         /*.grad =*/ NULL,
         /*.src =*/ { NULL },
@@ -4620,6 +4631,21 @@ struct ggml_tensor * ggml_new_tensor_impl(
     return result;
 }
 
+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    assert(params_size <= GGML_MAX_OP_PARAMS);
+    memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    ((int32_t *)(tensor->op_params))[i] = value;
+}
+
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
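The helpers above move small operator parameters into the tensor's own op_params array instead of auxiliary GGML_TYPE_I32 tensors; the ggml_view_* hunks further down use exactly this to record the byte offset. A sketch of reading that offset back, assuming the layout shown there:

#include <string.h>
#include "ggml.h"

// Assumes a tensor produced by ggml_view_1d()/ggml_view_2d()/... after this change:
// the view's byte offset is the first thing copied into op_params.
static size_t view_byte_offset(const struct ggml_tensor * view) {
    size_t offset = 0;
    memcpy(&offset, view->op_params, sizeof(offset));
    return offset;
}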
@@ -4951,6 +4977,11 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
     return (float *)(tensor->data);
 }
 
+enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
+    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
+}
+
 const char * ggml_get_name(const struct ggml_tensor * tensor) {
     return tensor->name;
 }
@@ -4989,9 +5020,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
-        if (strcmp(cur->name, name) == 0) {
-            return cur;
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
+            if (strcmp(cur->name, name) == 0) {
+                return cur;
+            }
         }
 
         obj = obj->next;
@@ -5004,7 +5037,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
 
 // ggml_dup
 
-struct ggml_tensor * ggml_dup_impl(
+static struct ggml_tensor * ggml_dup_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5019,7 +5052,6 @@ struct ggml_tensor * ggml_dup_impl(
     result->op = GGML_OP_DUP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5038,7 +5070,7 @@ struct ggml_tensor * ggml_dup_inplace(
 
 // ggml_add
 
-struct ggml_tensor * ggml_add_impl(
+static struct ggml_tensor * ggml_add_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -5081,7 +5113,7 @@ struct ggml_tensor * ggml_add_inplace(
 
 // ggml_add1
 
-struct ggml_tensor * ggml_add1_impl(
+static struct ggml_tensor * ggml_add1_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -5121,7 +5153,7 @@ struct ggml_tensor * ggml_add1_inplace(
 
 // ggml_acc
 
-struct ggml_tensor * ggml_acc_impl(
+static struct ggml_tensor * ggml_acc_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -5143,23 +5175,13 @@ struct ggml_tensor * ggml_acc_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-    ((int32_t *) c->data)[0] = nb1;
-    ((int32_t *) c->data)[1] = nb2;
-    ((int32_t *) c->data)[2] = nb3;
-    ((int32_t *) c->data)[3] = offset;
-    ((int32_t *) c->data)[4] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ACC;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 }
@@ -5188,7 +5210,7 @@ struct ggml_tensor * ggml_acc_inplace(
 
 // ggml_sub
 
-struct ggml_tensor * ggml_sub_impl(
+static struct ggml_tensor * ggml_sub_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -5227,7 +5249,7 @@ struct ggml_tensor * ggml_sub_inplace(
 
 // ggml_mul
 
-struct ggml_tensor * ggml_mul_impl(
+static struct ggml_tensor * ggml_mul_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -5274,7 +5296,7 @@ struct ggml_tensor * ggml_mul_inplace(
 
 // ggml_div
 
-struct ggml_tensor * ggml_div_impl(
+static struct ggml_tensor * ggml_div_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -5317,7 +5339,7 @@ struct ggml_tensor * ggml_div_inplace(
 
 // ggml_sqr
 
-struct ggml_tensor * ggml_sqr_impl(
+static struct ggml_tensor * ggml_sqr_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        bool inplace) {
@@ -5332,7 +5354,6 @@ struct ggml_tensor * ggml_sqr_impl(
     result->op = GGML_OP_SQR;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5351,7 +5372,7 @@ struct ggml_tensor * ggml_sqr_inplace(
 
 // ggml_sqrt
 
-struct ggml_tensor * ggml_sqrt_impl(
+static struct ggml_tensor * ggml_sqrt_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        bool inplace) {
@@ -5366,7 +5387,6 @@ struct ggml_tensor * ggml_sqrt_impl(
     result->op = GGML_OP_SQRT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5386,7 +5406,7 @@ struct ggml_tensor * ggml_sqrt_inplace(
 
 // ggml_log
 
-struct ggml_tensor * ggml_log_impl(
+static struct ggml_tensor * ggml_log_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        bool inplace) {
@@ -5401,7 +5421,6 @@ struct ggml_tensor * ggml_log_impl(
     result->op = GGML_OP_LOG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5434,7 +5453,6 @@ struct ggml_tensor * ggml_sum(
     result->op = GGML_OP_SUM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5461,7 +5479,6 @@ struct ggml_tensor * ggml_sum_rows(
     result->op = GGML_OP_SUM_ROWS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5484,7 +5501,6 @@ struct ggml_tensor * ggml_mean(
     result->op = GGML_OP_MEAN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5508,7 +5524,6 @@ struct ggml_tensor * ggml_argmax(
     result->op = GGML_OP_ARGMAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5571,343 +5586,142 @@ struct ggml_tensor * ggml_repeat_back(
 
 // ggml_abs
 
-struct ggml_tensor * ggml_abs_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_ABS;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_abs(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_abs_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
 }
 
 struct ggml_tensor * ggml_abs_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_abs_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
 }
 
-
 // ggml_sgn
 
-struct ggml_tensor * ggml_sgn_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_SGN;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_sgn(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_sgn_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
 }
 
 struct ggml_tensor * ggml_sgn_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_sgn_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
 }
 
 // ggml_neg
 
-struct ggml_tensor * ggml_neg_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_NEG;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_neg(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_neg_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
 }
 
 struct ggml_tensor * ggml_neg_inplace(
         struct ggml_context * ctx,
        struct ggml_tensor * a) {
-    return ggml_neg_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
 }
 
 // ggml_step
 
-struct ggml_tensor * ggml_step_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_STEP;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_step(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_step_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
 }
 
 struct ggml_tensor * ggml_step_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_step_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
 }
 
 // ggml_tanh
 
-struct ggml_tensor * ggml_tanh_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_TANH;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_tanh(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_tanh_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
 }
 
 struct ggml_tensor * ggml_tanh_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_tanh_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
 }
 
 // ggml_elu
 
-struct ggml_tensor * ggml_elu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_ELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_elu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_elu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
 }
 
 struct ggml_tensor * ggml_elu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_elu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
 }
 
 // ggml_relu
 
-struct ggml_tensor * ggml_relu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_RELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_relu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_relu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
 }
 
 struct ggml_tensor * ggml_relu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_relu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
 }
 
 // ggml_gelu
 
-struct ggml_tensor * ggml_gelu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_GELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_gelu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
 }
 
 struct ggml_tensor * ggml_gelu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
 }
 
 // ggml_gelu_quick
 
-struct ggml_tensor * ggml_gelu_quick_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_GELU_QUICK;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_gelu_quick(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_quick_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
 }
 
 struct ggml_tensor * ggml_gelu_quick_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_quick_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
 }
 
 // ggml_silu
 
-struct ggml_tensor * ggml_silu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_SILU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_silu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_silu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
 }
 
 struct ggml_tensor * ggml_silu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_silu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
 }
 
 // ggml_silu_back
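All of the wrappers above now build a single GGML_OP_UNARY node and stash the variant in op_params; a minimal sketch of the equivalence, with the graph/context setup assumed:

#include "ggml.h"

// ggml_relu(ctx, x) is now sugar for ggml_unary(ctx, x, GGML_UNARY_OP_RELU);
// the variant can be recovered through the new ggml_get_unary_op() accessor.
static struct ggml_tensor * relu_node(struct ggml_context * ctx, struct ggml_tensor * x) {
    struct ggml_tensor * y = ggml_unary(ctx, x, GGML_UNARY_OP_RELU);
    GGML_ASSERT(y->op == GGML_OP_UNARY);
    GGML_ASSERT(ggml_get_unary_op(y) == GGML_UNARY_OP_RELU);
    return y;
}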
@@ -5935,7 +5749,7 @@ struct ggml_tensor * ggml_silu_back(
 
 // ggml_norm
 
-struct ggml_tensor * ggml_norm_impl(
+static struct ggml_tensor * ggml_norm_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        bool inplace) {
@@ -5948,10 +5762,11 @@ struct ggml_tensor * ggml_norm_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
+    // TODO: maybe store epsilon here?
+
     result->op = GGML_OP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
@@ -5968,9 +5783,10 @@ struct ggml_tensor * ggml_norm_inplace(
     return ggml_norm_impl(ctx, a, true);
 }
 
-struct ggml_tensor * ggml_rms_norm_impl(
+static struct ggml_tensor * ggml_rms_norm_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
+       float eps,
        bool inplace) {
     bool is_node = false;
 
@@ -5980,24 +5796,27 @@ struct ggml_tensor * ggml_rms_norm_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
+    ggml_set_op_params(result, &eps, sizeof(eps));
+
     result->op = GGML_OP_RMS_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
 
 struct ggml_tensor * ggml_rms_norm(
        struct ggml_context * ctx,
-       struct ggml_tensor * a) {
-    return ggml_rms_norm_impl(ctx, a, false);
+       struct ggml_tensor * a,
+       float eps) {
+    return ggml_rms_norm_impl(ctx, a, eps, false);
 }
 
 struct ggml_tensor * ggml_rms_norm_inplace(
        struct ggml_context * ctx,
-       struct ggml_tensor * a) {
-    return ggml_rms_norm_impl(ctx, a, true);
+       struct ggml_tensor * a,
+       float eps) {
+    return ggml_rms_norm_impl(ctx, a, eps, true);
 }
 
 struct ggml_tensor * ggml_rms_norm_back(
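ggml_rms_norm() and ggml_rms_norm_inplace() now take the epsilon explicitly and store it in op_params; a short usage sketch, with 1e-6f as an assumed example value:

#include "ggml.h"

// Callers must now supply eps; previously it was fixed inside the op.
static struct ggml_tensor * rms_norm_node(struct ggml_context * ctx, struct ggml_tensor * x) {
    const float eps = 1e-6f;            // example value, model dependent
    return ggml_rms_norm(ctx, x, eps);  // was: ggml_rms_norm(ctx, x)
}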
@@ -6076,7 +5895,7 @@ struct ggml_tensor * ggml_out_prod(
 
 // ggml_scale
 
-struct ggml_tensor * ggml_scale_impl(
+static struct ggml_tensor * ggml_scale_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -6116,7 +5935,7 @@ struct ggml_tensor * ggml_scale_inplace(
 
 // ggml_set
 
-struct ggml_tensor * ggml_set_impl(
+static struct ggml_tensor * ggml_set_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -6136,23 +5955,13 @@ struct ggml_tensor * ggml_set_impl(
     // make a view of the destination
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-    (( int32_t * ) c->data)[0] = nb1;
-    (( int32_t * ) c->data)[1] = nb2;
-    (( int32_t * ) c->data)[2] = nb3;
-    (( int32_t * ) c->data)[3] = offset;
-    (( int32_t * ) c->data)[4] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_SET;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 }
@@ -6216,7 +6025,7 @@ struct ggml_tensor * ggml_set_2d_inplace(
 
 // ggml_cpy
 
-struct ggml_tensor * ggml_cpy_impl(
+static struct ggml_tensor * ggml_cpy_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -6261,7 +6070,7 @@ struct ggml_tensor * ggml_cpy_inplace(
 
 // ggml_cont
 
-struct ggml_tensor * ggml_cont_impl(
+static struct ggml_tensor * ggml_cont_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        bool inplace) {
@@ -6277,7 +6086,6 @@ struct ggml_tensor * ggml_cont_impl(
     result->op = GGML_OP_CONT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6321,7 +6129,6 @@ struct ggml_tensor * ggml_reshape(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6346,7 +6153,6 @@ struct ggml_tensor * ggml_reshape_1d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6372,7 +6178,6 @@ struct ggml_tensor * ggml_reshape_2d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6399,7 +6204,6 @@ struct ggml_tensor * ggml_reshape_3d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6428,7 +6232,6 @@ struct ggml_tensor * ggml_reshape_4d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6450,19 +6253,11 @@ struct ggml_tensor * ggml_view_1d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6488,13 +6283,7 @@ struct ggml_tensor * ggml_view_2d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
@@ -6503,8 +6292,6 @@ struct ggml_tensor * ggml_view_2d(
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6532,13 +6319,7 @@ struct ggml_tensor * ggml_view_3d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
@@ -6547,8 +6328,6 @@ struct ggml_tensor * ggml_view_3d(
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6578,13 +6357,7 @@ struct ggml_tensor * ggml_view_4d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
@@ -6593,8 +6366,6 @@ struct ggml_tensor * ggml_view_4d(
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6655,22 +6426,9 @@ struct ggml_tensor * ggml_permute(
     result->op = GGML_OP_PERMUTE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-
-    if (is_node) {
-        ggml_scratch_save(ctx);
 
-
-
-        ((int32_t *) b->data)[0] = axis0;
-        ((int32_t *) b->data)[1] = axis1;
-        ((int32_t *) b->data)[2] = axis2;
-        ((int32_t *) b->data)[3] = axis3;
-
-        ggml_scratch_load(ctx);
-
-        result->src[2] = b;
-    }
+    int32_t params[] = { axis0, axis1, axis2, axis3 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     return result;
 }
@@ -6698,7 +6456,6 @@ struct ggml_tensor * ggml_transpose(
     result->op = GGML_OP_TRANSPOSE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6776,7 +6533,6 @@ struct ggml_tensor * ggml_diag(
     result->op = GGML_OP_DIAG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6784,7 +6540,7 @@ struct ggml_tensor * ggml_diag(
 
 // ggml_diag_mask_inf
 
-struct ggml_tensor * ggml_diag_mask_inf_impl(
+static struct ggml_tensor * ggml_diag_mask_inf_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past,
@@ -6797,19 +6553,12 @@ struct ggml_tensor * ggml_diag_mask_inf_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_INF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -6831,7 +6580,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace(
 
 // ggml_diag_mask_zero
 
-struct ggml_tensor * ggml_diag_mask_zero_impl(
+static struct ggml_tensor * ggml_diag_mask_zero_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past,
@@ -6844,20 +6593,12 @@ struct ggml_tensor * ggml_diag_mask_zero_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(b, "n_past, inplace");
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_ZERO;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -6878,7 +6619,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace(
 
 // ggml_soft_max
 
-struct ggml_tensor * ggml_soft_max_impl(
+static struct ggml_tensor * ggml_soft_max_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        bool inplace) {
@@ -6893,7 +6634,6 @@ struct ggml_tensor * ggml_soft_max_impl(
     result->op = GGML_OP_SOFT_MAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6913,7 +6653,7 @@ struct ggml_tensor * ggml_soft_max_inplace(
 
 // ggml_soft_max_back
 
-struct ggml_tensor * ggml_soft_max_back_impl(
+static struct ggml_tensor * ggml_soft_max_back_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        struct ggml_tensor * b,
@@ -6950,7 +6690,7 @@ struct ggml_tensor * ggml_soft_max_back_inplace(
 
 // ggml_rope
 
-struct ggml_tensor * ggml_rope_impl(
+static struct ggml_tensor * ggml_rope_impl(
        struct ggml_context * ctx,
        struct ggml_tensor * a,
        int n_past,
@@ -6969,23 +6709,14 @@ struct ggml_tensor * ggml_rope_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = n_dims;
-    ((int32_t *) b->data)[2] = mode;
-    ((int32_t *) b->data)[3] = n_ctx;
-    memcpy((int32_t *) b->data + 4, &freq_base, sizeof(float));
-    memcpy((int32_t *) b->data + 5, &freq_scale, sizeof(float));
-
-    ggml_scratch_load(ctx);
+    int32_t params[6] = { n_past, n_dims, mode, n_ctx };
+    memcpy(params + 4, &freq_base, sizeof(float));
+    memcpy(params + 5, &freq_scale, sizeof(float));
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_ROPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
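The RoPE parameters now occupy six 32-bit op_params slots: n_past, n_dims, mode and n_ctx as integers, then freq_base and freq_scale copied in as raw float bits. A sketch of reading freq_base back out, assuming that layout:

#include <string.h>
#include "ggml.h"

// Slot 4 of op_params holds freq_base, slot 5 holds freq_scale (see ggml_rope_impl above).
static float rope_freq_base(const struct ggml_tensor * rope_node) {
    float freq_base = 0.0f;
    memcpy(&freq_base, (const int32_t *) rope_node->op_params + 4, sizeof(freq_base));
    return freq_base;
}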
@@ -7042,22 +6773,12 @@ struct ggml_tensor * ggml_rope_back(
|
|
7042
6773
|
|
7043
6774
|
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7044
6775
|
|
7045
|
-
|
7046
|
-
|
7047
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
|
7048
|
-
ggml_set_name(b, "n_past, n_dims, mode");
|
7049
|
-
|
7050
|
-
((int32_t *) b->data)[0] = n_past;
|
7051
|
-
((int32_t *) b->data)[1] = n_dims;
|
7052
|
-
((int32_t *) b->data)[2] = mode;
|
7053
|
-
((int32_t *) b->data)[3] = n_ctx;
|
7054
|
-
|
7055
|
-
ggml_scratch_load(ctx);
|
6776
|
+
int32_t params[] = { n_past, n_dims, mode, n_ctx };
|
6777
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7056
6778
|
|
7057
6779
|
result->op = GGML_OP_ROPE_BACK;
|
7058
6780
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7059
6781
|
result->src[0] = a;
|
7060
|
-
result->src[1] = b;
|
7061
6782
|
|
7062
6783
|
return result;
|
7063
6784
|
}
|
@@ -7082,21 +6803,13 @@ struct ggml_tensor * ggml_alibi(
|
|
7082
6803
|
//struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7083
6804
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7084
6805
|
|
7085
|
-
|
7086
|
-
|
7087
|
-
|
7088
|
-
|
7089
|
-
((int32_t *) b->data)[0] = n_past;
|
7090
|
-
((int32_t *) b->data)[1] = n_head;
|
7091
|
-
GGML_ASSERT(sizeof(float) == sizeof(int32_t));
|
7092
|
-
(((float *) b->data)[2]) = bias_max;
|
7093
|
-
|
7094
|
-
ggml_scratch_load(ctx);
|
6806
|
+
int32_t op_params[3] = { n_past, n_head };
|
6807
|
+
memcpy(op_params + 2, &bias_max, sizeof(float));
|
6808
|
+
ggml_set_op_params(result, &op_params, sizeof(op_params));
|
7095
6809
|
|
7096
6810
|
result->op = GGML_OP_ALIBI;
|
7097
6811
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7098
6812
|
result->src[0] = a;
|
7099
|
-
result->src[1] = b;
|
7100
6813
|
|
7101
6814
|
return result;
|
7102
6815
|
}
|
@@ -7118,19 +6831,12 @@ struct ggml_tensor * ggml_clamp(
|
|
7118
6831
|
// TODO: when implement backward, fix this:
|
7119
6832
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7120
6833
|
|
7121
|
-
|
7122
|
-
|
7123
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
|
7124
|
-
|
7125
|
-
((float *) b->data)[0] = min;
|
7126
|
-
((float *) b->data)[1] = max;
|
7127
|
-
|
7128
|
-
ggml_scratch_load(ctx);
|
6834
|
+
float params[] = { min, max };
|
6835
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7129
6836
|
|
7130
6837
|
result->op = GGML_OP_CLAMP;
|
7131
6838
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7132
6839
|
result->src[0] = a;
|
7133
|
-
result->src[1] = b;
|
7134
6840
|
|
7135
6841
|
return result;
|
7136
6842
|
}
|
@@ -7163,18 +6869,13 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|
7163
6869
|
};
|
7164
6870
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7165
6871
|
|
7166
|
-
|
7167
|
-
|
7168
|
-
((int32_t*)c->data)[0] = s0;
|
7169
|
-
((int32_t*)c->data)[1] = p0;
|
7170
|
-
((int32_t*)c->data)[2] = d0;
|
7171
|
-
ggml_scratch_load(ctx);
|
6872
|
+
int32_t params[] = { s0, p0, d0 };
|
6873
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7172
6874
|
|
7173
6875
|
result->op = GGML_OP_CONV_1D;
|
7174
6876
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7175
6877
|
result->src[0] = a;
|
7176
6878
|
result->src[1] = b;
|
7177
|
-
result->src[2] = c;
|
7178
6879
|
|
7179
6880
|
return result;
|
7180
6881
|
}
|
@@ -7207,21 +6908,13 @@ struct ggml_tensor* ggml_conv_2d(
|
|
7207
6908
|
};
|
7208
6909
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7209
6910
|
|
7210
|
-
|
7211
|
-
|
7212
|
-
((int32_t*)c->data)[0] = s0;
|
7213
|
-
((int32_t*)c->data)[1] = s1;
|
7214
|
-
((int32_t*)c->data)[2] = p0;
|
7215
|
-
((int32_t*)c->data)[3] = p1;
|
7216
|
-
((int32_t*)c->data)[4] = d0;
|
7217
|
-
((int32_t*)c->data)[5] = d1;
|
7218
|
-
ggml_scratch_load(ctx);
|
6911
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
|
6912
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7219
6913
|
|
7220
6914
|
result->op = GGML_OP_CONV_2D;
|
7221
6915
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7222
6916
|
result->src[0] = a;
|
7223
6917
|
result->src[1] = b;
|
7224
|
-
result->src[2] = c;
|
7225
6918
|
|
7226
6919
|
return result;
|
7227
6920
|
|
@@ -7245,7 +6938,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
|
7245
6938
|
return (ins + 2 * p - ks) / s + 1;
|
7246
6939
|
}
|
7247
6940
|
|
7248
|
-
//
|
6941
|
+
// ggml_pool_1d
|
7249
6942
|
|
7250
6943
|
struct ggml_tensor* ggml_pool_1d(
|
7251
6944
|
struct ggml_context * ctx,
|
@@ -7268,18 +6961,12 @@ struct ggml_tensor* ggml_pool_1d(
|
|
7268
6961
|
};
|
7269
6962
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7270
6963
|
|
7271
|
-
|
7272
|
-
|
7273
|
-
((int32_t*)c->data)[0] = op;
|
7274
|
-
((int32_t*)c->data)[1] = k0;
|
7275
|
-
((int32_t*)c->data)[2] = s0;
|
7276
|
-
((int32_t*)c->data)[3] = p0;
|
7277
|
-
ggml_scratch_load(ctx);
|
6964
|
+
int32_t params[] = { op, k0, s0, p0 };
|
6965
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7278
6966
|
|
7279
6967
|
result->op = GGML_OP_POOL_1D;
|
7280
6968
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7281
6969
|
result->src[0] = a;
|
7282
|
-
result->src[1] = c;
|
7283
6970
|
|
7284
6971
|
return result;
|
7285
6972
|
}
|
@@ -7311,21 +6998,12 @@ struct ggml_tensor* ggml_pool_2d(
|
|
7311
6998
|
};
|
7312
6999
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7313
7000
|
|
7314
|
-
|
7315
|
-
|
7316
|
-
((int32_t*)c->data)[0] = op;
|
7317
|
-
((int32_t*)c->data)[1] = k0;
|
7318
|
-
((int32_t*)c->data)[2] = k1;
|
7319
|
-
((int32_t*)c->data)[3] = s0;
|
7320
|
-
((int32_t*)c->data)[4] = s1;
|
7321
|
-
((int32_t*)c->data)[5] = p0;
|
7322
|
-
((int32_t*)c->data)[6] = p1;
|
7323
|
-
ggml_scratch_load(ctx);
|
7001
|
+
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
7002
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7324
7003
|
|
7325
7004
|
result->op = GGML_OP_POOL_2D;
|
7326
7005
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7327
7006
|
result->src[0] = a;
|
7328
|
-
result->src[1] = c;
|
7329
7007
|
|
7330
7008
|
return result;
|
7331
7009
|
}
|
@@ -7348,14 +7026,16 @@ struct ggml_tensor * ggml_flash_attn(
|
|
7348
7026
|
}
|
7349
7027
|
|
7350
7028
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
7351
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7029
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
|
7030
|
+
|
7031
|
+
int32_t t = masked ? 1 : 0;
|
7032
|
+
ggml_set_op_params(result, &t, sizeof(t));
|
7352
7033
|
|
7353
7034
|
result->op = GGML_OP_FLASH_ATTN;
|
7354
7035
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7355
7036
|
result->src[0] = q;
|
7356
7037
|
result->src[1] = k;
|
7357
7038
|
result->src[2] = v;
|
7358
|
-
result->src[3] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7359
7039
|
|
7360
7040
|
return result;
|
7361
7041
|
}
|
@@ -7379,7 +7059,7 @@ struct ggml_tensor * ggml_flash_ff(
|
|
7379
7059
|
}
|
7380
7060
|
|
7381
7061
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7382
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7062
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
|
7383
7063
|
|
7384
7064
|
result->op = GGML_OP_FLASH_FF;
|
7385
7065
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
@@ -7445,13 +7125,15 @@ struct ggml_tensor * ggml_flash_attn_back(
|
|
7445
7125
|
|
7446
7126
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7447
7127
|
|
7128
|
+
int32_t masked_i = masked ? 1 : 0;
|
7129
|
+
ggml_set_op_params(result, &masked_i, sizeof(masked_i));
|
7130
|
+
|
7448
7131
|
result->op = GGML_OP_FLASH_ATTN_BACK;
|
7449
7132
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7450
7133
|
result->src[0] = q;
|
7451
7134
|
result->src[1] = k;
|
7452
7135
|
result->src[2] = v;
|
7453
7136
|
result->src[3] = d;
|
7454
|
-
result->src[4] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7455
7137
|
|
7456
7138
|
return result;
|
7457
7139
|
}
|
@@ -7484,21 +7166,12 @@ struct ggml_tensor * ggml_win_part(
|
|
7484
7166
|
|
7485
7167
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7486
7168
|
|
7487
|
-
|
7488
|
-
|
7489
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
7490
|
-
|
7491
|
-
((int32_t *) b->data)[0] = npx;
|
7492
|
-
((int32_t *) b->data)[1] = npy;
|
7493
|
-
((int32_t *) b->data)[2] = w;
|
7494
|
-
|
7495
|
-
ggml_scratch_load(ctx);
|
7169
|
+
int32_t params[] = { npx, npy, w };
|
7170
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7496
7171
|
|
7497
7172
|
result->op = GGML_OP_WIN_PART;
|
7498
7173
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7499
7174
|
result->src[0] = a;
|
7500
|
-
result->src[1] = NULL;
|
7501
|
-
result->src[2] = b;
|
7502
7175
|
|
7503
7176
|
return result;
|
7504
7177
|
}
|
@@ -7523,26 +7196,57 @@ struct ggml_tensor * ggml_win_unpart(
|
|
7523
7196
|
const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
|
7524
7197
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7525
7198
|
|
7526
|
-
|
7199
|
+
int32_t params[] = { w };
|
7200
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7527
7201
|
|
7528
|
-
|
7202
|
+
result->op = GGML_OP_WIN_UNPART;
|
7203
|
+
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7204
|
+
result->src[0] = a;
|
7529
7205
|
|
7530
|
-
|
7206
|
+
return result;
|
7207
|
+
}
|
7531
7208
|
|
7532
|
-
|
7209
|
+
// gmml_unary
|
7533
7210
|
|
7534
|
-
|
7211
|
+
static struct ggml_tensor * ggml_unary_impl(
|
7212
|
+
struct ggml_context * ctx,
|
7213
|
+
struct ggml_tensor * a,
|
7214
|
+
enum ggml_unary_op op,
|
7215
|
+
bool inplace) {
|
7216
|
+
bool is_node = false;
|
7217
|
+
|
7218
|
+
if (!inplace && (a->grad)) {
|
7219
|
+
is_node = true;
|
7220
|
+
}
|
7221
|
+
|
7222
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7223
|
+
|
7224
|
+
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
7225
|
+
|
7226
|
+
result->op = GGML_OP_UNARY;
|
7535
7227
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7536
7228
|
result->src[0] = a;
|
7537
|
-
result->src[1] = NULL;
|
7538
|
-
result->src[2] = b;
|
7539
7229
|
|
7540
7230
|
return result;
|
7541
7231
|
}
|
7542
7232
|
|
7233
|
+
struct ggml_tensor * ggml_unary(
|
7234
|
+
struct ggml_context * ctx,
|
7235
|
+
struct ggml_tensor * a,
|
7236
|
+
enum ggml_unary_op op) {
|
7237
|
+
return ggml_unary_impl(ctx, a, op, false);
|
7238
|
+
}
|
7239
|
+
|
7240
|
+
struct ggml_tensor * ggml_unary_inplace(
|
7241
|
+
struct ggml_context * ctx,
|
7242
|
+
struct ggml_tensor * a,
|
7243
|
+
enum ggml_unary_op op) {
|
7244
|
+
return ggml_unary_impl(ctx, a, op, true);
|
7245
|
+
}
|
7246
|
+
|
7543
7247
|
// ggml_map_unary
|
7544
7248
|
|
7545
|
-
struct ggml_tensor * ggml_map_unary_impl_f32(
|
7249
|
+
static struct ggml_tensor * ggml_map_unary_impl_f32(
|
7546
7250
|
struct ggml_context * ctx,
|
7547
7251
|
struct ggml_tensor * a,
|
7548
7252
|
const ggml_unary_op_f32_t fun,
|
@@ -7553,19 +7257,13 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
|
|
7553
7257
|
is_node = true;
|
7554
7258
|
}
|
7555
7259
|
|
7556
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7557
|
-
|
7558
|
-
ggml_scratch_save(ctx);
|
7559
|
-
|
7560
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7561
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7260
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7562
7261
|
|
7563
|
-
|
7262
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7564
7263
|
|
7565
7264
|
result->op = GGML_OP_MAP_UNARY;
|
7566
7265
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7567
7266
|
result->src[0] = a;
|
7568
|
-
result->src[2] = addr_tensor;
|
7569
7267
|
|
7570
7268
|
return result;
|
7571
7269
|
}
|
@@ -7586,7 +7284,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
7586
7284
|
|
7587
7285
|
// ggml_map_binary
|
7588
7286
|
|
7589
|
-
struct ggml_tensor * ggml_map_binary_impl_f32(
|
7287
|
+
static struct ggml_tensor * ggml_map_binary_impl_f32(
|
7590
7288
|
struct ggml_context * ctx,
|
7591
7289
|
struct ggml_tensor * a,
|
7592
7290
|
struct ggml_tensor * b,
|
@@ -7600,20 +7298,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
|
|
7600
7298
|
is_node = true;
|
7601
7299
|
}
|
7602
7300
|
|
7603
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7604
|
-
|
7605
|
-
ggml_scratch_save(ctx);
|
7606
|
-
|
7607
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7608
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7301
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7609
7302
|
|
7610
|
-
|
7303
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7611
7304
|
|
7612
7305
|
result->op = GGML_OP_MAP_BINARY;
|
7613
7306
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7614
7307
|
result->src[0] = a;
|
7615
7308
|
result->src[1] = b;
|
7616
|
-
result->src[2] = addr_tensor;
|
7617
7309
|
|
7618
7310
|
return result;
|
7619
7311
|
}
|
@@ -7636,7 +7328,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
7636
7328
|
|
7637
7329
|
// ggml_map_custom1
|
7638
7330
|
|
7639
|
-
struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7331
|
+
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7640
7332
|
struct ggml_context * ctx,
|
7641
7333
|
struct ggml_tensor * a,
|
7642
7334
|
const ggml_custom1_op_f32_t fun,
|
@@ -7647,19 +7339,13 @@ struct ggml_tensor * ggml_map_custom1_impl_f32(
|
|
7647
7339
|
is_node = true;
|
7648
7340
|
}
|
7649
7341
|
|
7650
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7651
|
-
|
7652
|
-
ggml_scratch_save(ctx);
|
7653
|
-
|
7654
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7655
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7342
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7656
7343
|
|
7657
|
-
|
7344
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7658
7345
|
|
7659
7346
|
result->op = GGML_OP_MAP_CUSTOM1;
|
7660
7347
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7661
7348
|
result->src[0] = a;
|
7662
|
-
result->src[2] = addr_tensor;
|
7663
7349
|
|
7664
7350
|
return result;
|
7665
7351
|
}
|
@@ -7680,7 +7366,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
7680
7366
|
|
7681
7367
|
// ggml_map_custom2
|
7682
7368
|
|
7683
|
-
struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7369
|
+
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7684
7370
|
struct ggml_context * ctx,
|
7685
7371
|
struct ggml_tensor * a,
|
7686
7372
|
struct ggml_tensor * b,
|
@@ -7692,20 +7378,14 @@ struct ggml_tensor * ggml_map_custom2_impl_f32(
|
|
7692
7378
|
is_node = true;
|
7693
7379
|
}
|
7694
7380
|
|
7695
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7696
|
-
|
7697
|
-
ggml_scratch_save(ctx);
|
7698
|
-
|
7699
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7700
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7381
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7701
7382
|
|
7702
|
-
|
7383
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7703
7384
|
|
7704
7385
|
result->op = GGML_OP_MAP_CUSTOM2;
|
7705
7386
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7706
7387
|
result->src[0] = a;
|
7707
7388
|
result->src[1] = b;
|
7708
|
-
result->src[2] = addr_tensor;
|
7709
7389
|
|
7710
7390
|
return result;
|
7711
7391
|
}
|
@@ -7728,7 +7408,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
7728
7408
|
|
7729
7409
|
// ggml_map_custom3
|
7730
7410
|
|
7731
|
-
struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7411
|
+
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7732
7412
|
struct ggml_context * ctx,
|
7733
7413
|
struct ggml_tensor * a,
|
7734
7414
|
struct ggml_tensor * b,
|
@@ -7741,21 +7421,15 @@ struct ggml_tensor * ggml_map_custom3_impl_f32(
|
|
7741
7421
|
is_node = true;
|
7742
7422
|
}
|
7743
7423
|
|
7744
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7745
|
-
|
7746
|
-
ggml_scratch_save(ctx);
|
7747
|
-
|
7748
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7749
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7424
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7750
7425
|
|
7751
|
-
|
7426
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7752
7427
|
|
7753
7428
|
result->op = GGML_OP_MAP_CUSTOM3;
|
7754
7429
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7755
7430
|
result->src[0] = a;
|
7756
7431
|
result->src[1] = b;
|
7757
|
-
result->src[2] =
|
7758
|
-
result->src[3] = c;
|
7432
|
+
result->src[2] = c;
|
7759
7433
|
|
7760
7434
|
return result;
|
7761
7435
|
}
|
@@ -8983,21 +8657,17 @@ static void ggml_compute_forward_acc_f32(
|
|
8983
8657
|
const struct ggml_compute_params * params,
|
8984
8658
|
const struct ggml_tensor * src0,
|
8985
8659
|
const struct ggml_tensor * src1,
|
8986
|
-
const struct ggml_tensor * opt0,
|
8987
8660
|
struct ggml_tensor * dst) {
|
8988
8661
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8989
8662
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
8990
8663
|
|
8991
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
8992
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
8993
|
-
|
8994
8664
|
// view src0 and dst with these strides and data offset inbytes during acc
|
8995
8665
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
8996
|
-
size_t nb1 = ((int32_t *)
|
8997
|
-
size_t nb2 = ((int32_t *)
|
8998
|
-
size_t nb3 = ((int32_t *)
|
8999
|
-
size_t offset = ((int32_t *)
|
9000
|
-
bool inplace = (bool) ((int32_t *)
|
8666
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
8667
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
8668
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
8669
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
8670
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
9001
8671
|
|
9002
8672
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
9003
8673
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -9066,13 +8736,12 @@ static void ggml_compute_forward_acc(
|
|
9066
8736
|
const struct ggml_compute_params * params,
|
9067
8737
|
const struct ggml_tensor * src0,
|
9068
8738
|
const struct ggml_tensor * src1,
|
9069
|
-
const struct ggml_tensor * opt0,
|
9070
8739
|
struct ggml_tensor * dst) {
|
9071
8740
|
|
9072
8741
|
switch (src0->type) {
|
9073
8742
|
case GGML_TYPE_F32:
|
9074
8743
|
{
|
9075
|
-
ggml_compute_forward_acc_f32(params, src0, src1,
|
8744
|
+
ggml_compute_forward_acc_f32(params, src0, src1, dst);
|
9076
8745
|
} break;
|
9077
8746
|
case GGML_TYPE_F16:
|
9078
8747
|
case GGML_TYPE_Q4_0:
|
@@ -9504,7 +9173,7 @@ static void ggml_compute_forward_sum_f32(
|
|
9504
9173
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9505
9174
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9506
9175
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9507
|
-
|
9176
|
+
ggml_vec_sum_f32_ggf(ne00,
|
9508
9177
|
&row_sum,
|
9509
9178
|
(float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
|
9510
9179
|
sum += row_sum;
|
@@ -9514,6 +9183,38 @@ static void ggml_compute_forward_sum_f32(
|
|
9514
9183
|
((float *) dst->data)[0] = sum;
|
9515
9184
|
}
|
9516
9185
|
|
9186
|
+
static void ggml_compute_forward_sum_f16(
|
9187
|
+
const struct ggml_compute_params * params,
|
9188
|
+
const struct ggml_tensor * src0,
|
9189
|
+
struct ggml_tensor * dst) {
|
9190
|
+
assert(params->ith == 0);
|
9191
|
+
assert(ggml_is_scalar(dst));
|
9192
|
+
|
9193
|
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
9194
|
+
return;
|
9195
|
+
}
|
9196
|
+
|
9197
|
+
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
9198
|
+
|
9199
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
9200
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
9201
|
+
|
9202
|
+
float sum = 0;
|
9203
|
+
float row_sum = 0;
|
9204
|
+
|
9205
|
+
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9206
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9207
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9208
|
+
ggml_vec_sum_f16_ggf(ne00,
|
9209
|
+
&row_sum,
|
9210
|
+
(ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03));
|
9211
|
+
sum += row_sum;
|
9212
|
+
}
|
9213
|
+
}
|
9214
|
+
}
|
9215
|
+
((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum);
|
9216
|
+
}
|
9217
|
+
|
9517
9218
|
static void ggml_compute_forward_sum(
|
9518
9219
|
const struct ggml_compute_params * params,
|
9519
9220
|
const struct ggml_tensor * src0,
|
@@ -9523,6 +9224,10 @@ static void ggml_compute_forward_sum(
|
|
9523
9224
|
{
|
9524
9225
|
ggml_compute_forward_sum_f32(params, src0, dst);
|
9525
9226
|
} break;
|
9227
|
+
case GGML_TYPE_F16:
|
9228
|
+
{
|
9229
|
+
ggml_compute_forward_sum_f16(params, src0, dst);
|
9230
|
+
} break;
|
9526
9231
|
default:
|
9527
9232
|
{
|
9528
9233
|
GGML_ASSERT(false);
|
@@ -10118,8 +9823,8 @@ static void ggml_compute_forward_gelu_f32(
|
|
10118
9823
|
const struct ggml_compute_params * params,
|
10119
9824
|
const struct ggml_tensor * src0,
|
10120
9825
|
struct ggml_tensor * dst) {
|
10121
|
-
GGML_ASSERT(
|
10122
|
-
GGML_ASSERT(
|
9826
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9827
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10123
9828
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10124
9829
|
|
10125
9830
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10177,8 +9882,8 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
10177
9882
|
const struct ggml_compute_params * params,
|
10178
9883
|
const struct ggml_tensor * src0,
|
10179
9884
|
struct ggml_tensor * dst) {
|
10180
|
-
GGML_ASSERT(
|
10181
|
-
GGML_ASSERT(
|
9885
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9886
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10182
9887
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10183
9888
|
|
10184
9889
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10236,8 +9941,8 @@ static void ggml_compute_forward_silu_f32(
|
|
10236
9941
|
const struct ggml_compute_params * params,
|
10237
9942
|
const struct ggml_tensor * src0,
|
10238
9943
|
struct ggml_tensor * dst) {
|
10239
|
-
GGML_ASSERT(
|
10240
|
-
GGML_ASSERT(
|
9944
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9945
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10241
9946
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10242
9947
|
|
10243
9948
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10289,7 +9994,6 @@ static void ggml_compute_forward_silu(
|
|
10289
9994
|
}
|
10290
9995
|
}
|
10291
9996
|
|
10292
|
-
|
10293
9997
|
// ggml_compute_forward_silu_back
|
10294
9998
|
|
10295
9999
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -10297,9 +10001,9 @@ static void ggml_compute_forward_silu_back_f32(
|
|
10297
10001
|
const struct ggml_tensor * src0,
|
10298
10002
|
const struct ggml_tensor * grad,
|
10299
10003
|
struct ggml_tensor * dst) {
|
10300
|
-
GGML_ASSERT(
|
10301
|
-
GGML_ASSERT(
|
10302
|
-
GGML_ASSERT(
|
10004
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
10005
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
10006
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10303
10007
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10304
10008
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
10305
10009
|
|
@@ -10439,7 +10143,8 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
10439
10143
|
|
10440
10144
|
GGML_TENSOR_UNARY_OP_LOCALS;
|
10441
10145
|
|
10442
|
-
|
10146
|
+
float eps;
|
10147
|
+
memcpy(&eps, dst->op_params, sizeof(float));
|
10443
10148
|
|
10444
10149
|
// TODO: optimize
|
10445
10150
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
@@ -11092,21 +10797,17 @@ static void ggml_compute_forward_set_f32(
|
|
11092
10797
|
const struct ggml_compute_params * params,
|
11093
10798
|
const struct ggml_tensor * src0,
|
11094
10799
|
const struct ggml_tensor * src1,
|
11095
|
-
const struct ggml_tensor * opt0,
|
11096
10800
|
struct ggml_tensor * dst) {
|
11097
10801
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11098
10802
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
11099
10803
|
|
11100
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
11101
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
11102
|
-
|
11103
10804
|
// view src0 and dst with these strides and data offset inbytes during set
|
11104
10805
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
11105
|
-
size_t nb1 = ((int32_t *)
|
11106
|
-
size_t nb2 = ((int32_t *)
|
11107
|
-
size_t nb3 = ((int32_t *)
|
11108
|
-
size_t offset = ((int32_t *)
|
11109
|
-
bool inplace = (bool) ((int32_t *)
|
10806
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
10807
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
10808
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
10809
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
10810
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
11110
10811
|
|
11111
10812
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
11112
10813
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -11166,13 +10867,12 @@ static void ggml_compute_forward_set(
|
|
11166
10867
|
const struct ggml_compute_params * params,
|
11167
10868
|
const struct ggml_tensor * src0,
|
11168
10869
|
const struct ggml_tensor * src1,
|
11169
|
-
const struct ggml_tensor * opt0,
|
11170
10870
|
struct ggml_tensor * dst) {
|
11171
10871
|
|
11172
10872
|
switch (src0->type) {
|
11173
10873
|
case GGML_TYPE_F32:
|
11174
10874
|
{
|
11175
|
-
ggml_compute_forward_set_f32(params, src0, src1,
|
10875
|
+
ggml_compute_forward_set_f32(params, src0, src1, dst);
|
11176
10876
|
} break;
|
11177
10877
|
case GGML_TYPE_F16:
|
11178
10878
|
case GGML_TYPE_Q4_0:
|
@@ -11568,17 +11268,14 @@ static void ggml_compute_forward_diag(
|
|
11568
11268
|
static void ggml_compute_forward_diag_mask_f32(
|
11569
11269
|
const struct ggml_compute_params * params,
|
11570
11270
|
const struct ggml_tensor * src0,
|
11571
|
-
const struct ggml_tensor * src1,
|
11572
11271
|
struct ggml_tensor * dst,
|
11573
11272
|
const float value) {
|
11574
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11575
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11576
11273
|
|
11577
11274
|
const int ith = params->ith;
|
11578
11275
|
const int nth = params->nth;
|
11579
11276
|
|
11580
|
-
const int n_past = ((int32_t *)
|
11581
|
-
const bool inplace = (bool)((int32_t *)
|
11277
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11278
|
+
const bool inplace = (bool)((int32_t *) dst->op_params)[1];
|
11582
11279
|
|
11583
11280
|
GGML_ASSERT(n_past >= 0);
|
11584
11281
|
|
@@ -11621,12 +11318,11 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11621
11318
|
static void ggml_compute_forward_diag_mask_inf(
|
11622
11319
|
const struct ggml_compute_params * params,
|
11623
11320
|
const struct ggml_tensor * src0,
|
11624
|
-
const struct ggml_tensor * src1,
|
11625
11321
|
struct ggml_tensor * dst) {
|
11626
11322
|
switch (src0->type) {
|
11627
11323
|
case GGML_TYPE_F32:
|
11628
11324
|
{
|
11629
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11325
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
|
11630
11326
|
} break;
|
11631
11327
|
default:
|
11632
11328
|
{
|
@@ -11638,12 +11334,11 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
11638
11334
|
static void ggml_compute_forward_diag_mask_zero(
|
11639
11335
|
const struct ggml_compute_params * params,
|
11640
11336
|
const struct ggml_tensor * src0,
|
11641
|
-
const struct ggml_tensor * src1,
|
11642
11337
|
struct ggml_tensor * dst) {
|
11643
11338
|
switch (src0->type) {
|
11644
11339
|
case GGML_TYPE_F32:
|
11645
11340
|
{
|
11646
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11341
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
|
11647
11342
|
} break;
|
11648
11343
|
default:
|
11649
11344
|
{
|
@@ -11841,20 +11536,17 @@ static void ggml_compute_forward_soft_max_back(
|
|
11841
11536
|
static void ggml_compute_forward_alibi_f32(
|
11842
11537
|
const struct ggml_compute_params * params,
|
11843
11538
|
const struct ggml_tensor * src0,
|
11844
|
-
const struct ggml_tensor * src1,
|
11845
11539
|
struct ggml_tensor * dst) {
|
11846
11540
|
assert(params->ith == 0);
|
11847
11541
|
|
11848
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11849
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11850
|
-
|
11851
11542
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11852
11543
|
return;
|
11853
11544
|
}
|
11854
11545
|
|
11855
|
-
const int
|
11856
|
-
const int
|
11857
|
-
|
11546
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11547
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11548
|
+
float max_bias;
|
11549
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11858
11550
|
|
11859
11551
|
assert(n_past >= 0);
|
11860
11552
|
|
@@ -11907,20 +11599,17 @@ static void ggml_compute_forward_alibi_f32(
|
|
11907
11599
|
static void ggml_compute_forward_alibi_f16(
|
11908
11600
|
const struct ggml_compute_params * params,
|
11909
11601
|
const struct ggml_tensor * src0,
|
11910
|
-
const struct ggml_tensor * src1,
|
11911
11602
|
struct ggml_tensor * dst) {
|
11912
11603
|
assert(params->ith == 0);
|
11913
11604
|
|
11914
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11915
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11916
|
-
|
11917
11605
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11918
11606
|
return;
|
11919
11607
|
}
|
11920
11608
|
|
11921
|
-
const int
|
11922
|
-
const int
|
11923
|
-
|
11609
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11610
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11611
|
+
float max_bias;
|
11612
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11924
11613
|
|
11925
11614
|
assert(n_past >= 0);
|
11926
11615
|
|
@@ -11973,16 +11662,15 @@ static void ggml_compute_forward_alibi_f16(
|
|
11973
11662
|
static void ggml_compute_forward_alibi(
|
11974
11663
|
const struct ggml_compute_params * params,
|
11975
11664
|
const struct ggml_tensor * src0,
|
11976
|
-
const struct ggml_tensor * src1,
|
11977
11665
|
struct ggml_tensor * dst) {
|
11978
11666
|
switch (src0->type) {
|
11979
11667
|
case GGML_TYPE_F16:
|
11980
11668
|
{
|
11981
|
-
ggml_compute_forward_alibi_f16(params, src0,
|
11669
|
+
ggml_compute_forward_alibi_f16(params, src0, dst);
|
11982
11670
|
} break;
|
11983
11671
|
case GGML_TYPE_F32:
|
11984
11672
|
{
|
11985
|
-
ggml_compute_forward_alibi_f32(params, src0,
|
11673
|
+
ggml_compute_forward_alibi_f32(params, src0, dst);
|
11986
11674
|
} break;
|
11987
11675
|
case GGML_TYPE_Q4_0:
|
11988
11676
|
case GGML_TYPE_Q4_1:
|
@@ -12012,19 +11700,17 @@ static void ggml_compute_forward_alibi(
|
|
12012
11700
|
static void ggml_compute_forward_clamp_f32(
|
12013
11701
|
const struct ggml_compute_params * params,
|
12014
11702
|
const struct ggml_tensor * src0,
|
12015
|
-
const struct ggml_tensor * src1,
|
12016
11703
|
struct ggml_tensor * dst) {
|
12017
11704
|
assert(params->ith == 0);
|
12018
11705
|
|
12019
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12020
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
12021
|
-
|
12022
11706
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12023
11707
|
return;
|
12024
11708
|
}
|
12025
11709
|
|
12026
|
-
|
12027
|
-
|
11710
|
+
float min;
|
11711
|
+
float max;
|
11712
|
+
memcpy(&min, (float *) dst->op_params + 0, sizeof(float));
|
11713
|
+
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
12028
11714
|
|
12029
11715
|
const int ith = params->ith;
|
12030
11716
|
const int nth = params->nth;
|
@@ -12054,12 +11740,11 @@ static void ggml_compute_forward_clamp_f32(
|
|
12054
11740
|
static void ggml_compute_forward_clamp(
|
12055
11741
|
const struct ggml_compute_params * params,
|
12056
11742
|
const struct ggml_tensor * src0,
|
12057
|
-
const struct ggml_tensor * src1,
|
12058
11743
|
struct ggml_tensor * dst) {
|
12059
11744
|
switch (src0->type) {
|
12060
11745
|
case GGML_TYPE_F32:
|
12061
11746
|
{
|
12062
|
-
ggml_compute_forward_clamp_f32(params, src0,
|
11747
|
+
ggml_compute_forward_clamp_f32(params, src0, dst);
|
12063
11748
|
} break;
|
12064
11749
|
case GGML_TYPE_F16:
|
12065
11750
|
case GGML_TYPE_Q4_0:
|
@@ -12089,10 +11774,7 @@ static void ggml_compute_forward_clamp(
|
|
12089
11774
|
static void ggml_compute_forward_rope_f32(
|
12090
11775
|
const struct ggml_compute_params * params,
|
12091
11776
|
const struct ggml_tensor * src0,
|
12092
|
-
const struct ggml_tensor * src1,
|
12093
11777
|
struct ggml_tensor * dst) {
|
12094
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12095
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12096
11778
|
|
12097
11779
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12098
11780
|
return;
|
@@ -12101,12 +11783,12 @@ static void ggml_compute_forward_rope_f32(
|
|
12101
11783
|
float freq_base;
|
12102
11784
|
float freq_scale;
|
12103
11785
|
|
12104
|
-
const int n_past = ((int32_t *)
|
12105
|
-
const int n_dims = ((int32_t *)
|
12106
|
-
const int mode = ((int32_t *)
|
12107
|
-
const int n_ctx = ((int32_t *)
|
12108
|
-
memcpy(&freq_base, (int32_t *)
|
12109
|
-
memcpy(&freq_scale, (int32_t *)
|
11786
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11787
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11788
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11789
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11790
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11791
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12110
11792
|
|
12111
11793
|
assert(n_past >= 0);
|
12112
11794
|
|
@@ -12221,10 +11903,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12221
11903
|
static void ggml_compute_forward_rope_f16(
|
12222
11904
|
const struct ggml_compute_params * params,
|
12223
11905
|
const struct ggml_tensor * src0,
|
12224
|
-
const struct ggml_tensor * src1,
|
12225
11906
|
struct ggml_tensor * dst) {
|
12226
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12227
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12228
11907
|
|
12229
11908
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12230
11909
|
return;
|
@@ -12233,12 +11912,12 @@ static void ggml_compute_forward_rope_f16(
|
|
12233
11912
|
float freq_base;
|
12234
11913
|
float freq_scale;
|
12235
11914
|
|
12236
|
-
const int n_past = ((int32_t *)
|
12237
|
-
const int n_dims = ((int32_t *)
|
12238
|
-
const int mode = ((int32_t *)
|
12239
|
-
const int n_ctx = ((int32_t *)
|
12240
|
-
memcpy(&freq_base, (int32_t *)
|
12241
|
-
memcpy(&freq_scale, (int32_t *)
|
11915
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11916
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11917
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11918
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11919
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11920
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12242
11921
|
|
12243
11922
|
assert(n_past >= 0);
|
12244
11923
|
|
@@ -12353,16 +12032,15 @@ static void ggml_compute_forward_rope_f16(
|
|
12353
12032
|
static void ggml_compute_forward_rope(
|
12354
12033
|
const struct ggml_compute_params * params,
|
12355
12034
|
const struct ggml_tensor * src0,
|
12356
|
-
const struct ggml_tensor * src1,
|
12357
12035
|
struct ggml_tensor * dst) {
|
12358
12036
|
switch (src0->type) {
|
12359
12037
|
case GGML_TYPE_F16:
|
12360
12038
|
{
|
12361
|
-
ggml_compute_forward_rope_f16(params, src0,
|
12039
|
+
ggml_compute_forward_rope_f16(params, src0, dst);
|
12362
12040
|
} break;
|
12363
12041
|
case GGML_TYPE_F32:
|
12364
12042
|
{
|
12365
|
-
ggml_compute_forward_rope_f32(params, src0,
|
12043
|
+
ggml_compute_forward_rope_f32(params, src0, dst);
|
12366
12044
|
} break;
|
12367
12045
|
default:
|
12368
12046
|
{
|
@@ -12376,10 +12054,7 @@ static void ggml_compute_forward_rope(
|
|
12376
12054
|
static void ggml_compute_forward_rope_back_f32(
|
12377
12055
|
const struct ggml_compute_params * params,
|
12378
12056
|
const struct ggml_tensor * src0,
|
12379
|
-
const struct ggml_tensor * src1,
|
12380
12057
|
struct ggml_tensor * dst) {
|
12381
|
-
assert(src1->type == GGML_TYPE_I32);
|
12382
|
-
assert(ggml_nelements(src1) == 4);
|
12383
12058
|
|
12384
12059
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12385
12060
|
return;
|
@@ -12389,9 +12064,9 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12389
12064
|
// dx = rope_back(dy, src1)
|
12390
12065
|
// src0 is dy, src1 contains options
|
12391
12066
|
|
12392
|
-
const int n_past = ((int32_t *)
|
12393
|
-
const int n_dims = ((int32_t *)
|
12394
|
-
const int mode = ((int32_t *)
|
12067
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12068
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12069
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12395
12070
|
|
12396
12071
|
assert(n_past >= 0);
|
12397
12072
|
|
@@ -12475,10 +12150,7 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12475
12150
|
static void ggml_compute_forward_rope_back_f16(
|
12476
12151
|
const struct ggml_compute_params * params,
|
12477
12152
|
const struct ggml_tensor * src0,
|
12478
|
-
const struct ggml_tensor * src1,
|
12479
12153
|
struct ggml_tensor * dst) {
|
12480
|
-
assert(src1->type == GGML_TYPE_I32);
|
12481
|
-
assert(ggml_nelements(src1) == 3);
|
12482
12154
|
|
12483
12155
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12484
12156
|
return;
|
@@ -12488,9 +12160,9 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12488
12160
|
// dx = rope_back(dy, src1)
|
12489
12161
|
// src0 is dy, src1 contains options
|
12490
12162
|
|
12491
|
-
const int n_past = ((int32_t *)
|
12492
|
-
const int n_dims = ((int32_t *)
|
12493
|
-
const int mode = ((int32_t *)
|
12163
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12164
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12165
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12494
12166
|
|
12495
12167
|
assert(n_past >= 0);
|
12496
12168
|
|
@@ -12574,16 +12246,15 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12574
12246
|
static void ggml_compute_forward_rope_back(
|
12575
12247
|
const struct ggml_compute_params * params,
|
12576
12248
|
const struct ggml_tensor * src0,
|
12577
|
-
const struct ggml_tensor * src1,
|
12578
12249
|
struct ggml_tensor * dst) {
|
12579
12250
|
switch (src0->type) {
|
12580
12251
|
case GGML_TYPE_F16:
|
12581
12252
|
{
|
12582
|
-
ggml_compute_forward_rope_back_f16(params, src0,
|
12253
|
+
ggml_compute_forward_rope_back_f16(params, src0, dst);
|
12583
12254
|
} break;
|
12584
12255
|
case GGML_TYPE_F32:
|
12585
12256
|
{
|
12586
|
-
ggml_compute_forward_rope_back_f32(params, src0,
|
12257
|
+
ggml_compute_forward_rope_back_f32(params, src0, dst);
|
12587
12258
|
} break;
|
12588
12259
|
default:
|
12589
12260
|
{
|
@@ -12780,7 +12451,7 @@ static void ggml_compute_forward_conv_1d_s1_ph(
|
|
12780
12451
|
const struct ggml_compute_params * params,
|
12781
12452
|
const struct ggml_tensor * src0,
|
12782
12453
|
const struct ggml_tensor * src1,
|
12783
|
-
|
12454
|
+
struct ggml_tensor * dst) {
|
12784
12455
|
switch (src0->type) {
|
12785
12456
|
case GGML_TYPE_F16:
|
12786
12457
|
{
|
@@ -12983,7 +12654,7 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12983
12654
|
const struct ggml_compute_params * params,
|
12984
12655
|
const struct ggml_tensor * src0,
|
12985
12656
|
const struct ggml_tensor * src1,
|
12986
|
-
|
12657
|
+
struct ggml_tensor * dst) {
|
12987
12658
|
switch (src0->type) {
|
12988
12659
|
case GGML_TYPE_F16:
|
12989
12660
|
{
|
@@ -13003,14 +12674,13 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
13003
12674
|
// ggml_compute_forward_conv_1d
|
13004
12675
|
|
13005
12676
|
static void ggml_compute_forward_conv_1d(
|
13006
|
-
|
13007
|
-
|
13008
|
-
|
13009
|
-
|
13010
|
-
|
13011
|
-
const int32_t
|
13012
|
-
const int32_t
|
13013
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[2];
|
12677
|
+
const struct ggml_compute_params * params,
|
12678
|
+
const struct ggml_tensor * src0,
|
12679
|
+
const struct ggml_tensor * src1,
|
12680
|
+
struct ggml_tensor * dst) {
|
12681
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12682
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
12683
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
13014
12684
|
GGML_ASSERT(d0 == 1); // dilation not supported
|
13015
12685
|
GGML_ASSERT(p0 == src0->ne[0]/2); // only half padding supported
|
13016
12686
|
if (s0 == 1) {
|
@@ -13028,7 +12698,6 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13028
12698
|
const struct ggml_compute_params * params,
|
13029
12699
|
const struct ggml_tensor * src0,
|
13030
12700
|
const struct ggml_tensor * src1,
|
13031
|
-
const struct ggml_tensor * opt0,
|
13032
12701
|
struct ggml_tensor * dst) {
|
13033
12702
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
13034
12703
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
@@ -13048,12 +12717,12 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13048
12717
|
// size of the convolution row - the kernel size unrolled across all channels
|
13049
12718
|
const int ew0 = nk0*nk1*ne02;
|
13050
12719
|
|
13051
|
-
const int32_t s0 = ((const int32_t*)(
|
13052
|
-
const int32_t s1 = ((const int32_t*)(
|
13053
|
-
const int32_t p0 = ((const int32_t*)(
|
13054
|
-
const int32_t p1 = ((const int32_t*)(
|
13055
|
-
const int32_t d0 = ((const int32_t*)(
|
13056
|
-
const int32_t d1 = ((const int32_t*)(
|
12720
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12721
|
+
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12722
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12723
|
+
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12724
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12725
|
+
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
13057
12726
|
|
13058
12727
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13059
12728
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -13125,17 +12794,15 @@ static void ggml_compute_forward_conv_2d(
|
|
13125
12794
|
const struct ggml_compute_params * params,
|
13126
12795
|
const struct ggml_tensor * src0,
|
13127
12796
|
const struct ggml_tensor * src1,
|
13128
|
-
|
13129
|
-
struct ggml_tensor * dst
|
13130
|
-
) {
|
12797
|
+
struct ggml_tensor * dst) {
|
13131
12798
|
switch (src0->type) {
|
13132
12799
|
case GGML_TYPE_F16:
|
13133
12800
|
{
|
13134
|
-
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1,
|
12801
|
+
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
|
13135
12802
|
} break;
|
13136
12803
|
case GGML_TYPE_F32:
|
13137
12804
|
{
|
13138
|
-
//ggml_compute_forward_conv_2d_f32(params, src0, src1,
|
12805
|
+
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
13139
12806
|
GGML_ASSERT(false);
|
13140
12807
|
} break;
|
13141
12808
|
default:
|
@@ -13200,12 +12867,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
13200
12867
|
// ggml_compute_forward_pool_1d
|
13201
12868
|
|
13202
12869
|
static void ggml_compute_forward_pool_1d(
|
13203
|
-
|
13204
|
-
|
13205
|
-
|
13206
|
-
|
13207
|
-
|
13208
|
-
const int* opts = (const int*)opt0->data;
|
12870
|
+
const struct ggml_compute_params * params,
|
12871
|
+
const struct ggml_tensor * src0,
|
12872
|
+
struct ggml_tensor * dst) {
|
12873
|
+
|
12874
|
+
const int32_t* opts = (const int32_t*)dst->op_params;
|
13209
12875
|
enum ggml_op_pool op = opts[0];
|
13210
12876
|
const int k0 = opts[1];
|
13211
12877
|
const int s0 = opts[2];
|
@@ -13219,12 +12885,12 @@ static void ggml_compute_forward_pool_1d(
|
|
13219
12885
|
// ggml_compute_forward_pool_2d_sk_p0
|
13220
12886
|
|
13221
12887
|
static void ggml_compute_forward_pool_2d_sk_p0(
|
13222
|
-
|
13223
|
-
|
13224
|
-
|
13225
|
-
|
13226
|
-
|
13227
|
-
|
12888
|
+
const struct ggml_compute_params * params,
|
12889
|
+
const enum ggml_op_pool op,
|
12890
|
+
const struct ggml_tensor * src,
|
12891
|
+
const int k0,
|
12892
|
+
const int k1,
|
12893
|
+
struct ggml_tensor * dst) {
|
13228
12894
|
assert(src->type == GGML_TYPE_F32);
|
13229
12895
|
assert(params->ith == 0);
|
13230
12896
|
|
@@ -13284,12 +12950,11 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
13284
12950
|
// ggml_compute_forward_pool_2d
|
13285
12951
|
|
13286
12952
|
static void ggml_compute_forward_pool_2d(
|
13287
|
-
|
13288
|
-
|
13289
|
-
|
13290
|
-
|
13291
|
-
|
13292
|
-
const int* opts = (const int*)opt0->data;
|
12953
|
+
const struct ggml_compute_params * params,
|
12954
|
+
const struct ggml_tensor * src0,
|
12955
|
+
struct ggml_tensor * dst) {
|
12956
|
+
|
12957
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
13293
12958
|
enum ggml_op_pool op = opts[0];
|
13294
12959
|
const int k0 = opts[1];
|
13295
12960
|
const int k1 = opts[2];
|
@@ -13314,7 +12979,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13314
12979
|
const struct ggml_tensor * k,
|
13315
12980
|
const struct ggml_tensor * v,
|
13316
12981
|
const bool masked,
|
13317
|
-
|
12982
|
+
struct ggml_tensor * dst) {
|
13318
12983
|
int64_t t0 = ggml_perf_time_us();
|
13319
12984
|
UNUSED(t0);
|
13320
12985
|
|
@@ -13492,7 +13157,7 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13492
13157
|
const struct ggml_tensor * k,
|
13493
13158
|
const struct ggml_tensor * v,
|
13494
13159
|
const bool masked,
|
13495
|
-
|
13160
|
+
struct ggml_tensor * dst) {
|
13496
13161
|
int64_t t0 = ggml_perf_time_us();
|
13497
13162
|
UNUSED(t0);
|
13498
13163
|
|
@@ -14257,7 +13922,6 @@ static void ggml_compute_forward_flash_attn_back(
|
|
14257
13922
|
static void ggml_compute_forward_win_part_f32(
|
14258
13923
|
const struct ggml_compute_params * params,
|
14259
13924
|
const struct ggml_tensor * src0,
|
14260
|
-
const struct ggml_tensor * opt0,
|
14261
13925
|
struct ggml_tensor * dst) {
|
14262
13926
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14263
13927
|
return;
|
@@ -14266,9 +13930,9 @@ static void ggml_compute_forward_win_part_f32(
|
|
14266
13930
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14267
13931
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14268
13932
|
|
14269
|
-
const int32_t nep0 = ((const int32_t *)(
|
14270
|
-
const int32_t nep1 = ((const int32_t *)(
|
14271
|
-
const int32_t w = ((const int32_t *)(
|
13933
|
+
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
13934
|
+
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
13935
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[2];
|
14272
13936
|
|
14273
13937
|
assert(ne00 == ne0);
|
14274
13938
|
assert(ne3 == nep0*nep1);
|
@@ -14302,12 +13966,11 @@ static void ggml_compute_forward_win_part_f32(
|
|
14302
13966
|
static void ggml_compute_forward_win_part(
|
14303
13967
|
const struct ggml_compute_params * params,
|
14304
13968
|
const struct ggml_tensor * src0,
|
14305
|
-
const struct ggml_tensor * opt0,
|
14306
13969
|
struct ggml_tensor * dst) {
|
14307
13970
|
switch (src0->type) {
|
14308
13971
|
case GGML_TYPE_F32:
|
14309
13972
|
{
|
14310
|
-
ggml_compute_forward_win_part_f32(params, src0,
|
13973
|
+
ggml_compute_forward_win_part_f32(params, src0, dst);
|
14311
13974
|
} break;
|
14312
13975
|
default:
|
14313
13976
|
{
|
@@ -14321,7 +13984,6 @@ static void ggml_compute_forward_win_part(
|
|
14321
13984
|
static void ggml_compute_forward_win_unpart_f32(
|
14322
13985
|
const struct ggml_compute_params * params,
|
14323
13986
|
const struct ggml_tensor * src0,
|
14324
|
-
const struct ggml_tensor * opt0,
|
14325
13987
|
struct ggml_tensor * dst) {
|
14326
13988
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14327
13989
|
return;
|
@@ -14330,7 +13992,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14330
13992
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14331
13993
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14332
13994
|
|
14333
|
-
const int32_t w = ((const int32_t *)(
|
13995
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
14334
13996
|
|
14335
13997
|
// padding
|
14336
13998
|
const int px = (w - ne1%w)%w;
|
@@ -14364,12 +14026,67 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14364
14026
|
static void ggml_compute_forward_win_unpart(
|
14365
14027
|
const struct ggml_compute_params * params,
|
14366
14028
|
const struct ggml_tensor * src0,
|
14367
|
-
const struct ggml_tensor * opt0,
|
14368
14029
|
struct ggml_tensor * dst) {
|
14369
14030
|
switch (src0->type) {
|
14370
14031
|
case GGML_TYPE_F32:
|
14371
14032
|
{
|
14372
|
-
ggml_compute_forward_win_unpart_f32(params, src0,
|
14033
|
+
ggml_compute_forward_win_unpart_f32(params, src0, dst);
|
14034
|
+
} break;
|
14035
|
+
default:
|
14036
|
+
{
|
14037
|
+
GGML_ASSERT(false);
|
14038
|
+
} break;
|
14039
|
+
}
|
14040
|
+
}
|
14041
|
+
|
14042
|
+
//gmml_compute_forward_unary
|
14043
|
+
|
14044
|
+
static void ggml_compute_forward_unary(
|
14045
|
+
const struct ggml_compute_params * params,
|
14046
|
+
const struct ggml_tensor * src0,
|
14047
|
+
struct ggml_tensor * dst) {
|
14048
|
+
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
14049
|
+
|
14050
|
+
switch (op) {
|
14051
|
+
case GGML_UNARY_OP_ABS:
|
14052
|
+
{
|
14053
|
+
ggml_compute_forward_abs(params, src0, dst);
|
14054
|
+
} break;
|
14055
|
+
case GGML_UNARY_OP_SGN:
|
14056
|
+
{
|
14057
|
+
ggml_compute_forward_sgn(params, src0, dst);
|
14058
|
+
} break;
|
14059
|
+
case GGML_UNARY_OP_NEG:
|
14060
|
+
{
|
14061
|
+
ggml_compute_forward_neg(params, src0, dst);
|
14062
|
+
} break;
|
14063
|
+
case GGML_UNARY_OP_STEP:
|
14064
|
+
{
|
14065
|
+
ggml_compute_forward_step(params, src0, dst);
|
14066
|
+
} break;
|
14067
|
+
case GGML_UNARY_OP_TANH:
|
14068
|
+
{
|
14069
|
+
ggml_compute_forward_tanh(params, src0, dst);
|
14070
|
+
} break;
|
14071
|
+
case GGML_UNARY_OP_ELU:
|
14072
|
+
{
|
14073
|
+
ggml_compute_forward_elu(params, src0, dst);
|
14074
|
+
} break;
|
14075
|
+
case GGML_UNARY_OP_RELU:
|
14076
|
+
{
|
14077
|
+
ggml_compute_forward_relu(params, src0, dst);
|
14078
|
+
} break;
|
14079
|
+
case GGML_UNARY_OP_GELU:
|
14080
|
+
{
|
14081
|
+
ggml_compute_forward_gelu(params, src0, dst);
|
14082
|
+
} break;
|
14083
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
14084
|
+
{
|
14085
|
+
ggml_compute_forward_gelu_quick(params, src0, dst);
|
14086
|
+
} break;
|
14087
|
+
case GGML_UNARY_OP_SILU:
|
14088
|
+
{
|
14089
|
+
ggml_compute_forward_silu(params, src0, dst);
|
14373
14090
|
} break;
|
14374
14091
|
default:
|
14375
14092
|
{
|
@@ -14888,7 +14605,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14888
14605
|
} break;
|
14889
14606
|
case GGML_OP_ACC:
|
14890
14607
|
{
|
14891
|
-
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor
|
14608
|
+
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
|
14892
14609
|
} break;
|
14893
14610
|
case GGML_OP_SUB:
|
14894
14611
|
{
|
@@ -14938,46 +14655,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14938
14655
|
{
|
14939
14656
|
ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
|
14940
14657
|
} break;
|
14941
|
-
case GGML_OP_ABS:
|
14942
|
-
{
|
14943
|
-
ggml_compute_forward_abs(params, tensor->src[0], tensor);
|
14944
|
-
} break;
|
14945
|
-
case GGML_OP_SGN:
|
14946
|
-
{
|
14947
|
-
ggml_compute_forward_sgn(params, tensor->src[0], tensor);
|
14948
|
-
} break;
|
14949
|
-
case GGML_OP_NEG:
|
14950
|
-
{
|
14951
|
-
ggml_compute_forward_neg(params, tensor->src[0], tensor);
|
14952
|
-
} break;
|
14953
|
-
case GGML_OP_STEP:
|
14954
|
-
{
|
14955
|
-
ggml_compute_forward_step(params, tensor->src[0], tensor);
|
14956
|
-
} break;
|
14957
|
-
case GGML_OP_TANH:
|
14958
|
-
{
|
14959
|
-
ggml_compute_forward_tanh(params, tensor->src[0], tensor);
|
14960
|
-
} break;
|
14961
|
-
case GGML_OP_ELU:
|
14962
|
-
{
|
14963
|
-
ggml_compute_forward_elu(params, tensor->src[0], tensor);
|
14964
|
-
} break;
|
14965
|
-
case GGML_OP_RELU:
|
14966
|
-
{
|
14967
|
-
ggml_compute_forward_relu(params, tensor->src[0], tensor);
|
14968
|
-
} break;
|
14969
|
-
case GGML_OP_GELU:
|
14970
|
-
{
|
14971
|
-
ggml_compute_forward_gelu(params, tensor->src[0], tensor);
|
14972
|
-
} break;
|
14973
|
-
case GGML_OP_GELU_QUICK:
|
14974
|
-
{
|
14975
|
-
ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor);
|
14976
|
-
} break;
|
14977
|
-
case GGML_OP_SILU:
|
14978
|
-
{
|
14979
|
-
ggml_compute_forward_silu(params, tensor->src[0], tensor);
|
14980
|
-
} break;
|
14981
14658
|
case GGML_OP_SILU_BACK:
|
14982
14659
|
{
|
14983
14660
|
ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
|
@@ -15008,7 +14685,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15008
14685
|
} break;
|
15009
14686
|
case GGML_OP_SET:
|
15010
14687
|
{
|
15011
|
-
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor
|
14688
|
+
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
|
15012
14689
|
} break;
|
15013
14690
|
case GGML_OP_CPY:
|
15014
14691
|
{
|
@@ -15048,11 +14725,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15048
14725
|
} break;
|
15049
14726
|
case GGML_OP_DIAG_MASK_INF:
|
15050
14727
|
{
|
15051
|
-
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor
|
14728
|
+
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
|
15052
14729
|
} break;
|
15053
14730
|
case GGML_OP_DIAG_MASK_ZERO:
|
15054
14731
|
{
|
15055
|
-
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor
|
14732
|
+
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
|
15056
14733
|
} break;
|
15057
14734
|
case GGML_OP_SOFT_MAX:
|
15058
14735
|
{
|
@@ -15064,39 +14741,39 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15064
14741
|
} break;
|
15065
14742
|
case GGML_OP_ROPE:
|
15066
14743
|
{
|
15067
|
-
ggml_compute_forward_rope(params, tensor->src[0], tensor
|
14744
|
+
ggml_compute_forward_rope(params, tensor->src[0], tensor);
|
15068
14745
|
} break;
|
15069
14746
|
case GGML_OP_ROPE_BACK:
|
15070
14747
|
{
|
15071
|
-
ggml_compute_forward_rope_back(params, tensor->src[0], tensor
|
14748
|
+
ggml_compute_forward_rope_back(params, tensor->src[0], tensor);
|
15072
14749
|
} break;
|
15073
14750
|
case GGML_OP_ALIBI:
|
15074
14751
|
{
|
15075
|
-
ggml_compute_forward_alibi(params, tensor->src[0], tensor
|
14752
|
+
ggml_compute_forward_alibi(params, tensor->src[0], tensor);
|
15076
14753
|
} break;
|
15077
14754
|
case GGML_OP_CLAMP:
|
15078
14755
|
{
|
15079
|
-
ggml_compute_forward_clamp(params, tensor->src[0], tensor
|
14756
|
+
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
15080
14757
|
} break;
|
15081
14758
|
case GGML_OP_CONV_1D:
|
15082
14759
|
{
|
15083
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor
|
14760
|
+
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
15084
14761
|
} break;
|
15085
14762
|
case GGML_OP_CONV_2D:
|
15086
14763
|
{
|
15087
|
-
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor
|
14764
|
+
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
|
15088
14765
|
} break;
|
15089
14766
|
case GGML_OP_POOL_1D:
|
15090
14767
|
{
|
15091
|
-
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor
|
14768
|
+
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
|
15092
14769
|
} break;
|
15093
14770
|
case GGML_OP_POOL_2D:
|
15094
14771
|
{
|
15095
|
-
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor
|
14772
|
+
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
|
15096
14773
|
} break;
|
15097
14774
|
case GGML_OP_FLASH_ATTN:
|
15098
14775
|
{
|
15099
|
-
const int32_t t =
|
14776
|
+
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15100
14777
|
GGML_ASSERT(t == 0 || t == 1);
|
15101
14778
|
const bool masked = t != 0;
|
15102
14779
|
ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
|
@@ -15107,47 +14784,56 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15107
14784
|
} break;
|
15108
14785
|
case GGML_OP_FLASH_ATTN_BACK:
|
15109
14786
|
{
|
15110
|
-
int32_t t =
|
14787
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15111
14788
|
GGML_ASSERT(t == 0 || t == 1);
|
15112
14789
|
bool masked = t != 0;
|
15113
14790
|
ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
|
15114
14791
|
} break;
|
15115
14792
|
case GGML_OP_WIN_PART:
|
15116
14793
|
{
|
15117
|
-
ggml_compute_forward_win_part(params, tensor->src[0], tensor
|
14794
|
+
ggml_compute_forward_win_part(params, tensor->src[0], tensor);
|
15118
14795
|
} break;
|
15119
14796
|
case GGML_OP_WIN_UNPART:
|
15120
14797
|
{
|
15121
|
-
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor
|
14798
|
+
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
|
14799
|
+
} break;
|
14800
|
+
case GGML_OP_UNARY:
|
14801
|
+
{
|
14802
|
+
ggml_compute_forward_unary(params, tensor->src[0], tensor);
|
15122
14803
|
} break;
|
15123
14804
|
case GGML_OP_MAP_UNARY:
|
15124
14805
|
{
|
15125
|
-
|
14806
|
+
ggml_unary_op_f32_t fun;
|
14807
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15126
14808
|
ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
|
15127
14809
|
}
|
15128
14810
|
break;
|
15129
14811
|
case GGML_OP_MAP_BINARY:
|
15130
14812
|
{
|
15131
|
-
|
14813
|
+
ggml_binary_op_f32_t fun;
|
14814
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15132
14815
|
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15133
14816
|
}
|
15134
14817
|
break;
|
15135
14818
|
case GGML_OP_MAP_CUSTOM1:
|
15136
14819
|
{
|
15137
|
-
|
14820
|
+
ggml_custom1_op_f32_t fun;
|
14821
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15138
14822
|
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
15139
14823
|
}
|
15140
14824
|
break;
|
15141
14825
|
case GGML_OP_MAP_CUSTOM2:
|
15142
14826
|
{
|
15143
|
-
|
14827
|
+
ggml_custom2_op_f32_t fun;
|
14828
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15144
14829
|
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15145
14830
|
}
|
15146
14831
|
break;
|
15147
14832
|
case GGML_OP_MAP_CUSTOM3:
|
15148
14833
|
{
|
15149
|
-
|
15150
|
-
|
14834
|
+
ggml_custom3_op_f32_t fun;
|
14835
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
14836
|
+
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
15151
14837
|
}
|
15152
14838
|
break;
|
15153
14839
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
@@ -15211,12 +14897,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                     src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
                 }
                 if (src1->grad) {
-
-
-                    const size_t
-                    const size_t
-                    const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
-                    const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
+                    const size_t nb1 = ((int32_t *) tensor->op_params)[0];
+                    const size_t nb2 = ((int32_t *) tensor->op_params)[1];
+                    const size_t nb3 = ((int32_t *) tensor->op_params)[2];
+                    const size_t offset = ((int32_t *) tensor->op_params)[3];

                     struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
                         tensor->grad,
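The same storage is used for plain integer parameters: strides, offsets, n_past, rope mode and the like are read back as an int32 array from tensor->op_params instead of from an auxiliary parameter tensor. A minimal sketch of that packing convention, assuming an illustrative buffer size and field order rather than the library's:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_OP_PARAMS_BYTES 32  /* stand-in for the fixed per-tensor parameter area */

    struct fake_tensor {
        char op_params[MAX_OP_PARAMS_BYTES];
    };

    /* producer side: pack the four scalars the ACC/SET backward pass needs */
    static void set_params(struct fake_tensor * t, int32_t nb1, int32_t nb2, int32_t nb3, int32_t offset) {
        int32_t params[4] = { nb1, nb2, nb3, offset };
        memcpy(t->op_params, params, sizeof(params));
    }

    int main(void) {
        struct fake_tensor t;
        set_params(&t, 64, 4096, 262144, 128);

        /* consumer side: the same indexed reads the backward pass performs */
        const size_t nb1    = ((int32_t *) t.op_params)[0];
        const size_t nb2    = ((int32_t *) t.op_params)[1];
        const size_t nb3    = ((int32_t *) t.op_params)[2];
        const size_t offset = ((int32_t *) t.op_params)[3];

        printf("nb1=%zu nb2=%zu nb3=%zu offset=%zu\n", nb1, nb2, nb3, offset);
        return 0;
    }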
@@ -15365,73 +15049,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                             inplace);
                 }
             } break;
-        case GGML_OP_ABS:
-            {
-                if (src0->grad) {
-                    src0->grad =
-                        ggml_add_impl(ctx,
-                                src0->grad,
-                                ggml_mul(ctx,
-                                    ggml_sgn(ctx, src0),
-                                    tensor->grad),
-                                inplace);
-                }
-            } break;
-        case GGML_OP_SGN:
-            {
-                if (src0->grad) {
-                    // noop
-                }
-            } break;
-        case GGML_OP_NEG:
-            {
-                if (src0->grad) {
-                    src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
-                }
-            } break;
-        case GGML_OP_STEP:
-            {
-                if (src0->grad) {
-                    // noop
-                }
-            } break;
-        case GGML_OP_TANH:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_ELU:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_RELU:
-            {
-                if (src0->grad) {
-                    src0->grad = ggml_sub_impl(ctx,
-                            src0->grad,
-                            ggml_mul(ctx,
-                                ggml_step(ctx, src0),
-                                tensor->grad),
-                            inplace);
-                }
-            } break;
-        case GGML_OP_GELU:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_GELU_QUICK:
-            {
-                GGML_ASSERT(false); // TODO: not implemented
-            } break;
-        case GGML_OP_SILU:
-            {
-                // necessary for llama
-                if (src0->grad) {
-                    src0->grad = ggml_add_impl(ctx,
-                            src0->grad,
-                            ggml_silu_back(ctx, src0, tensor->grad),
-                            inplace);
-                }
-            } break;
         case GGML_OP_SILU_BACK:
             {
                 GGML_ASSERT(false); // TODO: not implemented
@@ -15524,12 +15141,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_SET:
             {
-
-
-                const size_t
-                const size_t
-                const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
-                const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
+                const size_t nb1 = ((int32_t *) tensor->op_params)[0];
+                const size_t nb2 = ((int32_t *) tensor->op_params)[1];
+                const size_t nb3 = ((int32_t *) tensor->op_params)[2];
+                const size_t offset = ((int32_t *) tensor->op_params)[3];

                 struct ggml_tensor * tensor_grad_view = NULL;

@@ -15606,8 +15221,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 if (src0->grad) {
                     size_t offset;

-
-                    memcpy(&offset, tensor->src[2]->data, sizeof(offset));
+                    memcpy(&offset, tensor->op_params, sizeof(offset));

                     size_t nb1 = tensor->nb[1];
                     size_t nb2 = tensor->nb[2];
@@ -15634,7 +15248,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
-                    int32_t * axes = (int32_t *) tensor->
+                    int32_t * axes = (int32_t *) tensor->op_params;
                     int axis0 = axes[0] & 0x3;
                     int axis1 = axes[1] & 0x3;
                     int axis2 = axes[2] & 0x3;
@@ -15690,33 +15304,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            {
                 // necessary for llama
                 if (src0->grad) {
-
-                    assert(ggml_nelements(src1) == 2);
-                    const int n_past = ((int32_t *) src1->data)[0];
+                    const int n_past = ((int32_t *) tensor->op_params)[0];
                     src0->grad =
                         ggml_add_impl(ctx, src0->grad,
                             ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
                             inplace);
                 }
-                if (src1->grad) {
-                    // noop
-                }
             } break;
         case GGML_OP_DIAG_MASK_ZERO:
             {
                 // necessary for llama
                 if (src0->grad) {
-
-                    assert(ggml_nelements(src1) == 2);
-                    const int n_past = ((int32_t *) src1->data)[0];
+                    const int n_past = ((int32_t *) tensor->op_params)[0];
                     src0->grad =
                         ggml_add_impl(ctx, src0->grad,
                             ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
                             inplace);
                 }
-                if (src1->grad) {
-                    // noop
-                }
             } break;
         case GGML_OP_SOFT_MAX:
             {
@@ -15737,12 +15341,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 // necessary for llama
                 if (src0->grad) {
-
-
-                    const int
-                    const int
-                    const int mode = ((int32_t *) src1->data)[2];
-                    const int n_ctx = ((int32_t *) src1->data)[3];
+                    const int n_past = ((int32_t *) tensor->op_params)[0];
+                    const int n_dims = ((int32_t *) tensor->op_params)[1];
+                    const int mode = ((int32_t *) tensor->op_params)[2];
+                    const int n_ctx = ((int32_t *) tensor->op_params)[3];
                     src0->grad = ggml_add_impl(ctx,
                             src0->grad,
                             ggml_rope_back(ctx,
@@ -15753,19 +15355,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                                 n_ctx),
                             inplace);
                 }
-                if (src1->grad) {
-                    // noop
-                }
             } break;
         case GGML_OP_ROPE_BACK:
             {
                 if (src0->grad) {
-
-
-                    const int
-                    const int
-                    const int mode = ((int32_t *) src1->data)[2];
-                    const int n_ctx = ((int32_t *) src1->data)[3];
+                    const int n_past = ((int32_t *) tensor->op_params)[0];
+                    const int n_dims = ((int32_t *) tensor->op_params)[1];
+                    const int mode = ((int32_t *) tensor->op_params)[2];
+                    const int n_ctx = ((int32_t *) tensor->op_params)[3];
                     src0->grad = ggml_add_impl(ctx,
                             src0->grad,
                             ggml_rope(ctx,
@@ -15776,9 +15373,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                                 n_ctx),
                             inplace);
                 }
-                if (src1->grad) {
-                    // noop
-                }
             } break;
         case GGML_OP_ALIBI:
             {
@@ -15808,7 +15402,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 struct ggml_tensor * flash_grad = NULL;
                 if (src0->grad || src1->grad || tensor->src[2]->grad) {
-                    int32_t t =
+                    int32_t t = ggml_get_op_params_i32(tensor, 0);
                     GGML_ASSERT(t == 0 || t == 1);
                     bool masked = t != 0;
                     flash_grad =
@@ -15971,6 +15565,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             } break;
         case GGML_OP_WIN_PART:
         case GGML_OP_WIN_UNPART:
+        case GGML_OP_UNARY:
+            {
+                switch (ggml_get_unary_op(tensor)) {
+                    case GGML_UNARY_OP_ABS:
+                        {
+                            if (src0->grad) {
+                                src0->grad =
+                                    ggml_add_impl(ctx,
+                                            src0->grad,
+                                            ggml_mul(ctx,
+                                                ggml_sgn(ctx, src0),
+                                                tensor->grad),
+                                            inplace);
+                            }
+                        } break;
+                    case GGML_UNARY_OP_SGN:
+                        {
+                            if (src0->grad) {
+                                // noop
+                            }
+                        } break;
+                    case GGML_UNARY_OP_NEG:
+                        {
+                            if (src0->grad) {
+                                src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
+                            }
+                        } break;
+                    case GGML_UNARY_OP_STEP:
+                        {
+                            if (src0->grad) {
+                                // noop
+                            }
+                        } break;
+                    case GGML_UNARY_OP_TANH:
+                        {
+                            GGML_ASSERT(false); // TODO: not implemented
+                        } break;
+                    case GGML_UNARY_OP_ELU:
+                        {
+                            GGML_ASSERT(false); // TODO: not implemented
+                        } break;
+                    case GGML_UNARY_OP_RELU:
+                        {
+                            if (src0->grad) {
+                                src0->grad = ggml_add_impl(ctx,
+                                        src0->grad,
+                                        ggml_mul(ctx,
+                                            ggml_step(ctx, src0),
+                                            tensor->grad),
+                                        inplace);
+                            }
+                        } break;
+                    case GGML_UNARY_OP_GELU:
+                        {
+                            GGML_ASSERT(false); // TODO: not implemented
+                        } break;
+                    case GGML_UNARY_OP_GELU_QUICK:
+                        {
+                            GGML_ASSERT(false); // TODO: not implemented
+                        } break;
+                    case GGML_UNARY_OP_SILU:
+                        {
+                            // necessary for llama
+                            if (src0->grad) {
+                                src0->grad = ggml_add_impl(ctx,
+                                        src0->grad,
+                                        ggml_silu_back(ctx, src0, tensor->grad),
+                                        inplace);
+                            }
+                        } break;
+                    default:
+                        GGML_ASSERT(false);
+                }
+            } break;
         case GGML_OP_MAP_UNARY:
         case GGML_OP_MAP_BINARY:
         case GGML_OP_MAP_CUSTOM1:
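The per-activation cases removed earlier reappear here under a single GGML_OP_UNARY node whose concrete kind is recovered via ggml_get_unary_op; the gradient rules themselves are unchanged. As a small sanity check of the ABS rule used above (the gradient contribution is sgn(x) * dy), a standalone finite-difference comparison, illustration only:

    #include <math.h>
    #include <stdio.h>

    /* d/dx |x| = sgn(x) for x != 0; compare against a central finite difference */
    int main(void) {
        const double xs[] = { -2.0, -0.3, 0.7, 1.5 };
        const double eps = 1e-6;
        for (int i = 0; i < 4; ++i) {
            double x = xs[i];
            double analytic = (x > 0.0) - (x < 0.0);                 /* sgn(x) */
            double numeric  = (fabs(x + eps) - fabs(x - eps)) / (2.0 * eps);
            printf("x=% .2f  sgn=% .1f  fd=% .6f\n", x, analytic, numeric);
        }
        return 0;
    }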
@@ -16006,6 +15674,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
     }
 }

+static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
+
+static size_t hash(void * p) {
+    return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
+}
+
+static bool hash_insert(void * hash_table[], void * p) {
+    size_t h = hash(p);
+
+    // linear probing
+    size_t i = h;
+    while (hash_table[i] != NULL && hash_table[i] != p) {
+        i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
+        if (i == h) {
+            // hash table is full
+            GGML_ASSERT(false);
+        }
+    }
+
+    if (hash_table[i] == p) {
+        return true;
+    }
+
+    // insert
+    hash_table[i] = p;
+    return false;
+}
+
 static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
     if (node->grad == NULL) {
         // this usually happens when we generate intermediate nodes from constants in the backward pass
@@ -16016,16 +15712,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
     }

     // check if already visited
-
-
-            return;
-        }
-    }
-
-    for (int i = 0; i < cgraph->n_leafs; i++) {
-        if (cgraph->leafs[i] == node) {
-            return;
-        }
+    if (hash_insert(cgraph->visited_hash_table, node)) {
+        return;
     }

     for (int i = 0; i < GGML_MAX_SRC; ++i) {
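The linear scans over cgraph->nodes and cgraph->leafs are replaced by one open-addressing hash table keyed on the tensor pointer, so the already-visited check becomes close to O(1) per node instead of O(n). A self-contained sketch of the same linear-probing insert; the table size and names are illustrative, not the ggml constants:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define TABLE_SIZE 17  /* illustrative; ggml sizes its table against the node limit */

    static size_t hash_ptr(const void * p) {
        return (size_t) p % TABLE_SIZE;
    }

    /* returns true if p was already present, false if it was inserted now */
    static bool visited_insert(const void * table[], const void * p) {
        size_t h = hash_ptr(p);
        size_t i = h;
        while (table[i] != NULL && table[i] != p) {
            i = (i + 1) % TABLE_SIZE;   /* linear probing */
            assert(i != h);             /* table full */
        }
        if (table[i] == p) {
            return true;
        }
        table[i] = p;
        return false;
    }

    int main(void) {
        const void * table[TABLE_SIZE] = { NULL };
        int a, b;
        printf("%d\n", visited_insert(table, &a)); /* 0: first visit */
        printf("%d\n", visited_insert(table, &b)); /* 0: first visit */
        printf("%d\n", visited_insert(table, &a)); /* 1: already visited */
        return 0;
    }

Returning true for an already-present pointer lets a single call serve as both the membership test and the insertion, which is exactly how ggml_visit_parents uses it above.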
@@ -16088,6 +15776,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
         /*.nodes =*/ { NULL },
         /*.grads =*/ { NULL },
         /*.leafs =*/ { NULL },
+        /*.hash_table =*/ { NULL },
         /*.perf_runs =*/ 0,
         /*.perf_cycles =*/ 0,
         /*.perf_time_us =*/ 0,
@@ -16129,13 +15818,42 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg

         if (node->is_param) {
             GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
-
+            ggml_build_forward_expand(&result, node->grad);
         }
     }

     return result;
 }

+struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
+    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.n_nodes =*/ 0,
+        /*.n_leafs =*/ 0,
+        /*.nodes =*/ { NULL },
+        /*.grads =*/ { NULL },
+        /*.leafs =*/ { NULL },
+        /*.hash_table =*/ { NULL },
+        /*.perf_runs =*/ 0,
+        /*.perf_cycles =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };
+
+    return cgraph;
+}
+
+struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+    struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
+    ggml_build_forward_impl(cgraph, tensor, false);
+    return cgraph;
+}
+
+size_t ggml_graph_overhead(void) {
+    return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
+}
+
 //
 // thread data
 //
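With ggml_new_graph, ggml_build_forward_ctx and ggml_graph_overhead a compute graph can be carved out of a ggml_context instead of living on the caller's stack, and the caller can budget context memory for it up front. A rough usage sketch, with sizes chosen arbitrarily and the surrounding llama_cpp plumbing left out:

    // Sketch only: builds y = x*x in a context-allocated graph.
    // Assumes the ggml.h entry points that appear in this diff.
    #include "ggml.h"

    void example(void) {
        struct ggml_init_params params = {
            /* mem_size   */ 16*1024*1024 + ggml_graph_overhead(),
            /* mem_buffer */ NULL,
            /* no_alloc   */ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
        struct ggml_tensor * y = ggml_mul(ctx, x, x);

        // the graph object is carved out of the same context
        struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, y);

        ggml_graph_compute_with_ctx(ctx, gf, /*n_threads*/ 1);

        ggml_free(ctx);
    }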
@@ -16201,7 +15919,7 @@ typedef pthread_t ggml_thread_t;

 // Android's libc implementation "bionic" does not support setting affinity
 #if defined(__linux__) && !defined(__BIONIC__)
-void set_numa_thread_affinity(int thread_n, int n_threads) {
+static void set_numa_thread_affinity(int thread_n, int n_threads) {
     if (!ggml_is_numa()) {
         return;
     }
@@ -16226,7 +15944,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) {
     CPU_FREE(cpus);
 }

-void clear_numa_thread_affinity(void) {
+static void clear_numa_thread_affinity(void) {
     if (!ggml_is_numa()) {
         return;
     }
@@ -16250,8 +15968,8 @@ void clear_numa_thread_affinity(void) {
 #else
 // TODO: Windows etc.
 // (the linux implementation may also work on BSD, someone should test)
-void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
-void clear_numa_thread_affinity(void) {}
+static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
+static void clear_numa_thread_affinity(void) {}
 #endif

 struct ggml_compute_state_shared {
@@ -16463,21 +16181,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
         case GGML_OP_ARGMAX:
         case GGML_OP_REPEAT:
         case GGML_OP_REPEAT_BACK:
-
-        case GGML_OP_SGN:
-        case GGML_OP_NEG:
-        case GGML_OP_STEP:
-        case GGML_OP_TANH:
-        case GGML_OP_ELU:
-        case GGML_OP_RELU:
-            {
+            {
                 n_tasks = 1;
             } break;
-
-        case
-
-
+
+        case GGML_OP_UNARY:
+            {
+                switch (ggml_get_unary_op(node)) {
+                    case GGML_UNARY_OP_ABS:
+                    case GGML_UNARY_OP_SGN:
+                    case GGML_UNARY_OP_NEG:
+                    case GGML_UNARY_OP_STEP:
+                    case GGML_UNARY_OP_TANH:
+                    case GGML_UNARY_OP_ELU:
+                    case GGML_UNARY_OP_RELU:
+                        {
+                            n_tasks = 1;
+                        } break;
+
+                    case GGML_UNARY_OP_GELU:
+                    case GGML_UNARY_OP_GELU_QUICK:
+                    case GGML_UNARY_OP_SILU:
+                        {
+                            n_tasks = n_threads;
+                        } break;
+                }
+            } break;
         case GGML_OP_SILU_BACK:
+        case GGML_OP_MUL:
         case GGML_OP_NORM:
         case GGML_OP_RMS_NORM:
         case GGML_OP_RMS_NORM_BACK:
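Under GGML_OP_UNARY the cheap element-wise kernels stay single-threaded while GELU, GELU_QUICK and SILU keep n_tasks = n_threads. The planner only records the task count; each multi-threaded kernel then splits its rows by thread index in the usual ggml fashion. A generic sketch of that split, with illustrative names rather than code copied from the kernels:

    #include <stdio.h>

    /* Divide nr rows among nth workers; worker ith handles [ir0, ir1). */
    static void row_range(int nr, int nth, int ith, int * ir0, int * ir1) {
        const int dr = (nr + nth - 1) / nth;   /* rows per thread, rounded up */
        *ir0 = dr * ith;
        *ir1 = *ir0 + dr < nr ? *ir0 + dr : nr;
    }

    int main(void) {
        const int nr = 10, nth = 4;
        for (int ith = 0; ith < nth; ++ith) {
            int ir0, ir1;
            row_range(nr, nth, ith, &ir0, &ir1);
            printf("thread %d: rows [%d, %d)\n", ith, ir0, ir1);
        }
        return 0;
    }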
@@ -16542,10 +16273,10 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
         case GGML_OP_GET_ROWS:
         case GGML_OP_GET_ROWS_BACK:
         case GGML_OP_DIAG:
-        case GGML_OP_DIAG_MASK_ZERO:
             {
                 n_tasks = 1;
             } break;
+        case GGML_OP_DIAG_MASK_ZERO:
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_SOFT_MAX:
         case GGML_OP_SOFT_MAX_BACK:
@@ -16838,10 +16569,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
 void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);

-    struct
-    GGML_ASSERT(buf);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);

-    cplan.work_data =
+    cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;

     ggml_graph_compute(cgraph, &cplan);
 }
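ggml_graph_compute_with_ctx now takes its scratch space from the context as a GGML_OBJECT_WORK_BUFFER object instead of asserting on a separately prepared buffer. Callers that manage memory themselves can still use the two-step plan/compute path the wrapper builds on; a hedged sketch, with the allocation policy left entirely to the caller:

    #include <stdint.h>
    #include <stdlib.h>
    #include "ggml.h"

    // Sketch: plan first, provide work memory, then compute.
    static void compute_graph(struct ggml_cgraph * gf, int n_threads) {
        struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads);

        uint8_t * work = NULL;
        if (cplan.work_size > 0) {
            work = malloc(cplan.work_size);   /* caller-owned scratch buffer */
        }
        cplan.work_data = work;

        ggml_graph_compute(gf, &cplan);

        free(work);
    }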
@@ -16992,7 +16722,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                 fwrite(&nb, sizeof(uint64_t), 1, fout);
             }

-            fwrite(tensor->name,
+            fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
+            fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);

             // dump the data
             // TODO: pad this to 32 byte boundary
@@ -17025,7 +16756,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                 fwrite(&nb, sizeof(uint64_t), 1, fout);
             }

-            fwrite(tensor->name,
+            fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
+            fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);

             // output the op arguments
             {
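Exported graphs now write each tensor's op_params blob immediately after its fixed-width name, and the import path below consumes the two fields in the same order. A minimal sketch of that fixed-width write/read pairing; the sizes and file name are illustrative (ggml uses GGML_MAX_NAME and GGML_MAX_OP_PARAMS), and every other record field is omitted:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define NAME_LEN   48  /* illustrative stand-in for GGML_MAX_NAME */
    #define PARAMS_LEN 32  /* illustrative stand-in for GGML_MAX_OP_PARAMS */

    int main(void) {
        char name[NAME_LEN] = "example_tensor";
        int32_t params[PARAMS_LEN / sizeof(int32_t)] = { 4, 128, 0, 0 };

        /* write: fixed-width fields, same order as the export path */
        FILE * f = fopen("tensor_header.bin", "wb");
        fwrite(name, sizeof(char), NAME_LEN, f);
        fwrite(params, sizeof(char), PARAMS_LEN, f);
        fclose(f);

        /* read: consume the same two fields back-to-back, as the import path does */
        char name_in[NAME_LEN];
        char params_in[PARAMS_LEN];
        f = fopen("tensor_header.bin", "rb");
        fread(name_in, sizeof(char), NAME_LEN, f);
        fread(params_in, sizeof(char), PARAMS_LEN, f);
        fclose(f);

        int32_t first_param;
        memcpy(&first_param, params_in, sizeof(first_param));
        printf("name=%s first_param=%d\n", name_in, first_param);
        return 0;
    }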
@@ -17206,7 +16938,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **

             tensor->op = (enum ggml_op) op;

-            memcpy(tensor->name,
+            memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
+            memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;

             tensor->data = (void *) ptr;

@@ -17251,7 +16984,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                 nb[j] = nb_cur;
             }

-            const char * ptr_name
+            const char * ptr_name = ptr; ptr += GGML_MAX_NAME;
+            const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS;

             const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t);

@@ -17288,8 +17022,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                     {
                         tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0);

-
-                        memcpy(&offs,
+                        size_t offs;
+                        memcpy(&offs, ptr_op_params, sizeof(offs));

                         tensor->data = ((char *) tensor->data) + offs;
                     } break;
@@ -17309,7 +17043,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                     } break;
             }

-            memcpy(tensor->name,
+            memcpy(tensor->name, ptr_name, GGML_MAX_NAME);
+            memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS);

             for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                 tensor->nb[j] = nb[j];
@@ -17343,7 +17078,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                 i,
                 node->ne[0], node->ne[1], node->ne[2],
-
+                ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
                 (double) node->perf_cycles / (double) ggml_cycles_per_ms(),
                 (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
                 (double) node->perf_time_us / 1000.0,
@@ -17357,7 +17092,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
         GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
                 i,
                 node->ne[0], node->ne[1],
-
+                ggml_op_name(node->op));
     }

     for (int i = 0; i < GGML_OP_COUNT; i++) {
@@ -17365,7 +17100,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
             continue;
         }

-        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",
+        GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0);
     }

     GGML_PRINT("========================================\n");
@@ -17459,13 +17194,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
         }

         if (node->n_dims == 2) {
-            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1],
+            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
         } else {
-            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2],
+            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
         }

         if (node->grad) {
-            fprintf(fp, " | <g>%s\"; ]\n",
+            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(node->grad->op));
         } else {
             fprintf(fp, "\"; ]\n");
         }