llama_cpp 0.3.4 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +293 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +304 -99
- data/ext/llama_cpp/src/ggml-metal.h +7 -0
- data/ext/llama_cpp/src/ggml-metal.m +201 -71
- data/ext/llama_cpp/src/ggml-metal.metal +68 -54
- data/ext/llama_cpp/src/ggml.c +713 -978
- data/ext/llama_cpp/src/ggml.h +82 -17
- data/ext/llama_cpp/src/k_quants.c +327 -3
- data/ext/llama_cpp/src/llama.cpp +524 -121
- data/ext/llama_cpp/src/llama.h +60 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +24 -0
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -3440,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
 
 //inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
 inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
-#if defined(GGML_SIMD)
+#if defined(GGML_USE_ACCELERATE)
+    vDSP_vsmul(y, 1, &v, y, 1, n);
+#elif defined(GGML_SIMD)
     const int np = (n & ~(GGML_F32_STEP - 1));
 
     GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
@@ -3603,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
 #endif
 }
 
-inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x) {
+inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
     ggml_float sum = 0.0;
     for (int i = 0; i < n; ++i) {
         sum += (ggml_float)x[i];
@@ -3611,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x
     *s = sum;
 }
 
+inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) {
+    float sum = 0.0f;
+    for (int i = 0; i < n; ++i) {
+        sum += GGML_FP16_TO_FP32(x[i]);
+    }
+    *s = sum;
+}
+
 inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
 #ifndef GGML_USE_ACCELERATE
     float max = -INFINITY;
@@ -3750,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "ARGMAX",
     "REPEAT",
     "REPEAT_BACK",
-    "ABS",
-    "SGN",
-    "NEG",
-    "STEP",
-    "TANH",
-    "ELU",
-    "RELU",
-    "GELU",
-    "GELU_QUICK",
-    "SILU",
     "SILU_BACK",
     "NORM",
     "RMS_NORM",
@@ -3798,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "WIN_PART",
     "WIN_UNPART",
 
+    "UNARY",
+
     "MAP_UNARY",
     "MAP_BINARY",
 
@@ -3809,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -3830,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "argmax(x)",
     "repeat(x)",
     "repeat_back(x)",
-    "abs(x)",
-    "sgn(x)",
-    "-x",
-    "step(x)",
-    "tanh(x)",
-    "elu(x)",
-    "relu(x)",
-    "gelu(x)",
-    "gelu_quick(x)",
-    "silu(x)",
     "silu_back(x)",
     "norm(x)",
     "rms_norm(x)",
@@ -3878,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "win_part(x)",
     "win_unpart(x)",
 
+    "unary(x)",
+
     "f(x)",
     "f(x,y)",
 
@@ -3889,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -4077,8 +4071,8 @@ bool ggml_is_numa(void) {
 ////////////////////////////////////////////////////////////////////////////////
 
 void ggml_print_object(const struct ggml_object * obj) {
-    GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offs, obj->size, (const void *) obj->next);
+    GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+            obj->type, obj->offs, obj->size, (const void *) obj->next);
 }
 
 void ggml_print_objects(const struct ggml_context * ctx) {
@@ -4145,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) {
     return GGML_OP_NAME[op];
 }
 
+const char * ggml_op_symbol(enum ggml_op op) {
+    return GGML_OP_SYMBOL[op];
+}
+
 size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return GGML_TYPE_SIZE[tensor->type];
 }
@@ -4214,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
 }
 
 size_t ggml_tensor_overhead(void) {
-    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
 }
 
 bool ggml_is_transposed(const struct ggml_tensor * tensor) {
@@ -4231,6 +4229,15 @@ bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
         tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
 }
 
+static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * tensor) {
+    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+
+    return
+        tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
+        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
+        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+}
+
 bool ggml_is_permuted(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
@@ -4376,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         return NULL;
     }
 
-    const size_t mem_size =
+    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
 
     *ctx = (struct ggml_context) {
         /*.mem_size =*/ mem_size,
@@ -4443,6 +4450,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
     return result;
 }
 
+bool ggml_get_no_alloc(struct ggml_context * ctx) {
+    return ctx->no_alloc;
+}
+
 void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
     ctx->no_alloc = no_alloc;
 }
@@ -4461,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     struct ggml_object * obj = ctx->objects_begin;
 
     while (obj != NULL) {
-
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
 
-
+            const size_t size = ggml_nbytes(tensor);
 
-
-
+            if (max_size < size) {
+                max_size = size;
+            }
         }
 
         obj = obj->next;
@@ -4480,7 +4493,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
 // this is an error prone process, but it is necessary to support inplace
 // operators when using scratch buffers
 // TODO: implement a better way
-void ggml_scratch_save(struct ggml_context * ctx) {
+static void ggml_scratch_save(struct ggml_context * ctx) {
     // this is needed to allow opt tensors to store their data
     // TODO: again, need to find a better way
     ctx->no_alloc_save = ctx->no_alloc;
@@ -4490,7 +4503,7 @@ void ggml_scratch_save(struct ggml_context * ctx) {
     ctx->scratch.data = NULL;
 }
 
-void ggml_scratch_load(struct ggml_context * ctx) {
+static void ggml_scratch_load(struct ggml_context * ctx) {
     ctx->no_alloc = ctx->no_alloc_save;
 
     ctx->scratch = ctx->scratch_save;
@@ -4498,12 +4511,7 @@ void ggml_scratch_load(struct ggml_context * ctx) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-struct ggml_tensor * ggml_new_tensor_impl(
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int n_dims,
-        const int64_t* ne,
-        void* data) {
+static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;
 
@@ -4511,77 +4519,79 @@ struct ggml_tensor * ggml_new_tensor_impl(
     const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
     const size_t cur_end = cur_offs + cur_size;
 
-
-
-    if (data == NULL && !ctx->no_alloc) {
-        size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
-        for (int i = 1; i < n_dims; i++) {
-            size_needed *= ne[i];
-        }
-        // align to GGML_MEM_ALIGN
-        size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-    }
+    // align to GGML_MEM_ALIGN
+    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
 
     char * const mem_buffer = ctx->mem_buffer;
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
-    if (
-
+    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                __func__, cur_end + size_needed, ctx->mem_size);
+        assert(false);
+        return NULL;
+    }
 
-
-
-
-
-
-
+    *obj_new = (struct ggml_object) {
+        .offs = cur_end + GGML_OBJECT_SIZE,
+        .size = size_needed,
+        .next = NULL,
+        .type = type,
+    };
 
-
-
-
-
-    };
+    ggml_assert_aligned(mem_buffer + obj_new->offs);
+
+    if (obj_cur != NULL) {
+        obj_cur->next = obj_new;
     } else {
-
-
-
-
-
+        // this is the first object in this context
+        ctx->objects_begin = obj_new;
+    }
+
+    ctx->objects_end = obj_new;
+
+    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+
+    return obj_new;
+}
+
+static struct ggml_tensor * ggml_new_tensor_impl(
+        struct ggml_context * ctx,
+        enum ggml_type type,
+        int n_dims,
+        const int64_t* ne,
+        void* data) {
+
+    size_t data_size = 0;
+
+    if (data == NULL && !ctx->no_alloc) {
+        data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
+        for (int i = 1; i < n_dims; i++) {
+            data_size *= ne[i];
         }
+    }
 
-
-
-
+    if (ctx->scratch.data != NULL && data == NULL) {
+        // allocate tensor data in the scratch buffer
+        if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
             assert(false);
             return NULL;
         }
 
         data = (char * const) ctx->scratch.data + ctx->scratch.offs;
 
-
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = GGML_TENSOR_SIZE,
-            .next = NULL,
-        };
-
-        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
-
-        ctx->scratch.offs += size_needed;
-    }
+        ctx->scratch.offs += data_size;
 
-
-        obj_cur->next = obj_new;
-    } else {
-        // this is the first object in this context
-        ctx->objects_begin = obj_new;
+        data_size = 0;
     }
 
-
-
-    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
 
-
+    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
-
+    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
     *result = (struct ggml_tensor) {
         /*.type =*/ type,
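The new ggml_new_object path above pads every header request up to GGML_MEM_ALIGN before placing it in the context pool. As a minimal, self-contained sketch of that rounding only (the macro below mirrors how GGML_PAD is used in this diff and the 16-byte alignment is an assumption for illustration, not part of the gem):

    #include <stdio.h>
    #include <stddef.h>

    #define MEM_ALIGN 16                                      /* assumed stand-in for GGML_MEM_ALIGN */
    #define PAD(x, n) (((x) + (n) - 1) & ~((size_t)(n) - 1))  /* round x up to a multiple of n */

    int main(void) {
        size_t requests[] = { 1, 15, 16, 17, 100 };
        for (size_t i = 0; i < sizeof(requests)/sizeof(requests[0]); ++i) {
            printf("requested %3zu bytes -> reserved %3zu bytes\n",
                   requests[i], PAD(requests[i], MEM_ALIGN));
        }
        return 0;
    }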
@@ -4590,6 +4600,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
         /*.ne =*/ { 1, 1, 1, 1 },
         /*.nb =*/ { 0, 0, 0, 0 },
         /*.op =*/ GGML_OP_NONE,
+        /*.op_params =*/ {0},
         /*.is_param =*/ false,
         /*.grad =*/ NULL,
         /*.src =*/ { NULL },
@@ -4620,6 +4631,21 @@ struct ggml_tensor * ggml_new_tensor_impl(
     return result;
 }
 
+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    assert(params_size <= GGML_MAX_OP_PARAMS);
+    memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    ((int32_t *)(tensor->op_params))[i] = value;
+}
+
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum ggml_type type,
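The ggml_set_op_params helpers added above copy small operator arguments into the tensor's fixed op_params buffer instead of allocating a separate GGML_TYPE_I32 tensor, which is the pattern the remaining hunks in this file switch over to. A rough, self-contained sketch of that idea (the stand-in struct and the 32-byte capacity below are assumptions for illustration, not ggml's real definitions):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MAX_OP_PARAMS 32                      /* stand-in for GGML_MAX_OP_PARAMS */

    struct toy_tensor {
        char op_params[MAX_OP_PARAMS];            /* small inline parameter buffer */
    };

    static void toy_set_op_params(struct toy_tensor * t, const void * params, size_t size) {
        assert(size <= MAX_OP_PARAMS);            /* mirrors the assert in the diff */
        memcpy(t->op_params, params, size);
    }

    int main(void) {
        struct toy_tensor t = {0};
        int32_t params[] = { 4, 1 };              /* e.g. { n_past, inplace } for a masking op */
        toy_set_op_params(&t, params, sizeof(params));
        printf("n_past = %d, inplace = %d\n",
               ((int32_t *) t.op_params)[0], ((int32_t *) t.op_params)[1]);
        return 0;
    }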
@@ -4951,6 +4977,11 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
     return (float *)(tensor->data);
 }
 
+enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
+    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
+}
+
 const char * ggml_get_name(const struct ggml_tensor * tensor) {
     return tensor->name;
 }
@@ -4989,9 +5020,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-
-
-
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
+            if (strcmp(cur->name, name) == 0) {
+                return cur;
+            }
         }
 
         obj = obj->next;
@@ -5004,7 +5037,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
 
 // ggml_dup
 
-struct ggml_tensor * ggml_dup_impl(
+static struct ggml_tensor * ggml_dup_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5019,7 +5052,6 @@ struct ggml_tensor * ggml_dup_impl(
     result->op = GGML_OP_DUP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5038,7 +5070,7 @@ struct ggml_tensor * ggml_dup_inplace(
 
 // ggml_add
 
-struct ggml_tensor * ggml_add_impl(
+static struct ggml_tensor * ggml_add_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5081,7 +5113,7 @@ struct ggml_tensor * ggml_add_inplace(
 
 // ggml_add1
 
-struct ggml_tensor * ggml_add1_impl(
+static struct ggml_tensor * ggml_add1_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5121,7 +5153,7 @@ struct ggml_tensor * ggml_add1_inplace(
 
 // ggml_acc
 
-struct ggml_tensor * ggml_acc_impl(
+static struct ggml_tensor * ggml_acc_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5143,23 +5175,13 @@ struct ggml_tensor * ggml_acc_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-    ((int32_t *) c->data)[0] = nb1;
-    ((int32_t *) c->data)[1] = nb2;
-    ((int32_t *) c->data)[2] = nb3;
-    ((int32_t *) c->data)[3] = offset;
-    ((int32_t *) c->data)[4] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ACC;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 }
@@ -5188,7 +5210,7 @@ struct ggml_tensor * ggml_acc_inplace(
 
 // ggml_sub
 
-struct ggml_tensor * ggml_sub_impl(
+static struct ggml_tensor * ggml_sub_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5227,7 +5249,7 @@ struct ggml_tensor * ggml_sub_inplace(
 
 // ggml_mul
 
-struct ggml_tensor * ggml_mul_impl(
+static struct ggml_tensor * ggml_mul_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5274,7 +5296,7 @@ struct ggml_tensor * ggml_mul_inplace(
 
 // ggml_div
 
-struct ggml_tensor * ggml_div_impl(
+static struct ggml_tensor * ggml_div_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5317,7 +5339,7 @@ struct ggml_tensor * ggml_div_inplace(
 
 // ggml_sqr
 
-struct ggml_tensor * ggml_sqr_impl(
+static struct ggml_tensor * ggml_sqr_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5332,7 +5354,6 @@ struct ggml_tensor * ggml_sqr_impl(
     result->op = GGML_OP_SQR;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5351,7 +5372,7 @@ struct ggml_tensor * ggml_sqr_inplace(
 
 // ggml_sqrt
 
-struct ggml_tensor * ggml_sqrt_impl(
+static struct ggml_tensor * ggml_sqrt_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5366,7 +5387,6 @@ struct ggml_tensor * ggml_sqrt_impl(
     result->op = GGML_OP_SQRT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5386,7 +5406,7 @@ struct ggml_tensor * ggml_sqrt_inplace(
 
 // ggml_log
 
-struct ggml_tensor * ggml_log_impl(
+static struct ggml_tensor * ggml_log_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5401,7 +5421,6 @@ struct ggml_tensor * ggml_log_impl(
     result->op = GGML_OP_LOG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5434,7 +5453,6 @@ struct ggml_tensor * ggml_sum(
     result->op = GGML_OP_SUM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5461,7 +5479,6 @@ struct ggml_tensor * ggml_sum_rows(
     result->op = GGML_OP_SUM_ROWS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5484,7 +5501,6 @@ struct ggml_tensor * ggml_mean(
     result->op = GGML_OP_MEAN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5508,7 +5524,6 @@ struct ggml_tensor * ggml_argmax(
     result->op = GGML_OP_ARGMAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5571,343 +5586,142 @@ struct ggml_tensor * ggml_repeat_back(
 
 // ggml_abs
 
-struct ggml_tensor * ggml_abs_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_ABS;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_abs(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_abs_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
 }
 
 struct ggml_tensor * ggml_abs_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_abs_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
 }
 
-
 // ggml_sgn
 
-struct ggml_tensor * ggml_sgn_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_SGN;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_sgn(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_sgn_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
 }
 
 struct ggml_tensor * ggml_sgn_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_sgn_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
 }
 
 // ggml_neg
 
-struct ggml_tensor * ggml_neg_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_NEG;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_neg(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_neg_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
 }
 
 struct ggml_tensor * ggml_neg_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_neg_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
 }
 
 // ggml_step
 
-struct ggml_tensor * ggml_step_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_STEP;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_step(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_step_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
 }
 
 struct ggml_tensor * ggml_step_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_step_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
 }
 
 // ggml_tanh
 
-struct ggml_tensor * ggml_tanh_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_TANH;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_tanh(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_tanh_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
 }
 
 struct ggml_tensor * ggml_tanh_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_tanh_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
 }
 
 // ggml_elu
 
-struct ggml_tensor * ggml_elu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_ELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_elu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_elu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
 }
 
 struct ggml_tensor * ggml_elu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_elu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
 }
 
 // ggml_relu
 
-struct ggml_tensor * ggml_relu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_RELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_relu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_relu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
 }
 
 struct ggml_tensor * ggml_relu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_relu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
 }
 
 // ggml_gelu
 
-struct ggml_tensor * ggml_gelu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_GELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_gelu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
 }
 
 struct ggml_tensor * ggml_gelu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
 }
 
 // ggml_gelu_quick
 
-struct ggml_tensor * ggml_gelu_quick_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_GELU_QUICK;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_gelu_quick(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_quick_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
 }
 
 struct ggml_tensor * ggml_gelu_quick_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_quick_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
 }
 
 // ggml_silu
 
-struct ggml_tensor * ggml_silu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op = GGML_OP_SILU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_silu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_silu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
 }
 
 struct ggml_tensor * ggml_silu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_silu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
 }
 
 // ggml_silu_back
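After this consolidation, the per-activation wrappers still exist but simply forward to ggml_unary with a GGML_UNARY_OP_* code stored in op_params. A short caller-side sketch of what that looks like in 0.3.5 (the surrounding context and tensor setup follow the usual ggml pattern and are assumed, not shown in this diff):

    #include "ggml.h"

    // Both calls should build the same kind of node: op == GGML_OP_UNARY with RELU as the variant.
    static void relu_two_ways(struct ggml_context * ctx, struct ggml_tensor * x) {
        struct ggml_tensor * a = ggml_relu (ctx, x);                      // wrapper kept for compatibility
        struct ggml_tensor * b = ggml_unary(ctx, x, GGML_UNARY_OP_RELU);  // generic entry point
        GGML_ASSERT(ggml_get_unary_op(a) == GGML_UNARY_OP_RELU);
        GGML_ASSERT(ggml_get_unary_op(b) == GGML_UNARY_OP_RELU);
    }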
@@ -5935,7 +5749,7 @@ struct ggml_tensor * ggml_silu_back(
 
 // ggml_norm
 
-struct ggml_tensor * ggml_norm_impl(
+static struct ggml_tensor * ggml_norm_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5948,10 +5762,11 @@ struct ggml_tensor * ggml_norm_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
+    // TODO: maybe store epsilon here?
+
     result->op = GGML_OP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
@@ -5968,9 +5783,10 @@ struct ggml_tensor * ggml_norm_inplace(
     return ggml_norm_impl(ctx, a, true);
 }
 
-struct ggml_tensor * ggml_rms_norm_impl(
+static struct ggml_tensor * ggml_rms_norm_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
+        float eps,
         bool inplace) {
     bool is_node = false;
 
@@ -5980,24 +5796,27 @@ struct ggml_tensor * ggml_rms_norm_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
+    ggml_set_op_params(result, &eps, sizeof(eps));
+
     result->op = GGML_OP_RMS_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
 
 struct ggml_tensor * ggml_rms_norm(
         struct ggml_context * ctx,
-        struct ggml_tensor * a
-
+        struct ggml_tensor * a,
+        float eps) {
+    return ggml_rms_norm_impl(ctx, a, eps, false);
 }
 
 struct ggml_tensor * ggml_rms_norm_inplace(
         struct ggml_context * ctx,
-        struct ggml_tensor * a
-
+        struct ggml_tensor * a,
+        float eps) {
+    return ggml_rms_norm_impl(ctx, a, eps, true);
 }
 
 struct ggml_tensor * ggml_rms_norm_back(
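With this change ggml_rms_norm and ggml_rms_norm_inplace take the epsilon explicitly and stash it via ggml_set_op_params, so callers on 0.3.5 must pass it. A sketch of the updated call site (the 1e-6f value is only an example, not taken from this diff):

    #include "ggml.h"

    static struct ggml_tensor * rms_normalize(struct ggml_context * ctx, struct ggml_tensor * cur) {
        // old API: ggml_rms_norm(ctx, cur); the new API carries the epsilon as an argument.
        return ggml_rms_norm(ctx, cur, 1e-6f);
    }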
@@ -6076,7 +5895,7 @@ struct ggml_tensor * ggml_out_prod(
 
 // ggml_scale
 
-struct ggml_tensor * ggml_scale_impl(
+static struct ggml_tensor * ggml_scale_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6116,7 +5935,7 @@ struct ggml_tensor * ggml_scale_inplace(
 
 // ggml_set
 
-struct ggml_tensor * ggml_set_impl(
+static struct ggml_tensor * ggml_set_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6136,23 +5955,13 @@ struct ggml_tensor * ggml_set_impl(
     // make a view of the destination
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-    (( int32_t * ) c->data)[0] = nb1;
-    (( int32_t * ) c->data)[1] = nb2;
-    (( int32_t * ) c->data)[2] = nb3;
-    (( int32_t * ) c->data)[3] = offset;
-    (( int32_t * ) c->data)[4] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_SET;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 }
@@ -6216,7 +6025,7 @@ struct ggml_tensor * ggml_set_2d_inplace(
 
 // ggml_cpy
 
-struct ggml_tensor * ggml_cpy_impl(
+static struct ggml_tensor * ggml_cpy_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6261,7 +6070,7 @@ struct ggml_tensor * ggml_cpy_inplace(
 
 // ggml_cont
 
-struct ggml_tensor * ggml_cont_impl(
+static struct ggml_tensor * ggml_cont_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -6277,7 +6086,6 @@ struct ggml_tensor * ggml_cont_impl(
     result->op = GGML_OP_CONT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6321,7 +6129,6 @@ struct ggml_tensor * ggml_reshape(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6346,7 +6153,6 @@ struct ggml_tensor * ggml_reshape_1d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6372,7 +6178,6 @@ struct ggml_tensor * ggml_reshape_2d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6399,7 +6204,6 @@ struct ggml_tensor * ggml_reshape_3d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6428,7 +6232,6 @@ struct ggml_tensor * ggml_reshape_4d(
     result->op = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6450,19 +6253,11 @@ struct ggml_tensor * ggml_view_1d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6488,13 +6283,7 @@ struct ggml_tensor * ggml_view_2d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
@@ -6503,8 +6292,6 @@ struct ggml_tensor * ggml_view_2d(
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6532,13 +6319,7 @@ struct ggml_tensor * ggml_view_3d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
@@ -6547,8 +6328,6 @@ struct ggml_tensor * ggml_view_3d(
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6578,13 +6357,7 @@ struct ggml_tensor * ggml_view_4d(
     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
     ggml_format_name(result, "%s (view)", a->name);
 
-
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    ggml_set_op_params(result, &offset, sizeof(offset));
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
@@ -6593,8 +6366,6 @@ struct ggml_tensor * ggml_view_4d(
     result->op = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6655,22 +6426,9 @@ struct ggml_tensor * ggml_permute(
     result->op = GGML_OP_PERMUTE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-
-    if (is_node) {
-        ggml_scratch_save(ctx);
 
-
-
-        ((int32_t *) b->data)[0] = axis0;
-        ((int32_t *) b->data)[1] = axis1;
-        ((int32_t *) b->data)[2] = axis2;
-        ((int32_t *) b->data)[3] = axis3;
-
-        ggml_scratch_load(ctx);
-
-        result->src[2] = b;
-    }
+    int32_t params[] = { axis0, axis1, axis2, axis3 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     return result;
 }
@@ -6698,7 +6456,6 @@ struct ggml_tensor * ggml_transpose(
     result->op = GGML_OP_TRANSPOSE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6776,7 +6533,6 @@ struct ggml_tensor * ggml_diag(
     result->op = GGML_OP_DIAG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
    return result;
 }
@@ -6784,7 +6540,7 @@ struct ggml_tensor * ggml_diag(
 
 // ggml_diag_mask_inf
 
-struct ggml_tensor * ggml_diag_mask_inf_impl(
+static struct ggml_tensor * ggml_diag_mask_inf_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_past,
@@ -6797,19 +6553,12 @@ struct ggml_tensor * ggml_diag_mask_inf_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_INF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -6831,7 +6580,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace(
 
 // ggml_diag_mask_zero
 
-struct ggml_tensor * ggml_diag_mask_zero_impl(
+static struct ggml_tensor * ggml_diag_mask_zero_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_past,
@@ -6844,20 +6593,12 @@ struct ggml_tensor * ggml_diag_mask_zero_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(b, "n_past, inplace");
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_DIAG_MASK_ZERO;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -6878,7 +6619,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace(
 
 // ggml_soft_max
 
-struct ggml_tensor * ggml_soft_max_impl(
+static struct ggml_tensor * ggml_soft_max_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -6893,7 +6634,6 @@ struct ggml_tensor * ggml_soft_max_impl(
     result->op = GGML_OP_SOFT_MAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6913,7 +6653,7 @@ struct ggml_tensor * ggml_soft_max_inplace(
 
 // ggml_soft_max_back
 
-struct ggml_tensor * ggml_soft_max_back_impl(
+static struct ggml_tensor * ggml_soft_max_back_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6950,7 +6690,7 @@ struct ggml_tensor * ggml_soft_max_back_inplace(
 
 // ggml_rope
 
-struct ggml_tensor * ggml_rope_impl(
+static struct ggml_tensor * ggml_rope_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_past,
@@ -6969,23 +6709,14 @@ struct ggml_tensor * ggml_rope_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-
-
-
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = n_dims;
-    ((int32_t *) b->data)[2] = mode;
-    ((int32_t *) b->data)[3] = n_ctx;
-    memcpy((int32_t *) b->data + 4, &freq_base, sizeof(float));
-    memcpy((int32_t *) b->data + 5, &freq_scale, sizeof(float));
-
-    ggml_scratch_load(ctx);
+    int32_t params[6] = { n_past, n_dims, mode, n_ctx };
+    memcpy(params + 4, &freq_base, sizeof(float));
+    memcpy(params + 5, &freq_scale, sizeof(float));
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_ROPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -7042,22 +6773,12 @@ struct ggml_tensor * ggml_rope_back(
 
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
-    ggml_set_name(b, "n_past, n_dims, mode");
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = n_dims;
-    ((int32_t *) b->data)[2] = mode;
-    ((int32_t *) b->data)[3] = n_ctx;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, n_dims, mode, n_ctx };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_ROPE_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -7082,21 +6803,13 @@ struct ggml_tensor * ggml_alibi(
     //struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);
 
-
-
-
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = n_head;
-    GGML_ASSERT(sizeof(float) == sizeof(int32_t));
-    (((float *) b->data)[2]) = bias_max;
-
-    ggml_scratch_load(ctx);
+    int32_t op_params[3] = { n_past, n_head };
+    memcpy(op_params + 2, &bias_max, sizeof(float));
+    ggml_set_op_params(result, &op_params, sizeof(op_params));
 
     result->op = GGML_OP_ALIBI;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -7118,19 +6831,12 @@ struct ggml_tensor * ggml_clamp(
     // TODO: when implement backward, fix this:
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);
 
-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
-
-    ((float *) b->data)[0] = min;
-    ((float *) b->data)[1] = max;
-
-    ggml_scratch_load(ctx);
+    float params[] = { min, max };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_CLAMP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -7163,18 +6869,13 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
     };
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
 
-
-
-    ((int32_t*)c->data)[0] = s0;
-    ((int32_t*)c->data)[1] = p0;
-    ((int32_t*)c->data)[2] = d0;
-    ggml_scratch_load(ctx);
+    int32_t params[] = { s0, p0, d0 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_CONV_1D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 }
@@ -7207,21 +6908,13 @@ struct ggml_tensor* ggml_conv_2d(
     };
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
 
-
-
-    ((int32_t*)c->data)[0] = s0;
-    ((int32_t*)c->data)[1] = s1;
-    ((int32_t*)c->data)[2] = p0;
-    ((int32_t*)c->data)[3] = p1;
-    ((int32_t*)c->data)[4] = d0;
-    ((int32_t*)c->data)[5] = d1;
-    ggml_scratch_load(ctx);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op = GGML_OP_CONV_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 
@@ -7245,7 +6938,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
|
7245
6938
|
return (ins + 2 * p - ks) / s + 1;
|
7246
6939
|
}
|
7247
6940
|
|
7248
|
-
//
|
6941
|
+
// ggml_pool_1d
|
7249
6942
|
|
7250
6943
|
struct ggml_tensor* ggml_pool_1d(
|
7251
6944
|
struct ggml_context * ctx,
|
@@ -7268,18 +6961,12 @@ struct ggml_tensor* ggml_pool_1d(
|
|
7268
6961
|
};
|
7269
6962
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7270
6963
|
|
7271
|
-
|
7272
|
-
|
7273
|
-
((int32_t*)c->data)[0] = op;
|
7274
|
-
((int32_t*)c->data)[1] = k0;
|
7275
|
-
((int32_t*)c->data)[2] = s0;
|
7276
|
-
((int32_t*)c->data)[3] = p0;
|
7277
|
-
ggml_scratch_load(ctx);
|
6964
|
+
int32_t params[] = { op, k0, s0, p0 };
|
6965
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7278
6966
|
|
7279
6967
|
result->op = GGML_OP_POOL_1D;
|
7280
6968
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7281
6969
|
result->src[0] = a;
|
7282
|
-
result->src[1] = c;
|
7283
6970
|
|
7284
6971
|
return result;
|
7285
6972
|
}
|
@@ -7311,21 +6998,12 @@ struct ggml_tensor* ggml_pool_2d(
|
|
7311
6998
|
};
|
7312
6999
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7313
7000
|
|
7314
|
-
|
7315
|
-
|
7316
|
-
((int32_t*)c->data)[0] = op;
|
7317
|
-
((int32_t*)c->data)[1] = k0;
|
7318
|
-
((int32_t*)c->data)[2] = k1;
|
7319
|
-
((int32_t*)c->data)[3] = s0;
|
7320
|
-
((int32_t*)c->data)[4] = s1;
|
7321
|
-
((int32_t*)c->data)[5] = p0;
|
7322
|
-
((int32_t*)c->data)[6] = p1;
|
7323
|
-
ggml_scratch_load(ctx);
|
7001
|
+
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
7002
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7324
7003
|
|
7325
7004
|
result->op = GGML_OP_POOL_2D;
|
7326
7005
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7327
7006
|
result->src[0] = a;
|
7328
|
-
result->src[1] = c;
|
7329
7007
|
|
7330
7008
|
return result;
|
7331
7009
|
}
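ggml_pool_1d and ggml_pool_2d above now write their whole option block into op_params as one int32_t array, and the corresponding compute functions later in this diff read it back with a plain cast, (const int32_t *) dst->op_params. A small sketch of that producer/consumer split; the enum, struct and the toy pooling loop are written for the example and are not the library's kernel.

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>

enum op_pool { POOL_MAX, POOL_AVG };   /* illustrative stand-in for the pool-op enum */

struct node {
    int32_t op_params[8];              /* assumed parameter slots */
};

int main(void) {
    struct node pool = {0};

    /* producer: pack { op, k0, s0, p0 } the way ggml_pool_1d does */
    int32_t params[] = { POOL_AVG, 2, 2, 0 };
    memcpy(pool.op_params, params, sizeof(params));

    /* consumer: read the options back with a cast, as the forward pass does */
    const int32_t * opts = (const int32_t *) pool.op_params;
    const enum op_pool op = (enum op_pool) opts[0];
    const int k0 = opts[1];
    const int s0 = opts[2];
    const int p0 = opts[3];
    (void) p0;                         /* padding treated as 0 in this sketch */

    /* toy 1-D pooling over a fixed row, just to exercise the options */
    const float src[6] = { 1, 2, 3, 4, 5, 6 };
    const int   n_out  = (6 - k0) / s0 + 1;
    for (int o = 0; o < n_out; ++o) {
        float acc = (op == POOL_MAX) ? src[o * s0] : 0.0f;
        for (int j = 0; j < k0; ++j) {
            const float v = src[o * s0 + j];
            if (op == POOL_MAX) { if (v > acc) acc = v; }
            else                { acc += v; }
        }
        if (op == POOL_AVG) acc /= k0;
        printf("out[%d] = %.1f\n", o, acc);
    }
    return 0;
}
```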
|
@@ -7348,14 +7026,16 @@ struct ggml_tensor * ggml_flash_attn(
|
|
7348
7026
|
}
|
7349
7027
|
|
7350
7028
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
7351
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7029
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
|
7030
|
+
|
7031
|
+
int32_t t = masked ? 1 : 0;
|
7032
|
+
ggml_set_op_params(result, &t, sizeof(t));
|
7352
7033
|
|
7353
7034
|
result->op = GGML_OP_FLASH_ATTN;
|
7354
7035
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7355
7036
|
result->src[0] = q;
|
7356
7037
|
result->src[1] = k;
|
7357
7038
|
result->src[2] = v;
|
7358
|
-
result->src[3] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7359
7039
|
|
7360
7040
|
return result;
|
7361
7041
|
}
|
@@ -7379,7 +7059,7 @@ struct ggml_tensor * ggml_flash_ff(
|
|
7379
7059
|
}
|
7380
7060
|
|
7381
7061
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7382
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7062
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
|
7383
7063
|
|
7384
7064
|
result->op = GGML_OP_FLASH_FF;
|
7385
7065
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
@@ -7445,13 +7125,15 @@ struct ggml_tensor * ggml_flash_attn_back(
|
|
7445
7125
|
|
7446
7126
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7447
7127
|
|
7128
|
+
int32_t masked_i = masked ? 1 : 0;
|
7129
|
+
ggml_set_op_params(result, &masked_i, sizeof(masked_i));
|
7130
|
+
|
7448
7131
|
result->op = GGML_OP_FLASH_ATTN_BACK;
|
7449
7132
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7450
7133
|
result->src[0] = q;
|
7451
7134
|
result->src[1] = k;
|
7452
7135
|
result->src[2] = v;
|
7453
7136
|
result->src[3] = d;
|
7454
|
-
result->src[4] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7455
7137
|
|
7456
7138
|
return result;
|
7457
7139
|
}
|
@@ -7484,21 +7166,12 @@ struct ggml_tensor * ggml_win_part(
|
|
7484
7166
|
|
7485
7167
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7486
7168
|
|
7487
|
-
|
7488
|
-
|
7489
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
7490
|
-
|
7491
|
-
((int32_t *) b->data)[0] = npx;
|
7492
|
-
((int32_t *) b->data)[1] = npy;
|
7493
|
-
((int32_t *) b->data)[2] = w;
|
7494
|
-
|
7495
|
-
ggml_scratch_load(ctx);
|
7169
|
+
int32_t params[] = { npx, npy, w };
|
7170
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7496
7171
|
|
7497
7172
|
result->op = GGML_OP_WIN_PART;
|
7498
7173
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7499
7174
|
result->src[0] = a;
|
7500
|
-
result->src[1] = NULL;
|
7501
|
-
result->src[2] = b;
|
7502
7175
|
|
7503
7176
|
return result;
|
7504
7177
|
}
|
@@ -7523,26 +7196,57 @@ struct ggml_tensor * ggml_win_unpart(
|
|
7523
7196
|
const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
|
7524
7197
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7525
7198
|
|
7526
|
-
|
7199
|
+
int32_t params[] = { w };
|
7200
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7527
7201
|
|
7528
|
-
|
7202
|
+
result->op = GGML_OP_WIN_UNPART;
|
7203
|
+
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7204
|
+
result->src[0] = a;
|
7529
7205
|
|
7530
|
-
|
7206
|
+
return result;
|
7207
|
+
}
|
7531
7208
|
|
7532
|
-
|
7209
|
+
// gmml_unary
|
7533
7210
|
|
7534
|
-
|
7211
|
+
static struct ggml_tensor * ggml_unary_impl(
|
7212
|
+
struct ggml_context * ctx,
|
7213
|
+
struct ggml_tensor * a,
|
7214
|
+
enum ggml_unary_op op,
|
7215
|
+
bool inplace) {
|
7216
|
+
bool is_node = false;
|
7217
|
+
|
7218
|
+
if (!inplace && (a->grad)) {
|
7219
|
+
is_node = true;
|
7220
|
+
}
|
7221
|
+
|
7222
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7223
|
+
|
7224
|
+
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
7225
|
+
|
7226
|
+
result->op = GGML_OP_UNARY;
|
7535
7227
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7536
7228
|
result->src[0] = a;
|
7537
|
-
result->src[1] = NULL;
|
7538
|
-
result->src[2] = b;
|
7539
7229
|
|
7540
7230
|
return result;
|
7541
7231
|
}
|
7542
7232
|
|
7233
|
+
struct ggml_tensor * ggml_unary(
|
7234
|
+
struct ggml_context * ctx,
|
7235
|
+
struct ggml_tensor * a,
|
7236
|
+
enum ggml_unary_op op) {
|
7237
|
+
return ggml_unary_impl(ctx, a, op, false);
|
7238
|
+
}
|
7239
|
+
|
7240
|
+
struct ggml_tensor * ggml_unary_inplace(
|
7241
|
+
struct ggml_context * ctx,
|
7242
|
+
struct ggml_tensor * a,
|
7243
|
+
enum ggml_unary_op op) {
|
7244
|
+
return ggml_unary_impl(ctx, a, op, true);
|
7245
|
+
}
|
7246
|
+
|
7543
7247
|
// ggml_map_unary
|
7544
7248
|
|
7545
|
-
struct ggml_tensor * ggml_map_unary_impl_f32(
|
7249
|
+
static struct ggml_tensor * ggml_map_unary_impl_f32(
|
7546
7250
|
struct ggml_context * ctx,
|
7547
7251
|
struct ggml_tensor * a,
|
7548
7252
|
const ggml_unary_op_f32_t fun,
|
@@ -7553,19 +7257,13 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
|
|
7553
7257
|
is_node = true;
|
7554
7258
|
}
|
7555
7259
|
|
7556
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7557
|
-
|
7558
|
-
ggml_scratch_save(ctx);
|
7559
|
-
|
7560
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7561
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7260
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7562
7261
|
|
7563
|
-
|
7262
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7564
7263
|
|
7565
7264
|
result->op = GGML_OP_MAP_UNARY;
|
7566
7265
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7567
7266
|
result->src[0] = a;
|
7568
|
-
result->src[2] = addr_tensor;
|
7569
7267
|
|
7570
7268
|
return result;
|
7571
7269
|
}
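The hunks above introduce ggml_unary_impl, which records the requested activation as an integer in op_params slot 0 (via ggml_set_op_params_i32) instead of giving each activation its own top-level op; later in this diff, ggml_compute_forward_unary switches on that stored value. A self-contained sketch of the same dispatch pattern, with an illustrative enum, struct and helpers rather than the library's own:

```c
#include <stdint.h>
#include <stdio.h>
#include <math.h>

/* Illustrative sub-op enum; the release's real one is ggml_unary_op. */
enum unary_op { UNARY_NEG, UNARY_RELU, UNARY_TANH };

struct node {
    int32_t op_params[8];   /* assumed slot count */
    float   value;          /* stand-in for tensor data */
};

/* graph-building side: remember which activation was requested */
static void set_unary(struct node * n, enum unary_op op, float x) {
    n->op_params[0] = (int32_t) op;
    n->value        = x;
}

/* compute side: one forward function dispatching on the stored sub-op */
static float forward_unary(const struct node * n) {
    switch ((enum unary_op) n->op_params[0]) {
        case UNARY_NEG:  return -n->value;
        case UNARY_RELU: return n->value > 0.0f ? n->value : 0.0f;
        case UNARY_TANH: return tanhf(n->value);
    }
    return 0.0f; /* unreachable for valid sub-ops */
}

int main(void) {
    struct node n;
    set_unary(&n, UNARY_RELU, -3.0f);
    printf("relu(-3) = %.1f\n", forward_unary(&n));   /* prints 0.0 */
    return 0;
}
```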
|
@@ -7586,7 +7284,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
7586
7284
|
|
7587
7285
|
// ggml_map_binary
|
7588
7286
|
|
7589
|
-
struct ggml_tensor * ggml_map_binary_impl_f32(
|
7287
|
+
static struct ggml_tensor * ggml_map_binary_impl_f32(
|
7590
7288
|
struct ggml_context * ctx,
|
7591
7289
|
struct ggml_tensor * a,
|
7592
7290
|
struct ggml_tensor * b,
|
@@ -7600,20 +7298,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
|
|
7600
7298
|
is_node = true;
|
7601
7299
|
}
|
7602
7300
|
|
7603
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7604
|
-
|
7605
|
-
ggml_scratch_save(ctx);
|
7606
|
-
|
7607
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7608
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7301
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7609
7302
|
|
7610
|
-
|
7303
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7611
7304
|
|
7612
7305
|
result->op = GGML_OP_MAP_BINARY;
|
7613
7306
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7614
7307
|
result->src[0] = a;
|
7615
7308
|
result->src[1] = b;
|
7616
|
-
result->src[2] = addr_tensor;
|
7617
7309
|
|
7618
7310
|
return result;
|
7619
7311
|
}
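ggml_map_unary_impl_f32 and ggml_map_binary_impl_f32 above (and the map_custom variants below) no longer stash the callback in an extra I32 tensor; they memcpy the function pointer's bytes into op_params, and the forward dispatch later memcpy's them back out before calling. A hedged standalone sketch of that round trip, with made-up names, assuming the pointer fits in the parameter buffer (which the real code relies on as well):

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>

typedef float (*unary_fn)(float);

struct node {
    int32_t op_params[8];   /* assumed int32_t-aligned parameter storage */
};

static float square(float x) { return x * x; }

int main(void) {
    struct node n = {0};

    /* producer: copy the raw bytes of the function pointer into op_params */
    unary_fn fun = square;
    assert(sizeof(fun) <= sizeof(n.op_params));
    memcpy(n.op_params, &fun, sizeof(fun));

    /* consumer: copy the bytes back into a properly typed pointer, then call it */
    unary_fn got;
    memcpy(&got, n.op_params, sizeof(got));
    printf("square(3) = %.1f\n", got(3.0f));
    return 0;
}
```

Going through memcpy keeps the copy byte-for-byte and sidesteps the aliasing and alignment questions that casting the int32_t buffer directly to a function-pointer type would raise.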
|
@@ -7636,7 +7328,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
7636
7328
|
|
7637
7329
|
// ggml_map_custom1
|
7638
7330
|
|
7639
|
-
struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7331
|
+
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7640
7332
|
struct ggml_context * ctx,
|
7641
7333
|
struct ggml_tensor * a,
|
7642
7334
|
const ggml_custom1_op_f32_t fun,
|
@@ -7647,19 +7339,13 @@ struct ggml_tensor * ggml_map_custom1_impl_f32(
|
|
7647
7339
|
is_node = true;
|
7648
7340
|
}
|
7649
7341
|
|
7650
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7651
|
-
|
7652
|
-
ggml_scratch_save(ctx);
|
7653
|
-
|
7654
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7655
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7342
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7656
7343
|
|
7657
|
-
|
7344
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7658
7345
|
|
7659
7346
|
result->op = GGML_OP_MAP_CUSTOM1;
|
7660
7347
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7661
7348
|
result->src[0] = a;
|
7662
|
-
result->src[2] = addr_tensor;
|
7663
7349
|
|
7664
7350
|
return result;
|
7665
7351
|
}
|
@@ -7680,7 +7366,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
7680
7366
|
|
7681
7367
|
// ggml_map_custom2
|
7682
7368
|
|
7683
|
-
struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7369
|
+
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7684
7370
|
struct ggml_context * ctx,
|
7685
7371
|
struct ggml_tensor * a,
|
7686
7372
|
struct ggml_tensor * b,
|
@@ -7692,20 +7378,14 @@ struct ggml_tensor * ggml_map_custom2_impl_f32(
|
|
7692
7378
|
is_node = true;
|
7693
7379
|
}
|
7694
7380
|
|
7695
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7696
|
-
|
7697
|
-
ggml_scratch_save(ctx);
|
7698
|
-
|
7699
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7700
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7381
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7701
7382
|
|
7702
|
-
|
7383
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7703
7384
|
|
7704
7385
|
result->op = GGML_OP_MAP_CUSTOM2;
|
7705
7386
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7706
7387
|
result->src[0] = a;
|
7707
7388
|
result->src[1] = b;
|
7708
|
-
result->src[2] = addr_tensor;
|
7709
7389
|
|
7710
7390
|
return result;
|
7711
7391
|
}
|
@@ -7728,7 +7408,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
7728
7408
|
|
7729
7409
|
// ggml_map_custom3
|
7730
7410
|
|
7731
|
-
struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7411
|
+
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7732
7412
|
struct ggml_context * ctx,
|
7733
7413
|
struct ggml_tensor * a,
|
7734
7414
|
struct ggml_tensor * b,
|
@@ -7741,21 +7421,15 @@ struct ggml_tensor * ggml_map_custom3_impl_f32(
|
|
7741
7421
|
is_node = true;
|
7742
7422
|
}
|
7743
7423
|
|
7744
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7745
|
-
|
7746
|
-
ggml_scratch_save(ctx);
|
7747
|
-
|
7748
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7749
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7424
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7750
7425
|
|
7751
|
-
|
7426
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7752
7427
|
|
7753
7428
|
result->op = GGML_OP_MAP_CUSTOM3;
|
7754
7429
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7755
7430
|
result->src[0] = a;
|
7756
7431
|
result->src[1] = b;
|
7757
|
-
result->src[2] =
|
7758
|
-
result->src[3] = c;
|
7432
|
+
result->src[2] = c;
|
7759
7433
|
|
7760
7434
|
return result;
|
7761
7435
|
}
|
@@ -8983,21 +8657,17 @@ static void ggml_compute_forward_acc_f32(
|
|
8983
8657
|
const struct ggml_compute_params * params,
|
8984
8658
|
const struct ggml_tensor * src0,
|
8985
8659
|
const struct ggml_tensor * src1,
|
8986
|
-
const struct ggml_tensor * opt0,
|
8987
8660
|
struct ggml_tensor * dst) {
|
8988
8661
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8989
8662
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
8990
8663
|
|
8991
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
8992
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
8993
|
-
|
8994
8664
|
// view src0 and dst with these strides and data offset inbytes during acc
|
8995
8665
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
8996
|
-
size_t nb1 = ((int32_t *)
|
8997
|
-
size_t nb2 = ((int32_t *)
|
8998
|
-
size_t nb3 = ((int32_t *)
|
8999
|
-
size_t offset = ((int32_t *)
|
9000
|
-
bool inplace = (bool) ((int32_t *)
|
8666
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
8667
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
8668
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
8669
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
8670
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
9001
8671
|
|
9002
8672
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
9003
8673
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -9066,13 +8736,12 @@ static void ggml_compute_forward_acc(
|
|
9066
8736
|
const struct ggml_compute_params * params,
|
9067
8737
|
const struct ggml_tensor * src0,
|
9068
8738
|
const struct ggml_tensor * src1,
|
9069
|
-
const struct ggml_tensor * opt0,
|
9070
8739
|
struct ggml_tensor * dst) {
|
9071
8740
|
|
9072
8741
|
switch (src0->type) {
|
9073
8742
|
case GGML_TYPE_F32:
|
9074
8743
|
{
|
9075
|
-
ggml_compute_forward_acc_f32(params, src0, src1,
|
8744
|
+
ggml_compute_forward_acc_f32(params, src0, src1, dst);
|
9076
8745
|
} break;
|
9077
8746
|
case GGML_TYPE_F16:
|
9078
8747
|
case GGML_TYPE_Q4_0:
|
@@ -9504,7 +9173,7 @@ static void ggml_compute_forward_sum_f32(
|
|
9504
9173
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9505
9174
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9506
9175
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9507
|
-
|
9176
|
+
ggml_vec_sum_f32_ggf(ne00,
|
9508
9177
|
&row_sum,
|
9509
9178
|
(float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
|
9510
9179
|
sum += row_sum;
|
@@ -9514,6 +9183,38 @@ static void ggml_compute_forward_sum_f32(
|
|
9514
9183
|
((float *) dst->data)[0] = sum;
|
9515
9184
|
}
|
9516
9185
|
|
9186
|
+
static void ggml_compute_forward_sum_f16(
|
9187
|
+
const struct ggml_compute_params * params,
|
9188
|
+
const struct ggml_tensor * src0,
|
9189
|
+
struct ggml_tensor * dst) {
|
9190
|
+
assert(params->ith == 0);
|
9191
|
+
assert(ggml_is_scalar(dst));
|
9192
|
+
|
9193
|
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
9194
|
+
return;
|
9195
|
+
}
|
9196
|
+
|
9197
|
+
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
9198
|
+
|
9199
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
9200
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
9201
|
+
|
9202
|
+
float sum = 0;
|
9203
|
+
float row_sum = 0;
|
9204
|
+
|
9205
|
+
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9206
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9207
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9208
|
+
ggml_vec_sum_f16_ggf(ne00,
|
9209
|
+
&row_sum,
|
9210
|
+
(ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03));
|
9211
|
+
sum += row_sum;
|
9212
|
+
}
|
9213
|
+
}
|
9214
|
+
}
|
9215
|
+
((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum);
|
9216
|
+
}
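The newly added ggml_compute_forward_sum_f16 above sums an F16 tensor by accumulating each row in float (ggml_vec_sum_f16_ggf) and converting the total back with GGML_FP32_TO_FP16. A self-contained sketch of that idea; the half-to-float decoder here is a plain portable one written for the example, not the conversion ggml itself uses.

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Decode an IEEE 754 binary16 value stored in a uint16_t into a float.
 * Handles zeros, subnormals, normals, infinities and NaNs. */
static float half_to_float(uint16_t h) {
    const uint32_t sign = (uint32_t)(h >> 15) & 1u;
    uint32_t       exp  = (uint32_t)(h >> 10) & 0x1Fu;
    uint32_t       frac = (uint32_t)(h & 0x3FFu);
    uint32_t       bits;

    if (exp == 0) {
        if (frac == 0) {
            bits = sign << 31;                                   /* +/- zero */
        } else {
            int e = -1;                                          /* subnormal: renormalize */
            do { frac <<= 1; e++; } while ((frac & 0x400u) == 0);
            frac &= 0x3FFu;
            bits = (sign << 31) | ((uint32_t)(127 - 15 - e) << 23) | (frac << 13);
        }
    } else if (exp == 0x1F) {
        bits = (sign << 31) | 0x7F800000u | (frac << 13);        /* inf / NaN */
    } else {
        bits = (sign << 31) | ((exp - 15 + 127) << 23) | (frac << 13);
    }

    float out;
    memcpy(&out, &bits, sizeof(out));
    return out;
}

/* Row sum in the spirit of the F16 path: widen to float, accumulate, return. */
static float vec_sum_f16(int n, const uint16_t * x) {
    float sum = 0.0f;
    for (int i = 0; i < n; ++i) {
        sum += half_to_float(x[i]);
    }
    return sum;
}

int main(void) {
    const uint16_t row[3] = { 0x3C00, 0x4000, 0x3800 };   /* 1.0, 2.0, 0.5 in binary16 */
    printf("sum = %.2f\n", vec_sum_f16(3, row));           /* prints 3.50 */
    return 0;
}
```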
|
9217
|
+
|
9517
9218
|
static void ggml_compute_forward_sum(
|
9518
9219
|
const struct ggml_compute_params * params,
|
9519
9220
|
const struct ggml_tensor * src0,
|
@@ -9523,6 +9224,10 @@ static void ggml_compute_forward_sum(
|
|
9523
9224
|
{
|
9524
9225
|
ggml_compute_forward_sum_f32(params, src0, dst);
|
9525
9226
|
} break;
|
9227
|
+
case GGML_TYPE_F16:
|
9228
|
+
{
|
9229
|
+
ggml_compute_forward_sum_f16(params, src0, dst);
|
9230
|
+
} break;
|
9526
9231
|
default:
|
9527
9232
|
{
|
9528
9233
|
GGML_ASSERT(false);
|
@@ -10118,8 +9823,8 @@ static void ggml_compute_forward_gelu_f32(
|
|
10118
9823
|
const struct ggml_compute_params * params,
|
10119
9824
|
const struct ggml_tensor * src0,
|
10120
9825
|
struct ggml_tensor * dst) {
|
10121
|
-
GGML_ASSERT(
|
10122
|
-
GGML_ASSERT(
|
9826
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9827
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10123
9828
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10124
9829
|
|
10125
9830
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10177,8 +9882,8 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
10177
9882
|
const struct ggml_compute_params * params,
|
10178
9883
|
const struct ggml_tensor * src0,
|
10179
9884
|
struct ggml_tensor * dst) {
|
10180
|
-
GGML_ASSERT(
|
10181
|
-
GGML_ASSERT(
|
9885
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9886
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10182
9887
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10183
9888
|
|
10184
9889
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10236,8 +9941,8 @@ static void ggml_compute_forward_silu_f32(
|
|
10236
9941
|
const struct ggml_compute_params * params,
|
10237
9942
|
const struct ggml_tensor * src0,
|
10238
9943
|
struct ggml_tensor * dst) {
|
10239
|
-
GGML_ASSERT(
|
10240
|
-
GGML_ASSERT(
|
9944
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9945
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10241
9946
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10242
9947
|
|
10243
9948
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10289,7 +9994,6 @@ static void ggml_compute_forward_silu(
|
|
10289
9994
|
}
|
10290
9995
|
}
|
10291
9996
|
|
10292
|
-
|
10293
9997
|
// ggml_compute_forward_silu_back
|
10294
9998
|
|
10295
9999
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -10297,9 +10001,9 @@ static void ggml_compute_forward_silu_back_f32(
|
|
10297
10001
|
const struct ggml_tensor * src0,
|
10298
10002
|
const struct ggml_tensor * grad,
|
10299
10003
|
struct ggml_tensor * dst) {
|
10300
|
-
GGML_ASSERT(
|
10301
|
-
GGML_ASSERT(
|
10302
|
-
GGML_ASSERT(
|
10004
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
10005
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
10006
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10303
10007
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10304
10008
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
10305
10009
|
|
@@ -10439,7 +10143,8 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
10439
10143
|
|
10440
10144
|
GGML_TENSOR_UNARY_OP_LOCALS;
|
10441
10145
|
|
10442
|
-
|
10146
|
+
float eps;
|
10147
|
+
memcpy(&eps, dst->op_params, sizeof(float));
|
10443
10148
|
|
10444
10149
|
// TODO: optimize
|
10445
10150
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
@@ -11092,21 +10797,17 @@ static void ggml_compute_forward_set_f32(
|
|
11092
10797
|
const struct ggml_compute_params * params,
|
11093
10798
|
const struct ggml_tensor * src0,
|
11094
10799
|
const struct ggml_tensor * src1,
|
11095
|
-
const struct ggml_tensor * opt0,
|
11096
10800
|
struct ggml_tensor * dst) {
|
11097
10801
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11098
10802
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
11099
10803
|
|
11100
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
11101
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
11102
|
-
|
11103
10804
|
// view src0 and dst with these strides and data offset inbytes during set
|
11104
10805
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
11105
|
-
size_t nb1 = ((int32_t *)
|
11106
|
-
size_t nb2 = ((int32_t *)
|
11107
|
-
size_t nb3 = ((int32_t *)
|
11108
|
-
size_t offset = ((int32_t *)
|
11109
|
-
bool inplace = (bool) ((int32_t *)
|
10806
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
10807
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
10808
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
10809
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
10810
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
11110
10811
|
|
11111
10812
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
11112
10813
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -11166,13 +10867,12 @@ static void ggml_compute_forward_set(
|
|
11166
10867
|
const struct ggml_compute_params * params,
|
11167
10868
|
const struct ggml_tensor * src0,
|
11168
10869
|
const struct ggml_tensor * src1,
|
11169
|
-
const struct ggml_tensor * opt0,
|
11170
10870
|
struct ggml_tensor * dst) {
|
11171
10871
|
|
11172
10872
|
switch (src0->type) {
|
11173
10873
|
case GGML_TYPE_F32:
|
11174
10874
|
{
|
11175
|
-
ggml_compute_forward_set_f32(params, src0, src1,
|
10875
|
+
ggml_compute_forward_set_f32(params, src0, src1, dst);
|
11176
10876
|
} break;
|
11177
10877
|
case GGML_TYPE_F16:
|
11178
10878
|
case GGML_TYPE_Q4_0:
|
@@ -11568,17 +11268,14 @@ static void ggml_compute_forward_diag(
|
|
11568
11268
|
static void ggml_compute_forward_diag_mask_f32(
|
11569
11269
|
const struct ggml_compute_params * params,
|
11570
11270
|
const struct ggml_tensor * src0,
|
11571
|
-
const struct ggml_tensor * src1,
|
11572
11271
|
struct ggml_tensor * dst,
|
11573
11272
|
const float value) {
|
11574
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11575
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11576
11273
|
|
11577
11274
|
const int ith = params->ith;
|
11578
11275
|
const int nth = params->nth;
|
11579
11276
|
|
11580
|
-
const int n_past = ((int32_t *)
|
11581
|
-
const bool inplace = (bool)((int32_t *)
|
11277
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11278
|
+
const bool inplace = (bool)((int32_t *) dst->op_params)[1];
|
11582
11279
|
|
11583
11280
|
GGML_ASSERT(n_past >= 0);
|
11584
11281
|
|
@@ -11621,12 +11318,11 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11621
11318
|
static void ggml_compute_forward_diag_mask_inf(
|
11622
11319
|
const struct ggml_compute_params * params,
|
11623
11320
|
const struct ggml_tensor * src0,
|
11624
|
-
const struct ggml_tensor * src1,
|
11625
11321
|
struct ggml_tensor * dst) {
|
11626
11322
|
switch (src0->type) {
|
11627
11323
|
case GGML_TYPE_F32:
|
11628
11324
|
{
|
11629
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11325
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
|
11630
11326
|
} break;
|
11631
11327
|
default:
|
11632
11328
|
{
|
@@ -11638,12 +11334,11 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
11638
11334
|
static void ggml_compute_forward_diag_mask_zero(
|
11639
11335
|
const struct ggml_compute_params * params,
|
11640
11336
|
const struct ggml_tensor * src0,
|
11641
|
-
const struct ggml_tensor * src1,
|
11642
11337
|
struct ggml_tensor * dst) {
|
11643
11338
|
switch (src0->type) {
|
11644
11339
|
case GGML_TYPE_F32:
|
11645
11340
|
{
|
11646
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11341
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
|
11647
11342
|
} break;
|
11648
11343
|
default:
|
11649
11344
|
{
|
@@ -11841,20 +11536,17 @@ static void ggml_compute_forward_soft_max_back(
|
|
11841
11536
|
static void ggml_compute_forward_alibi_f32(
|
11842
11537
|
const struct ggml_compute_params * params,
|
11843
11538
|
const struct ggml_tensor * src0,
|
11844
|
-
const struct ggml_tensor * src1,
|
11845
11539
|
struct ggml_tensor * dst) {
|
11846
11540
|
assert(params->ith == 0);
|
11847
11541
|
|
11848
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11849
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11850
|
-
|
11851
11542
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11852
11543
|
return;
|
11853
11544
|
}
|
11854
11545
|
|
11855
|
-
const int
|
11856
|
-
const int
|
11857
|
-
|
11546
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11547
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11548
|
+
float max_bias;
|
11549
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11858
11550
|
|
11859
11551
|
assert(n_past >= 0);
|
11860
11552
|
|
@@ -11907,20 +11599,17 @@ static void ggml_compute_forward_alibi_f32(
|
|
11907
11599
|
static void ggml_compute_forward_alibi_f16(
|
11908
11600
|
const struct ggml_compute_params * params,
|
11909
11601
|
const struct ggml_tensor * src0,
|
11910
|
-
const struct ggml_tensor * src1,
|
11911
11602
|
struct ggml_tensor * dst) {
|
11912
11603
|
assert(params->ith == 0);
|
11913
11604
|
|
11914
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11915
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11916
|
-
|
11917
11605
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11918
11606
|
return;
|
11919
11607
|
}
|
11920
11608
|
|
11921
|
-
const int
|
11922
|
-
const int
|
11923
|
-
|
11609
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11610
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11611
|
+
float max_bias;
|
11612
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11924
11613
|
|
11925
11614
|
assert(n_past >= 0);
|
11926
11615
|
|
@@ -11973,16 +11662,15 @@ static void ggml_compute_forward_alibi_f16(
|
|
11973
11662
|
static void ggml_compute_forward_alibi(
|
11974
11663
|
const struct ggml_compute_params * params,
|
11975
11664
|
const struct ggml_tensor * src0,
|
11976
|
-
const struct ggml_tensor * src1,
|
11977
11665
|
struct ggml_tensor * dst) {
|
11978
11666
|
switch (src0->type) {
|
11979
11667
|
case GGML_TYPE_F16:
|
11980
11668
|
{
|
11981
|
-
ggml_compute_forward_alibi_f16(params, src0,
|
11669
|
+
ggml_compute_forward_alibi_f16(params, src0, dst);
|
11982
11670
|
} break;
|
11983
11671
|
case GGML_TYPE_F32:
|
11984
11672
|
{
|
11985
|
-
ggml_compute_forward_alibi_f32(params, src0,
|
11673
|
+
ggml_compute_forward_alibi_f32(params, src0, dst);
|
11986
11674
|
} break;
|
11987
11675
|
case GGML_TYPE_Q4_0:
|
11988
11676
|
case GGML_TYPE_Q4_1:
|
@@ -12012,19 +11700,17 @@ static void ggml_compute_forward_alibi(
|
|
12012
11700
|
static void ggml_compute_forward_clamp_f32(
|
12013
11701
|
const struct ggml_compute_params * params,
|
12014
11702
|
const struct ggml_tensor * src0,
|
12015
|
-
const struct ggml_tensor * src1,
|
12016
11703
|
struct ggml_tensor * dst) {
|
12017
11704
|
assert(params->ith == 0);
|
12018
11705
|
|
12019
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12020
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
12021
|
-
|
12022
11706
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12023
11707
|
return;
|
12024
11708
|
}
|
12025
11709
|
|
12026
|
-
|
12027
|
-
|
11710
|
+
float min;
|
11711
|
+
float max;
|
11712
|
+
memcpy(&min, (float *) dst->op_params + 0, sizeof(float));
|
11713
|
+
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
12028
11714
|
|
12029
11715
|
const int ith = params->ith;
|
12030
11716
|
const int nth = params->nth;
|
@@ -12054,12 +11740,11 @@ static void ggml_compute_forward_clamp_f32(
|
|
12054
11740
|
static void ggml_compute_forward_clamp(
|
12055
11741
|
const struct ggml_compute_params * params,
|
12056
11742
|
const struct ggml_tensor * src0,
|
12057
|
-
const struct ggml_tensor * src1,
|
12058
11743
|
struct ggml_tensor * dst) {
|
12059
11744
|
switch (src0->type) {
|
12060
11745
|
case GGML_TYPE_F32:
|
12061
11746
|
{
|
12062
|
-
ggml_compute_forward_clamp_f32(params, src0,
|
11747
|
+
ggml_compute_forward_clamp_f32(params, src0, dst);
|
12063
11748
|
} break;
|
12064
11749
|
case GGML_TYPE_F16:
|
12065
11750
|
case GGML_TYPE_Q4_0:
|
@@ -12089,10 +11774,7 @@ static void ggml_compute_forward_clamp(
|
|
12089
11774
|
static void ggml_compute_forward_rope_f32(
|
12090
11775
|
const struct ggml_compute_params * params,
|
12091
11776
|
const struct ggml_tensor * src0,
|
12092
|
-
const struct ggml_tensor * src1,
|
12093
11777
|
struct ggml_tensor * dst) {
|
12094
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12095
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12096
11778
|
|
12097
11779
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12098
11780
|
return;
|
@@ -12101,12 +11783,12 @@ static void ggml_compute_forward_rope_f32(
|
|
12101
11783
|
float freq_base;
|
12102
11784
|
float freq_scale;
|
12103
11785
|
|
12104
|
-
const int n_past = ((int32_t *)
|
12105
|
-
const int n_dims = ((int32_t *)
|
12106
|
-
const int mode = ((int32_t *)
|
12107
|
-
const int n_ctx = ((int32_t *)
|
12108
|
-
memcpy(&freq_base, (int32_t *)
|
12109
|
-
memcpy(&freq_scale, (int32_t *)
|
11786
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11787
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11788
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11789
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11790
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11791
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12110
11792
|
|
12111
11793
|
assert(n_past >= 0);
|
12112
11794
|
|
@@ -12221,10 +11903,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12221
11903
|
static void ggml_compute_forward_rope_f16(
|
12222
11904
|
const struct ggml_compute_params * params,
|
12223
11905
|
const struct ggml_tensor * src0,
|
12224
|
-
const struct ggml_tensor * src1,
|
12225
11906
|
struct ggml_tensor * dst) {
|
12226
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12227
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12228
11907
|
|
12229
11908
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12230
11909
|
return;
|
@@ -12233,12 +11912,12 @@ static void ggml_compute_forward_rope_f16(
|
|
12233
11912
|
float freq_base;
|
12234
11913
|
float freq_scale;
|
12235
11914
|
|
12236
|
-
const int n_past = ((int32_t *)
|
12237
|
-
const int n_dims = ((int32_t *)
|
12238
|
-
const int mode = ((int32_t *)
|
12239
|
-
const int n_ctx = ((int32_t *)
|
12240
|
-
memcpy(&freq_base, (int32_t *)
|
12241
|
-
memcpy(&freq_scale, (int32_t *)
|
11915
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11916
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11917
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11918
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11919
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11920
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12242
11921
|
|
12243
11922
|
assert(n_past >= 0);
|
12244
11923
|
|
@@ -12353,16 +12032,15 @@ static void ggml_compute_forward_rope_f16(
|
|
12353
12032
|
static void ggml_compute_forward_rope(
|
12354
12033
|
const struct ggml_compute_params * params,
|
12355
12034
|
const struct ggml_tensor * src0,
|
12356
|
-
const struct ggml_tensor * src1,
|
12357
12035
|
struct ggml_tensor * dst) {
|
12358
12036
|
switch (src0->type) {
|
12359
12037
|
case GGML_TYPE_F16:
|
12360
12038
|
{
|
12361
|
-
ggml_compute_forward_rope_f16(params, src0,
|
12039
|
+
ggml_compute_forward_rope_f16(params, src0, dst);
|
12362
12040
|
} break;
|
12363
12041
|
case GGML_TYPE_F32:
|
12364
12042
|
{
|
12365
|
-
ggml_compute_forward_rope_f32(params, src0,
|
12043
|
+
ggml_compute_forward_rope_f32(params, src0, dst);
|
12366
12044
|
} break;
|
12367
12045
|
default:
|
12368
12046
|
{
|
@@ -12376,10 +12054,7 @@ static void ggml_compute_forward_rope(
|
|
12376
12054
|
static void ggml_compute_forward_rope_back_f32(
|
12377
12055
|
const struct ggml_compute_params * params,
|
12378
12056
|
const struct ggml_tensor * src0,
|
12379
|
-
const struct ggml_tensor * src1,
|
12380
12057
|
struct ggml_tensor * dst) {
|
12381
|
-
assert(src1->type == GGML_TYPE_I32);
|
12382
|
-
assert(ggml_nelements(src1) == 4);
|
12383
12058
|
|
12384
12059
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12385
12060
|
return;
|
@@ -12389,9 +12064,9 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12389
12064
|
// dx = rope_back(dy, src1)
|
12390
12065
|
// src0 is dy, src1 contains options
|
12391
12066
|
|
12392
|
-
const int n_past = ((int32_t *)
|
12393
|
-
const int n_dims = ((int32_t *)
|
12394
|
-
const int mode = ((int32_t *)
|
12067
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12068
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12069
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12395
12070
|
|
12396
12071
|
assert(n_past >= 0);
|
12397
12072
|
|
@@ -12475,10 +12150,7 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12475
12150
|
static void ggml_compute_forward_rope_back_f16(
|
12476
12151
|
const struct ggml_compute_params * params,
|
12477
12152
|
const struct ggml_tensor * src0,
|
12478
|
-
const struct ggml_tensor * src1,
|
12479
12153
|
struct ggml_tensor * dst) {
|
12480
|
-
assert(src1->type == GGML_TYPE_I32);
|
12481
|
-
assert(ggml_nelements(src1) == 3);
|
12482
12154
|
|
12483
12155
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12484
12156
|
return;
|
@@ -12488,9 +12160,9 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12488
12160
|
// dx = rope_back(dy, src1)
|
12489
12161
|
// src0 is dy, src1 contains options
|
12490
12162
|
|
12491
|
-
const int n_past = ((int32_t *)
|
12492
|
-
const int n_dims = ((int32_t *)
|
12493
|
-
const int mode = ((int32_t *)
|
12163
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12164
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12165
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12494
12166
|
|
12495
12167
|
assert(n_past >= 0);
|
12496
12168
|
|
@@ -12574,16 +12246,15 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12574
12246
|
static void ggml_compute_forward_rope_back(
|
12575
12247
|
const struct ggml_compute_params * params,
|
12576
12248
|
const struct ggml_tensor * src0,
|
12577
|
-
const struct ggml_tensor * src1,
|
12578
12249
|
struct ggml_tensor * dst) {
|
12579
12250
|
switch (src0->type) {
|
12580
12251
|
case GGML_TYPE_F16:
|
12581
12252
|
{
|
12582
|
-
ggml_compute_forward_rope_back_f16(params, src0,
|
12253
|
+
ggml_compute_forward_rope_back_f16(params, src0, dst);
|
12583
12254
|
} break;
|
12584
12255
|
case GGML_TYPE_F32:
|
12585
12256
|
{
|
12586
|
-
ggml_compute_forward_rope_back_f32(params, src0,
|
12257
|
+
ggml_compute_forward_rope_back_f32(params, src0, dst);
|
12587
12258
|
} break;
|
12588
12259
|
default:
|
12589
12260
|
{
|
@@ -12780,7 +12451,7 @@ static void ggml_compute_forward_conv_1d_s1_ph(
|
|
12780
12451
|
const struct ggml_compute_params * params,
|
12781
12452
|
const struct ggml_tensor * src0,
|
12782
12453
|
const struct ggml_tensor * src1,
|
12783
|
-
|
12454
|
+
struct ggml_tensor * dst) {
|
12784
12455
|
switch (src0->type) {
|
12785
12456
|
case GGML_TYPE_F16:
|
12786
12457
|
{
|
@@ -12983,7 +12654,7 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12983
12654
|
const struct ggml_compute_params * params,
|
12984
12655
|
const struct ggml_tensor * src0,
|
12985
12656
|
const struct ggml_tensor * src1,
|
12986
|
-
|
12657
|
+
struct ggml_tensor * dst) {
|
12987
12658
|
switch (src0->type) {
|
12988
12659
|
case GGML_TYPE_F16:
|
12989
12660
|
{
|
@@ -13003,14 +12674,13 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
13003
12674
|
// ggml_compute_forward_conv_1d
|
13004
12675
|
|
13005
12676
|
static void ggml_compute_forward_conv_1d(
|
13006
|
-
|
13007
|
-
|
13008
|
-
|
13009
|
-
|
13010
|
-
|
13011
|
-
const int32_t
|
13012
|
-
const int32_t
|
13013
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[2];
|
12677
|
+
const struct ggml_compute_params * params,
|
12678
|
+
const struct ggml_tensor * src0,
|
12679
|
+
const struct ggml_tensor * src1,
|
12680
|
+
struct ggml_tensor * dst) {
|
12681
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12682
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
12683
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
13014
12684
|
GGML_ASSERT(d0 == 1); // dilation not supported
|
13015
12685
|
GGML_ASSERT(p0 == src0->ne[0]/2); // only half padding supported
|
13016
12686
|
if (s0 == 1) {
|
@@ -13028,7 +12698,6 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13028
12698
|
const struct ggml_compute_params * params,
|
13029
12699
|
const struct ggml_tensor * src0,
|
13030
12700
|
const struct ggml_tensor * src1,
|
13031
|
-
const struct ggml_tensor * opt0,
|
13032
12701
|
struct ggml_tensor * dst) {
|
13033
12702
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
13034
12703
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
@@ -13048,12 +12717,12 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13048
12717
|
// size of the convolution row - the kernel size unrolled across all channels
|
13049
12718
|
const int ew0 = nk0*nk1*ne02;
|
13050
12719
|
|
13051
|
-
const int32_t s0 = ((const int32_t*)(
|
13052
|
-
const int32_t s1 = ((const int32_t*)(
|
13053
|
-
const int32_t p0 = ((const int32_t*)(
|
13054
|
-
const int32_t p1 = ((const int32_t*)(
|
13055
|
-
const int32_t d0 = ((const int32_t*)(
|
13056
|
-
const int32_t d1 = ((const int32_t*)(
|
12720
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12721
|
+
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12722
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12723
|
+
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12724
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12725
|
+
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
13057
12726
|
|
13058
12727
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13059
12728
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -13125,17 +12794,15 @@ static void ggml_compute_forward_conv_2d(
|
|
13125
12794
|
const struct ggml_compute_params * params,
|
13126
12795
|
const struct ggml_tensor * src0,
|
13127
12796
|
const struct ggml_tensor * src1,
|
13128
|
-
|
13129
|
-
struct ggml_tensor * dst
|
13130
|
-
) {
|
12797
|
+
struct ggml_tensor * dst) {
|
13131
12798
|
switch (src0->type) {
|
13132
12799
|
case GGML_TYPE_F16:
|
13133
12800
|
{
|
13134
|
-
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1,
|
12801
|
+
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
|
13135
12802
|
} break;
|
13136
12803
|
case GGML_TYPE_F32:
|
13137
12804
|
{
|
13138
|
-
//ggml_compute_forward_conv_2d_f32(params, src0, src1,
|
12805
|
+
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
13139
12806
|
GGML_ASSERT(false);
|
13140
12807
|
} break;
|
13141
12808
|
default:
|
@@ -13200,12 +12867,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
13200
12867
|
// ggml_compute_forward_pool_1d
|
13201
12868
|
|
13202
12869
|
static void ggml_compute_forward_pool_1d(
|
13203
|
-
|
13204
|
-
|
13205
|
-
|
13206
|
-
|
13207
|
-
|
13208
|
-
const int* opts = (const int*)opt0->data;
|
12870
|
+
const struct ggml_compute_params * params,
|
12871
|
+
const struct ggml_tensor * src0,
|
12872
|
+
struct ggml_tensor * dst) {
|
12873
|
+
|
12874
|
+
const int32_t* opts = (const int32_t*)dst->op_params;
|
13209
12875
|
enum ggml_op_pool op = opts[0];
|
13210
12876
|
const int k0 = opts[1];
|
13211
12877
|
const int s0 = opts[2];
|
@@ -13219,12 +12885,12 @@ static void ggml_compute_forward_pool_1d(
|
|
13219
12885
|
// ggml_compute_forward_pool_2d_sk_p0
|
13220
12886
|
|
13221
12887
|
static void ggml_compute_forward_pool_2d_sk_p0(
|
13222
|
-
|
13223
|
-
|
13224
|
-
|
13225
|
-
|
13226
|
-
|
13227
|
-
|
12888
|
+
const struct ggml_compute_params * params,
|
12889
|
+
const enum ggml_op_pool op,
|
12890
|
+
const struct ggml_tensor * src,
|
12891
|
+
const int k0,
|
12892
|
+
const int k1,
|
12893
|
+
struct ggml_tensor * dst) {
|
13228
12894
|
assert(src->type == GGML_TYPE_F32);
|
13229
12895
|
assert(params->ith == 0);
|
13230
12896
|
|
@@ -13284,12 +12950,11 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
13284
12950
|
// ggml_compute_forward_pool_2d
|
13285
12951
|
|
13286
12952
|
static void ggml_compute_forward_pool_2d(
|
13287
|
-
|
13288
|
-
|
13289
|
-
|
13290
|
-
|
13291
|
-
|
13292
|
-
const int* opts = (const int*)opt0->data;
|
12953
|
+
const struct ggml_compute_params * params,
|
12954
|
+
const struct ggml_tensor * src0,
|
12955
|
+
struct ggml_tensor * dst) {
|
12956
|
+
|
12957
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
13293
12958
|
enum ggml_op_pool op = opts[0];
|
13294
12959
|
const int k0 = opts[1];
|
13295
12960
|
const int k1 = opts[2];
|
@@ -13314,7 +12979,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13314
12979
|
const struct ggml_tensor * k,
|
13315
12980
|
const struct ggml_tensor * v,
|
13316
12981
|
const bool masked,
|
13317
|
-
|
12982
|
+
struct ggml_tensor * dst) {
|
13318
12983
|
int64_t t0 = ggml_perf_time_us();
|
13319
12984
|
UNUSED(t0);
|
13320
12985
|
|
@@ -13492,7 +13157,7 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13492
13157
|
const struct ggml_tensor * k,
|
13493
13158
|
const struct ggml_tensor * v,
|
13494
13159
|
const bool masked,
|
13495
|
-
|
13160
|
+
struct ggml_tensor * dst) {
|
13496
13161
|
int64_t t0 = ggml_perf_time_us();
|
13497
13162
|
UNUSED(t0);
|
13498
13163
|
|
@@ -14257,7 +13922,6 @@ static void ggml_compute_forward_flash_attn_back(
|
|
14257
13922
|
static void ggml_compute_forward_win_part_f32(
|
14258
13923
|
const struct ggml_compute_params * params,
|
14259
13924
|
const struct ggml_tensor * src0,
|
14260
|
-
const struct ggml_tensor * opt0,
|
14261
13925
|
struct ggml_tensor * dst) {
|
14262
13926
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14263
13927
|
return;
|
@@ -14266,9 +13930,9 @@ static void ggml_compute_forward_win_part_f32(
|
|
14266
13930
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14267
13931
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14268
13932
|
|
14269
|
-
const int32_t nep0 = ((const int32_t *)(
|
14270
|
-
const int32_t nep1 = ((const int32_t *)(
|
14271
|
-
const int32_t w = ((const int32_t *)(
|
13933
|
+
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
13934
|
+
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
13935
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[2];
|
14272
13936
|
|
14273
13937
|
assert(ne00 == ne0);
|
14274
13938
|
assert(ne3 == nep0*nep1);
|
@@ -14302,12 +13966,11 @@ static void ggml_compute_forward_win_part_f32(
|
|
14302
13966
|
static void ggml_compute_forward_win_part(
|
14303
13967
|
const struct ggml_compute_params * params,
|
14304
13968
|
const struct ggml_tensor * src0,
|
14305
|
-
const struct ggml_tensor * opt0,
|
14306
13969
|
struct ggml_tensor * dst) {
|
14307
13970
|
switch (src0->type) {
|
14308
13971
|
case GGML_TYPE_F32:
|
14309
13972
|
{
|
14310
|
-
ggml_compute_forward_win_part_f32(params, src0,
|
13973
|
+
ggml_compute_forward_win_part_f32(params, src0, dst);
|
14311
13974
|
} break;
|
14312
13975
|
default:
|
14313
13976
|
{
|
@@ -14321,7 +13984,6 @@ static void ggml_compute_forward_win_part(
|
|
14321
13984
|
static void ggml_compute_forward_win_unpart_f32(
|
14322
13985
|
const struct ggml_compute_params * params,
|
14323
13986
|
const struct ggml_tensor * src0,
|
14324
|
-
const struct ggml_tensor * opt0,
|
14325
13987
|
struct ggml_tensor * dst) {
|
14326
13988
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14327
13989
|
return;
|
@@ -14330,7 +13992,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14330
13992
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14331
13993
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14332
13994
|
|
14333
|
-
const int32_t w = ((const int32_t *)(
|
13995
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
14334
13996
|
|
14335
13997
|
// padding
|
14336
13998
|
const int px = (w - ne1%w)%w;
|
@@ -14364,12 +14026,67 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14364
14026
|
static void ggml_compute_forward_win_unpart(
|
14365
14027
|
const struct ggml_compute_params * params,
|
14366
14028
|
const struct ggml_tensor * src0,
|
14367
|
-
const struct ggml_tensor * opt0,
|
14368
14029
|
struct ggml_tensor * dst) {
|
14369
14030
|
switch (src0->type) {
|
14370
14031
|
case GGML_TYPE_F32:
|
14371
14032
|
{
|
14372
|
-
ggml_compute_forward_win_unpart_f32(params, src0,
|
14033
|
+
ggml_compute_forward_win_unpart_f32(params, src0, dst);
|
14034
|
+
} break;
|
14035
|
+
default:
|
14036
|
+
{
|
14037
|
+
GGML_ASSERT(false);
|
14038
|
+
} break;
|
14039
|
+
}
|
14040
|
+
}
|
14041
|
+
|
14042
|
+
//gmml_compute_forward_unary
|
14043
|
+
|
14044
|
+
static void ggml_compute_forward_unary(
|
14045
|
+
const struct ggml_compute_params * params,
|
14046
|
+
const struct ggml_tensor * src0,
|
14047
|
+
struct ggml_tensor * dst) {
|
14048
|
+
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
14049
|
+
|
14050
|
+
switch (op) {
|
14051
|
+
case GGML_UNARY_OP_ABS:
|
14052
|
+
{
|
14053
|
+
ggml_compute_forward_abs(params, src0, dst);
|
14054
|
+
} break;
|
14055
|
+
case GGML_UNARY_OP_SGN:
|
14056
|
+
{
|
14057
|
+
ggml_compute_forward_sgn(params, src0, dst);
|
14058
|
+
} break;
|
14059
|
+
case GGML_UNARY_OP_NEG:
|
14060
|
+
{
|
14061
|
+
ggml_compute_forward_neg(params, src0, dst);
|
14062
|
+
} break;
|
14063
|
+
case GGML_UNARY_OP_STEP:
|
14064
|
+
{
|
14065
|
+
ggml_compute_forward_step(params, src0, dst);
|
14066
|
+
} break;
|
14067
|
+
case GGML_UNARY_OP_TANH:
|
14068
|
+
{
|
14069
|
+
ggml_compute_forward_tanh(params, src0, dst);
|
14070
|
+
} break;
|
14071
|
+
case GGML_UNARY_OP_ELU:
|
14072
|
+
{
|
14073
|
+
ggml_compute_forward_elu(params, src0, dst);
|
14074
|
+
} break;
|
14075
|
+
case GGML_UNARY_OP_RELU:
|
14076
|
+
{
|
14077
|
+
ggml_compute_forward_relu(params, src0, dst);
|
14078
|
+
} break;
|
14079
|
+
case GGML_UNARY_OP_GELU:
|
14080
|
+
{
|
14081
|
+
ggml_compute_forward_gelu(params, src0, dst);
|
14082
|
+
} break;
|
14083
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
14084
|
+
{
|
14085
|
+
ggml_compute_forward_gelu_quick(params, src0, dst);
|
14086
|
+
} break;
|
14087
|
+
case GGML_UNARY_OP_SILU:
|
14088
|
+
{
|
14089
|
+
ggml_compute_forward_silu(params, src0, dst);
|
14373
14090
|
} break;
|
14374
14091
|
default:
|
14375
14092
|
{
|
@@ -14888,7 +14605,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14888
14605
|
} break;
|
14889
14606
|
case GGML_OP_ACC:
|
14890
14607
|
{
|
14891
|
-
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor
|
14608
|
+
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
|
14892
14609
|
} break;
|
14893
14610
|
case GGML_OP_SUB:
|
14894
14611
|
{
|
@@ -14938,46 +14655,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14938
14655
|
{
|
14939
14656
|
ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
|
14940
14657
|
} break;
|
14941
|
-
case GGML_OP_ABS:
|
14942
|
-
{
|
14943
|
-
ggml_compute_forward_abs(params, tensor->src[0], tensor);
|
14944
|
-
} break;
|
14945
|
-
case GGML_OP_SGN:
|
14946
|
-
{
|
14947
|
-
ggml_compute_forward_sgn(params, tensor->src[0], tensor);
|
14948
|
-
} break;
|
14949
|
-
case GGML_OP_NEG:
|
14950
|
-
{
|
14951
|
-
ggml_compute_forward_neg(params, tensor->src[0], tensor);
|
14952
|
-
} break;
|
14953
|
-
case GGML_OP_STEP:
|
14954
|
-
{
|
14955
|
-
ggml_compute_forward_step(params, tensor->src[0], tensor);
|
14956
|
-
} break;
|
14957
|
-
case GGML_OP_TANH:
|
14958
|
-
{
|
14959
|
-
ggml_compute_forward_tanh(params, tensor->src[0], tensor);
|
14960
|
-
} break;
|
14961
|
-
case GGML_OP_ELU:
|
14962
|
-
{
|
14963
|
-
ggml_compute_forward_elu(params, tensor->src[0], tensor);
|
14964
|
-
} break;
|
14965
|
-
case GGML_OP_RELU:
|
14966
|
-
{
|
14967
|
-
ggml_compute_forward_relu(params, tensor->src[0], tensor);
|
14968
|
-
} break;
|
14969
|
-
case GGML_OP_GELU:
|
14970
|
-
{
|
14971
|
-
ggml_compute_forward_gelu(params, tensor->src[0], tensor);
|
14972
|
-
} break;
|
14973
|
-
case GGML_OP_GELU_QUICK:
|
14974
|
-
{
|
14975
|
-
ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor);
|
14976
|
-
} break;
|
14977
|
-
case GGML_OP_SILU:
|
14978
|
-
{
|
14979
|
-
ggml_compute_forward_silu(params, tensor->src[0], tensor);
|
14980
|
-
} break;
|
14981
14658
|
case GGML_OP_SILU_BACK:
|
14982
14659
|
{
|
14983
14660
|
ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
|
@@ -15008,7 +14685,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15008
14685
|
} break;
|
15009
14686
|
case GGML_OP_SET:
|
15010
14687
|
{
|
15011
|
-
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor
|
14688
|
+
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
|
15012
14689
|
} break;
|
15013
14690
|
case GGML_OP_CPY:
|
15014
14691
|
{
|
@@ -15048,11 +14725,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15048
14725
|
} break;
|
15049
14726
|
case GGML_OP_DIAG_MASK_INF:
|
15050
14727
|
{
|
15051
|
-
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor
|
14728
|
+
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
|
15052
14729
|
} break;
|
15053
14730
|
case GGML_OP_DIAG_MASK_ZERO:
|
15054
14731
|
{
|
15055
|
-
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor
|
14732
|
+
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
|
15056
14733
|
} break;
|
15057
14734
|
case GGML_OP_SOFT_MAX:
|
15058
14735
|
{
|
@@ -15064,39 +14741,39 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15064
14741
|
} break;
|
15065
14742
|
case GGML_OP_ROPE:
|
15066
14743
|
{
|
15067
|
-
ggml_compute_forward_rope(params, tensor->src[0], tensor
|
14744
|
+
ggml_compute_forward_rope(params, tensor->src[0], tensor);
|
15068
14745
|
} break;
|
15069
14746
|
case GGML_OP_ROPE_BACK:
|
15070
14747
|
{
|
15071
|
-
ggml_compute_forward_rope_back(params, tensor->src[0], tensor
|
14748
|
+
ggml_compute_forward_rope_back(params, tensor->src[0], tensor);
|
15072
14749
|
} break;
|
15073
14750
|
case GGML_OP_ALIBI:
|
15074
14751
|
{
|
15075
|
-
ggml_compute_forward_alibi(params, tensor->src[0], tensor
|
14752
|
+
ggml_compute_forward_alibi(params, tensor->src[0], tensor);
|
15076
14753
|
} break;
|
15077
14754
|
case GGML_OP_CLAMP:
|
15078
14755
|
{
|
15079
|
-
ggml_compute_forward_clamp(params, tensor->src[0], tensor
|
14756
|
+
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
15080
14757
|
} break;
|
15081
14758
|
case GGML_OP_CONV_1D:
|
15082
14759
|
{
|
15083
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor
|
14760
|
+
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
15084
14761
|
} break;
|
15085
14762
|
case GGML_OP_CONV_2D:
|
15086
14763
|
{
|
15087
|
-
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor
|
14764
|
+
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
|
15088
14765
|
} break;
|
15089
14766
|
case GGML_OP_POOL_1D:
|
15090
14767
|
{
|
15091
|
-
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor
|
14768
|
+
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
|
15092
14769
|
} break;
|
15093
14770
|
case GGML_OP_POOL_2D:
|
15094
14771
|
{
|
15095
|
-
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor
|
14772
|
+
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
|
15096
14773
|
} break;
|
15097
14774
|
case GGML_OP_FLASH_ATTN:
|
15098
14775
|
{
|
15099
|
-
const int32_t t =
|
14776
|
+
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15100
14777
|
GGML_ASSERT(t == 0 || t == 1);
|
15101
14778
|
const bool masked = t != 0;
|
15102
14779
|
ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
|
@@ -15107,47 +14784,56 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15107
14784
|
} break;
|
15108
14785
|
case GGML_OP_FLASH_ATTN_BACK:
|
15109
14786
|
{
|
15110
|
-
int32_t t =
|
14787
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15111
14788
|
GGML_ASSERT(t == 0 || t == 1);
|
15112
14789
|
bool masked = t != 0;
|
15113
14790
|
ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
|
15114
14791
|
} break;
|
15115
14792
|
case GGML_OP_WIN_PART:
|
15116
14793
|
{
|
15117
|
-
ggml_compute_forward_win_part(params, tensor->src[0], tensor
|
14794
|
+
ggml_compute_forward_win_part(params, tensor->src[0], tensor);
|
15118
14795
|
} break;
|
15119
14796
|
case GGML_OP_WIN_UNPART:
|
15120
14797
|
{
|
15121
|
-
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor
|
14798
|
+
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
|
14799
|
+
} break;
|
14800
|
+
case GGML_OP_UNARY:
|
14801
|
+
{
|
14802
|
+
ggml_compute_forward_unary(params, tensor->src[0], tensor);
|
15122
14803
|
} break;
|
15123
14804
|
case GGML_OP_MAP_UNARY:
|
15124
14805
|
{
|
15125
|
-
|
14806
|
+
ggml_unary_op_f32_t fun;
|
14807
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15126
14808
|
ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
|
15127
14809
|
}
|
15128
14810
|
break;
|
15129
14811
|
case GGML_OP_MAP_BINARY:
|
15130
14812
|
{
|
15131
|
-
|
14813
|
+
ggml_binary_op_f32_t fun;
|
14814
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15132
14815
|
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15133
14816
|
}
|
15134
14817
|
break;
|
15135
14818
|
case GGML_OP_MAP_CUSTOM1:
|
15136
14819
|
{
|
15137
|
-
|
14820
|
+
ggml_custom1_op_f32_t fun;
|
14821
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15138
14822
|
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
15139
14823
|
}
|
15140
14824
|
break;
|
15141
14825
|
case GGML_OP_MAP_CUSTOM2:
|
15142
14826
|
{
|
15143
|
-
|
14827
|
+
ggml_custom2_op_f32_t fun;
|
14828
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15144
14829
|
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15145
14830
|
}
|
15146
14831
|
break;
|
15147
14832
|
case GGML_OP_MAP_CUSTOM3:
|
15148
14833
|
{
|
15149
|
-
|
15150
|
-
|
14834
|
+
ggml_custom3_op_f32_t fun;
|
14835
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
14836
|
+
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
15151
14837
|
}
|
15152
14838
|
break;
|
15153
14839
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
@@ -15211,12 +14897,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
            }
            if (src1->grad) {
-
-
-               const size_t
-               const size_t
-               const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
-               const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
+               const size_t nb1 = ((int32_t *) tensor->op_params)[0];
+               const size_t nb2 = ((int32_t *) tensor->op_params)[1];
+               const size_t nb3 = ((int32_t *) tensor->op_params)[2];
+               const size_t offset = ((int32_t *) tensor->op_params)[3];

                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
                    tensor->grad,
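The four integers recovered above describe a strided window of the gradient: nb1/nb2/nb3 are byte strides and offset is a byte offset, matching the region the forward ACC (and later SET) wrote into. A small stand-alone sketch of what such byte strides mean, reduced to a 2-D case with made-up sizes and no ggml types:

#include <stdio.h>

/* address of element (i0, i1) in a buffer described by byte strides, as ggml's nb[] does */
static float * elem_2d(char * base, size_t offset, size_t nb0, size_t nb1, int i0, int i1) {
    return (float *) (base + offset + (size_t) i0 * nb0 + (size_t) i1 * nb1);
}

int main(void) {
    /* a 4x4 float "parent" buffer, and a 2x3 window starting at row 1, column 1 */
    float parent[4][4] = {0};
    size_t nb0 = sizeof(float);        /* stride between columns */
    size_t nb1 = 4 * sizeof(float);    /* stride between rows    */
    size_t offset = 1 * nb1 + 1 * nb0; /* skip one row and one column */

    for (int i1 = 0; i1 < 2; ++i1) {
        for (int i0 = 0; i0 < 3; ++i0) {
            *elem_2d((char *) parent, offset, nb0, nb1, i0, i1) = 1.0f;
        }
    }

    /* the window view touched exactly rows 1-2, columns 1-3 of the parent */
    for (int r = 0; r < 4; ++r) {
        for (int c = 0; c < 4; ++c) {
            printf("%.0f ", parent[r][c]);
        }
        printf("\n");
    }
    return 0;
}

The backward pass uses the same strides and offset to carve the matching window out of tensor->grad with ggml_view_4d, so src1's gradient is read from exactly the region src1 was written into.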
@@ -15365,73 +15049,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                            inplace);
                }
            } break;
-       case GGML_OP_ABS:
-           {
-               if (src0->grad) {
-                   src0->grad =
-                       ggml_add_impl(ctx,
-                           src0->grad,
-                           ggml_mul(ctx,
-                               ggml_sgn(ctx, src0),
-                               tensor->grad),
-                           inplace);
-               }
-           } break;
-       case GGML_OP_SGN:
-           {
-               if (src0->grad) {
-                   // noop
-               }
-           } break;
-       case GGML_OP_NEG:
-           {
-               if (src0->grad) {
-                   src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
-               }
-           } break;
-       case GGML_OP_STEP:
-           {
-               if (src0->grad) {
-                   // noop
-               }
-           } break;
-       case GGML_OP_TANH:
-           {
-               GGML_ASSERT(false); // TODO: not implemented
-           } break;
-       case GGML_OP_ELU:
-           {
-               GGML_ASSERT(false); // TODO: not implemented
-           } break;
-       case GGML_OP_RELU:
-           {
-               if (src0->grad) {
-                   src0->grad = ggml_sub_impl(ctx,
-                       src0->grad,
-                       ggml_mul(ctx,
-                           ggml_step(ctx, src0),
-                           tensor->grad),
-                       inplace);
-               }
-           } break;
-       case GGML_OP_GELU:
-           {
-               GGML_ASSERT(false); // TODO: not implemented
-           } break;
-       case GGML_OP_GELU_QUICK:
-           {
-               GGML_ASSERT(false); // TODO: not implemented
-           } break;
-       case GGML_OP_SILU:
-           {
-               // necessary for llama
-               if (src0->grad) {
-                   src0->grad = ggml_add_impl(ctx,
-                       src0->grad,
-                       ggml_silu_back(ctx, src0, tensor->grad),
-                       inplace);
-               }
-           } break;
        case GGML_OP_SILU_BACK:
            {
                GGML_ASSERT(false); // TODO: not implemented
@@ -15524,12 +15141,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            } break;
        case GGML_OP_SET:
            {
-
-
-               const size_t
-               const size_t
-               const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
-               const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
+               const size_t nb1 = ((int32_t *) tensor->op_params)[0];
+               const size_t nb2 = ((int32_t *) tensor->op_params)[1];
+               const size_t nb3 = ((int32_t *) tensor->op_params)[2];
+               const size_t offset = ((int32_t *) tensor->op_params)[3];

                struct ggml_tensor * tensor_grad_view = NULL;

@@ -15606,8 +15221,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            if (src0->grad) {
                size_t offset;

-
-               memcpy(&offset, tensor->src[2]->data, sizeof(offset));
+               memcpy(&offset, tensor->op_params, sizeof(offset));

                size_t nb1 = tensor->nb[1];
                size_t nb2 = tensor->nb[2];
@@ -15634,7 +15248,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            {
                // necessary for llama
                if (src0->grad) {
-                   int32_t * axes = (int32_t *) tensor->
+                   int32_t * axes = (int32_t *) tensor->op_params;
                    int axis0 = axes[0] & 0x3;
                    int axis1 = axes[1] & 0x3;
                    int axis2 = axes[2] & 0x3;
@@ -15690,33 +15304,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            {
                // necessary for llama
                if (src0->grad) {
-
-                   assert(ggml_nelements(src1) == 2);
-                   const int n_past = ((int32_t *) src1->data)[0];
+                   const int n_past = ((int32_t *) tensor->op_params)[0];
                    src0->grad =
                        ggml_add_impl(ctx, src0->grad,
                            ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
                            inplace);
                }
-               if (src1->grad) {
-                   // noop
-               }
            } break;
        case GGML_OP_DIAG_MASK_ZERO:
            {
                // necessary for llama
                if (src0->grad) {
-
-                   assert(ggml_nelements(src1) == 2);
-                   const int n_past = ((int32_t *) src1->data)[0];
+                   const int n_past = ((int32_t *) tensor->op_params)[0];
                    src0->grad =
                        ggml_add_impl(ctx, src0->grad,
                            ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
                            inplace);
                }
-               if (src1->grad) {
-                   // noop
-               }
            } break;
        case GGML_OP_SOFT_MAX:
            {
@@ -15737,12 +15341,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            {
                // necessary for llama
                if (src0->grad) {
-
-
-                   const int
-                   const int
-                   const int mode = ((int32_t *) src1->data)[2];
-                   const int n_ctx = ((int32_t *) src1->data)[3];
+                   const int n_past = ((int32_t *) tensor->op_params)[0];
+                   const int n_dims = ((int32_t *) tensor->op_params)[1];
+                   const int mode = ((int32_t *) tensor->op_params)[2];
+                   const int n_ctx = ((int32_t *) tensor->op_params)[3];
                    src0->grad = ggml_add_impl(ctx,
                        src0->grad,
                        ggml_rope_back(ctx,
@@ -15753,19 +15355,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                            n_ctx),
                        inplace);
                }
-               if (src1->grad) {
-                   // noop
-               }
            } break;
        case GGML_OP_ROPE_BACK:
            {
                if (src0->grad) {
-
-
-                   const int
-                   const int
-                   const int mode = ((int32_t *) src1->data)[2];
-                   const int n_ctx = ((int32_t *) src1->data)[3];
+                   const int n_past = ((int32_t *) tensor->op_params)[0];
+                   const int n_dims = ((int32_t *) tensor->op_params)[1];
+                   const int mode = ((int32_t *) tensor->op_params)[2];
+                   const int n_ctx = ((int32_t *) tensor->op_params)[3];
                    src0->grad = ggml_add_impl(ctx,
                        src0->grad,
                        ggml_rope(ctx,
@@ -15776,9 +15373,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                            n_ctx),
                        inplace);
                }
-               if (src1->grad) {
-                   // noop
-               }
            } break;
        case GGML_OP_ALIBI:
            {
@@ -15808,7 +15402,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            {
                struct ggml_tensor * flash_grad = NULL;
                if (src0->grad || src1->grad || tensor->src[2]->grad) {
-                   int32_t t =
+                   int32_t t = ggml_get_op_params_i32(tensor, 0);
                    GGML_ASSERT(t == 0 || t == 1);
                    bool masked = t != 0;
                    flash_grad =
@@ -15971,6 +15565,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
            } break;
        case GGML_OP_WIN_PART:
        case GGML_OP_WIN_UNPART:
+       case GGML_OP_UNARY:
+           {
+               switch (ggml_get_unary_op(tensor)) {
+                   case GGML_UNARY_OP_ABS:
+                       {
+                           if (src0->grad) {
+                               src0->grad =
+                                   ggml_add_impl(ctx,
+                                       src0->grad,
+                                       ggml_mul(ctx,
+                                           ggml_sgn(ctx, src0),
+                                           tensor->grad),
+                                       inplace);
+                           }
+                       } break;
+                   case GGML_UNARY_OP_SGN:
+                       {
+                           if (src0->grad) {
+                               // noop
+                           }
+                       } break;
+                   case GGML_UNARY_OP_NEG:
+                       {
+                           if (src0->grad) {
+                               src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
+                           }
+                       } break;
+                   case GGML_UNARY_OP_STEP:
+                       {
+                           if (src0->grad) {
+                               // noop
+                           }
+                       } break;
+                   case GGML_UNARY_OP_TANH:
+                       {
+                           GGML_ASSERT(false); // TODO: not implemented
+                       } break;
+                   case GGML_UNARY_OP_ELU:
+                       {
+                           GGML_ASSERT(false); // TODO: not implemented
+                       } break;
+                   case GGML_UNARY_OP_RELU:
+                       {
+                           if (src0->grad) {
+                               src0->grad = ggml_add_impl(ctx,
+                                   src0->grad,
+                                   ggml_mul(ctx,
+                                       ggml_step(ctx, src0),
+                                       tensor->grad),
+                                   inplace);
+                           }
+                       } break;
+                   case GGML_UNARY_OP_GELU:
+                       {
+                           GGML_ASSERT(false); // TODO: not implemented
+                       } break;
+                   case GGML_UNARY_OP_GELU_QUICK:
+                       {
+                           GGML_ASSERT(false); // TODO: not implemented
+                       } break;
+                   case GGML_UNARY_OP_SILU:
+                       {
+                           // necessary for llama
+                           if (src0->grad) {
+                               src0->grad = ggml_add_impl(ctx,
+                                   src0->grad,
+                                   ggml_silu_back(ctx, src0, tensor->grad),
+                                   inplace);
+                           }
+                       } break;
+                   default:
+                       GGML_ASSERT(false);
+               }
+           } break;
        case GGML_OP_MAP_UNARY:
        case GGML_OP_MAP_BINARY:
        case GGML_OP_MAP_CUSTOM1:
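The ten stand-alone operators that an earlier hunk deleted (GGML_OP_ABS through GGML_OP_SILU) reappear here as sub-cases of a single GGML_OP_UNARY, selected by ggml_get_unary_op(tensor). A compact stand-alone sketch of that consolidation pattern, with an illustrative enum and helper names that are not the ggml ones:

#include <math.h>
#include <stdio.h>

/* the sub-op is a small integer stored alongside the node, not a distinct top-level op */
enum unary_op { UNARY_ABS, UNARY_NEG, UNARY_RELU, UNARY_SILU };

static float apply_unary(enum unary_op op, float x) {
    switch (op) {
        case UNARY_ABS:  return fabsf(x);
        case UNARY_NEG:  return -x;
        case UNARY_RELU: return x > 0.0f ? x : 0.0f;
        case UNARY_SILU: return x / (1.0f + expf(-x));
    }
    return 0.0f;
}

/* derivative dispatch mirrors the backward switch above: one outer case, many sub-cases */
static float unary_grad(enum unary_op op, float x) {
    switch (op) {
        case UNARY_ABS:  return (float) ((x > 0.0f) - (x < 0.0f)); /* sgn(x), as in the ABS case  */
        case UNARY_NEG:  return -1.0f;
        case UNARY_RELU: return x > 0.0f ? 1.0f : 0.0f;            /* step(x), as in the RELU case */
        case UNARY_SILU: {
            float s = 1.0f / (1.0f + expf(-x));
            return s + x * s * (1.0f - s);                         /* d/dx [x * sigmoid(x)] */
        }
    }
    return 0.0f;
}

int main(void) {
    float x = -2.0f;
    printf("relu(%.1f) = %.1f, relu'(%.1f) = %.1f\n", x, apply_unary(UNARY_RELU, x), x, unary_grad(UNARY_RELU, x));
    printf("silu(%.1f) = %.3f, silu'(%.1f) = %.3f\n", x, apply_unary(UNARY_SILU, x), x, unary_grad(UNARY_SILU, x));
    return 0;
}

Collapsing the activations into one op is also what lets GGML_OP_COUNT shrink and allows the task-planning switch later in this diff to route every unary activation through a single GGML_OP_UNARY entry.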
@@ -16006,6 +15674,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
    }
}

+static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
+
+static size_t hash(void * p) {
+    return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
+}
+
+static bool hash_insert(void * hash_table[], void * p) {
+    size_t h = hash(p);
+
+    // linear probing
+    size_t i = h;
+    while (hash_table[i] != NULL && hash_table[i] != p) {
+        i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
+        if (i == h) {
+            // hash table is full
+            GGML_ASSERT(false);
+        }
+    }
+
+    if (hash_table[i] == p) {
+        return true;
+    }
+
+    // insert
+    hash_table[i] = p;
+    return false;
+}
+
 static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
    if (node->grad == NULL) {
        // this usually happens when we generate intermediate nodes from constants in the backward pass
@@ -16016,16 +15712,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
    }

    // check if already visited
-
-
-           return;
-       }
-   }
-
-   for (int i = 0; i < cgraph->n_leafs; i++) {
-       if (cgraph->leafs[i] == node) {
-           return;
-       }
+   if (hash_insert(cgraph->visited_hash_table, node)) {
+       return;
    }

    for (int i = 0; i < GGML_MAX_SRC; ++i) {
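The old visited check scanned cgraph->nodes and cgraph->leafs linearly for every tensor, which is quadratic in graph size; hash_insert replaces it with an open-addressing set over node pointers. A stand-alone miniature of the same idea; the table size and names are illustrative, and the diff above sizes the real table above 2 * GGML_MAX_NODES so the probe normally always finds a free slot:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define TABLE_SIZE 17 /* illustrative; comfortably above the expected node count */

static size_t hash_ptr(const void * p) {
    return (size_t) p % TABLE_SIZE;
}

/* returns true if p was already present, false if it was inserted just now */
static bool visited_insert(const void * table[], const void * p) {
    size_t h = hash_ptr(p);
    size_t i = h;

    /* linear probing; the real code asserts if the table ever fills up */
    while (table[i] != NULL && table[i] != p) {
        i = (i + 1) % TABLE_SIZE;
        if (i == h) {
            fprintf(stderr, "hash table full\n");
            return false;
        }
    }
    if (table[i] == p) {
        return true;  /* already visited */
    }
    table[i] = p;     /* first visit: remember it */
    return false;
}

int main(void) {
    const void * table[TABLE_SIZE] = {0};
    int a, b;

    printf("%d\n", visited_insert(table, &a)); /* 0: first visit      */
    printf("%d\n", visited_insert(table, &b)); /* 0: first visit      */
    printf("%d\n", visited_insert(table, &a)); /* 1: already visited  */
    return 0;
}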
@@ -16088,6 +15776,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
        /*.nodes        =*/ { NULL },
        /*.grads        =*/ { NULL },
        /*.leafs        =*/ { NULL },
+       /*.hash_table   =*/ { NULL },
        /*.perf_runs    =*/ 0,
        /*.perf_cycles  =*/ 0,
        /*.perf_time_us =*/ 0,
@@ -16129,13 +15818,42 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg

        if (node->is_param) {
            GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
-
+           ggml_build_forward_expand(&result, node->grad);
        }
    }

    return result;
}

+struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
+    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.n_nodes      =*/ 0,
+        /*.n_leafs      =*/ 0,
+        /*.nodes        =*/ { NULL },
+        /*.grads        =*/ { NULL },
+        /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
+        /*.perf_runs    =*/ 0,
+        /*.perf_cycles  =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };
+
+    return cgraph;
+}
+
+struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+    struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
+    ggml_build_forward_impl(cgraph, tensor, false);
+    return cgraph;
+}
+
+size_t ggml_graph_overhead(void) {
+    return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
+}
+
 //
 // thread data
 //
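ggml_new_graph places the ggml_cgraph itself inside the context's memory buffer behind an object header, and ggml_graph_overhead() reports how many bytes that costs so callers can size the context. A stand-alone sketch of that bump-allocation arithmetic; all sizes, struct layouts and the padding macro are illustrative stand-ins, not ggml's real constants:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MEM_ALIGN 16
#define PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))   /* round x up to a multiple of n */

/* stand-ins for ggml_object / ggml_cgraph; the real structs are larger */
struct object { size_t offs; size_t size; };
struct graph  { int n_nodes; int n_leafs; };

struct context {
    char * mem_buffer;
    size_t mem_size;
    size_t mem_used;
};

/* reserve `size` payload bytes in the context, preceded by an object header */
static struct object * new_object(struct context * ctx, size_t size) {
    size_t needed = sizeof(struct object) + PAD(size, MEM_ALIGN);
    if (ctx->mem_used + needed > ctx->mem_size) {
        return NULL; /* out of context memory */
    }
    struct object * obj = (struct object *) (ctx->mem_buffer + ctx->mem_used);
    obj->offs = ctx->mem_used + sizeof(struct object); /* payload starts right after the header */
    obj->size = PAD(size, MEM_ALIGN);
    ctx->mem_used += needed;
    return obj;
}

static size_t graph_overhead(void) {
    /* same shape as GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN) above */
    return sizeof(struct object) + PAD(sizeof(struct graph), MEM_ALIGN);
}

int main(void) {
    struct context ctx = { malloc(1024), 1024, 0 };

    struct object * obj = new_object(&ctx, sizeof(struct graph));
    struct graph * g = (struct graph *) (ctx.mem_buffer + obj->offs);
    memset(g, 0, sizeof(*g));

    printf("graph overhead: %zu bytes, used: %zu bytes\n", graph_overhead(), ctx.mem_used);
    free(ctx.mem_buffer);
    return 0;
}

The same object mechanism is reused later in this diff by ggml_graph_compute_with_ctx, which allocates its work buffer as a GGML_OBJECT_WORK_BUFFER inside the context instead of a separate tensor, which is why the old GGML_ASSERT(buf) path disappears.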
@@ -16201,7 +15919,7 @@ typedef pthread_t ggml_thread_t;

// Android's libc implementation "bionic" does not support setting affinity
#if defined(__linux__) && !defined(__BIONIC__)
-void set_numa_thread_affinity(int thread_n, int n_threads) {
+static void set_numa_thread_affinity(int thread_n, int n_threads) {
    if (!ggml_is_numa()) {
        return;
    }
@@ -16226,7 +15944,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) {
    CPU_FREE(cpus);
}

-void clear_numa_thread_affinity(void) {
+static void clear_numa_thread_affinity(void) {
    if (!ggml_is_numa()) {
        return;
    }
@@ -16250,8 +15968,8 @@ void clear_numa_thread_affinity(void) {
#else
// TODO: Windows etc.
// (the linux implementation may also work on BSD, someone should test)
-void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
-void clear_numa_thread_affinity(void) {}
+static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
+static void clear_numa_thread_affinity(void) {}
#endif

struct ggml_compute_state_shared {
@@ -16463,21 +16181,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
            case GGML_OP_ARGMAX:
            case GGML_OP_REPEAT:
            case GGML_OP_REPEAT_BACK:
-
-           case GGML_OP_SGN:
-           case GGML_OP_NEG:
-           case GGML_OP_STEP:
-           case GGML_OP_TANH:
-           case GGML_OP_ELU:
-           case GGML_OP_RELU:
-               {
+               {
                    n_tasks = 1;
                } break;
-
-           case
-
-
+
+           case GGML_OP_UNARY:
+               {
+                   switch (ggml_get_unary_op(node)) {
+                       case GGML_UNARY_OP_ABS:
+                       case GGML_UNARY_OP_SGN:
+                       case GGML_UNARY_OP_NEG:
+                       case GGML_UNARY_OP_STEP:
+                       case GGML_UNARY_OP_TANH:
+                       case GGML_UNARY_OP_ELU:
+                       case GGML_UNARY_OP_RELU:
+                           {
+                               n_tasks = 1;
+                           } break;
+
+                       case GGML_UNARY_OP_GELU:
+                       case GGML_UNARY_OP_GELU_QUICK:
+                       case GGML_UNARY_OP_SILU:
+                           {
+                               n_tasks = n_threads;
+                           } break;
+                   }
+               } break;
            case GGML_OP_SILU_BACK:
+           case GGML_OP_MUL:
            case GGML_OP_NORM:
            case GGML_OP_RMS_NORM:
            case GGML_OP_RMS_NORM_BACK:
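With the activations folded into GGML_OP_UNARY, the planner decides the thread count per sub-op: cheap element-wise ops stay single-threaded, while GELU, GELU_QUICK and SILU fan out across all workers. A small stand-alone sketch of that decision, with an illustrative enum and helper name:

#include <stdio.h>

enum unary_op { UNARY_ABS, UNARY_SGN, UNARY_NEG, UNARY_STEP, UNARY_TANH,
                UNARY_ELU, UNARY_RELU, UNARY_GELU, UNARY_GELU_QUICK, UNARY_SILU };

/* mirrors the planning switch above: 1 task for trivial ops, n_threads for the expensive ones */
static int unary_n_tasks(enum unary_op op, int n_threads) {
    switch (op) {
        case UNARY_GELU:
        case UNARY_GELU_QUICK:
        case UNARY_SILU:
            return n_threads;
        default:
            return 1;
    }
}

int main(void) {
    printf("relu: %d tasks, gelu: %d tasks\n",
           unary_n_tasks(UNARY_RELU, 8), unary_n_tasks(UNARY_GELU, 8));
    return 0;
}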
@@ -16542,10 +16273,10 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
            case GGML_OP_GET_ROWS:
            case GGML_OP_GET_ROWS_BACK:
            case GGML_OP_DIAG:
-           case GGML_OP_DIAG_MASK_ZERO:
                {
                    n_tasks = 1;
                } break;
+           case GGML_OP_DIAG_MASK_ZERO:
            case GGML_OP_DIAG_MASK_INF:
            case GGML_OP_SOFT_MAX:
            case GGML_OP_SOFT_MAX_BACK:
@@ -16838,10 +16569,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
    struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);

-   struct
-   GGML_ASSERT(buf);
+   struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);

-   cplan.work_data =
+   cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;

    ggml_graph_compute(cgraph, &cplan);
}
@@ -16992,7 +16722,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                fwrite(&nb, sizeof(uint64_t), 1, fout);
            }

-           fwrite(tensor->name,
+           fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
+           fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);

            // dump the data
            // TODO: pad this to 32 byte boundary
@@ -17025,7 +16756,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
                fwrite(&nb, sizeof(uint64_t), 1, fout);
            }

-           fwrite(tensor->name,
+           fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
+           fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);

            // output the op arguments
            {
@@ -17206,7 +16938,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **

            tensor->op = (enum ggml_op) op;

-           memcpy(tensor->name,
+           memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
+           memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;

            tensor->data = (void *) ptr;

@@ -17251,7 +16984,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                nb[j] = nb_cur;
            }

-           const char * ptr_name
+           const char * ptr_name = ptr; ptr += GGML_MAX_NAME;
+           const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS;

            const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t);

@@ -17288,8 +17022,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                    {
                        tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0);

-
-                       memcpy(&offs,
+                       size_t offs;
+                       memcpy(&offs, ptr_op_params, sizeof(offs));

                        tensor->data = ((char *) tensor->data) + offs;
                    } break;
@@ -17309,7 +17043,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
                    } break;
                }

-               memcpy(tensor->name,
+               memcpy(tensor->name, ptr_name, GGML_MAX_NAME);
+               memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS);

                for (int j = 0; j < GGML_MAX_DIMS; ++j) {
                    tensor->nb[j] = nb[j];
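Graph export now writes two fixed-width byte fields per tensor right after the shape and stride arrays: the name (GGML_MAX_NAME bytes) followed by the op parameters (GGML_MAX_OP_PARAMS bytes), and the importer advances its read pointer by the same amounts. A stand-alone sketch of that fixed-width record round-trip; the field sizes below are assumptions for the example, not the real constants:

#include <stdio.h>
#include <string.h>

#define MAX_NAME       8   /* assumed for the example */
#define MAX_OP_PARAMS 16   /* assumed for the example */

/* write one fixed-width record: name bytes, then op_params bytes */
static size_t write_record(char * out, const char * name, const char * op_params) {
    memcpy(out,            name,      MAX_NAME);
    memcpy(out + MAX_NAME, op_params, MAX_OP_PARAMS);
    return MAX_NAME + MAX_OP_PARAMS;
}

/* read it back the way the importer does: grab a field, advance the pointer */
static const char * read_record(const char * ptr, char * name, char * op_params) {
    memcpy(name,      ptr, MAX_NAME);      ptr += MAX_NAME;
    memcpy(op_params, ptr, MAX_OP_PARAMS); ptr += MAX_OP_PARAMS;
    return ptr;
}

int main(void) {
    char buf[MAX_NAME + MAX_OP_PARAMS] = {0};
    char name[MAX_NAME] = "Q";
    char op_params[MAX_OP_PARAMS] = {0};
    op_params[0] = 4; /* e.g. an n_past value in the low byte */

    write_record(buf, name, op_params);

    char name2[MAX_NAME];
    char op_params2[MAX_OP_PARAMS];
    read_record(buf, name2, op_params2);

    printf("name = %s, first param byte = %d\n", name2, op_params2[0]);
    return 0;
}

Because both fields have a fixed width, the reader never needs a length prefix; it only has to keep its offsets in step with the writer, which is exactly what the paired export/import hunks above do.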
@@ -17343,7 +17078,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
        GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                i,
                node->ne[0], node->ne[1], node->ne[2],
-
+               ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
                (double) node->perf_cycles / (double) ggml_cycles_per_ms(),
                (double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
                (double) node->perf_time_us / 1000.0,
@@ -17357,7 +17092,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
        GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
                i,
                node->ne[0], node->ne[1],
-
+               ggml_op_name(node->op));
    }

    for (int i = 0; i < GGML_OP_COUNT; i++) {
@@ -17365,7 +17100,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
            continue;
        }

-       GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",
+       GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0);
    }

    GGML_PRINT("========================================\n");
@@ -17459,13 +17194,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
        }

        if (node->n_dims == 2) {
-           fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1],
+           fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
        } else {
-           fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2],
+           fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
        }

        if (node->grad) {
-           fprintf(fp, " | <g>%s\"; ]\n",
+           fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(node->grad->op));
        } else {
            fprintf(fp, "\"; ]\n");
        }