llama_cpp 0.3.4 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +18 -2
- data/ext/llama_cpp/extconf.rb +2 -1
- data/ext/llama_cpp/llama_cpp.cpp +315 -8
- data/ext/llama_cpp/src/ggml-alloc.c +541 -0
- data/ext/llama_cpp/src/ggml-alloc.h +22 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +2271 -414
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +7 -0
- data/ext/llama_cpp/src/ggml-metal.m +218 -87
- data/ext/llama_cpp/src/ggml-metal.metal +72 -55
- data/ext/llama_cpp/src/ggml.c +754 -996
- data/ext/llama_cpp/src/ggml.h +94 -18
- data/ext/llama_cpp/src/k_quants.c +350 -24
- data/ext/llama_cpp/src/llama.cpp +713 -179
- data/ext/llama_cpp/src/llama.h +61 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +26 -0
- metadata +4 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -3440,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
|
|
3440
3440
|
|
3441
3441
|
//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
|
3442
3442
|
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
|
3443
|
-
#if defined(
|
3443
|
+
#if defined(GGML_USE_ACCELERATE)
|
3444
|
+
vDSP_vsmul(y, 1, &v, y, 1, n);
|
3445
|
+
#elif defined(GGML_SIMD)
|
3444
3446
|
const int np = (n & ~(GGML_F32_STEP - 1));
|
3445
3447
|
|
3446
3448
|
GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
|
@@ -3603,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
|
|
3603
3605
|
#endif
|
3604
3606
|
}
|
3605
3607
|
|
3606
|
-
inline static void
|
3608
|
+
inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
|
3607
3609
|
ggml_float sum = 0.0;
|
3608
3610
|
for (int i = 0; i < n; ++i) {
|
3609
3611
|
sum += (ggml_float)x[i];
|
@@ -3611,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x
|
|
3611
3613
|
*s = sum;
|
3612
3614
|
}
|
3613
3615
|
|
3616
|
+
inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) {
|
3617
|
+
float sum = 0.0f;
|
3618
|
+
for (int i = 0; i < n; ++i) {
|
3619
|
+
sum += GGML_FP16_TO_FP32(x[i]);
|
3620
|
+
}
|
3621
|
+
*s = sum;
|
3622
|
+
}
|
3623
|
+
|
3614
3624
|
inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
|
3615
3625
|
#ifndef GGML_USE_ACCELERATE
|
3616
3626
|
float max = -INFINITY;
|
@@ -3750,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
3750
3760
|
"ARGMAX",
|
3751
3761
|
"REPEAT",
|
3752
3762
|
"REPEAT_BACK",
|
3753
|
-
"ABS",
|
3754
|
-
"SGN",
|
3755
|
-
"NEG",
|
3756
|
-
"STEP",
|
3757
|
-
"TANH",
|
3758
|
-
"ELU",
|
3759
|
-
"RELU",
|
3760
|
-
"GELU",
|
3761
|
-
"GELU_QUICK",
|
3762
|
-
"SILU",
|
3763
3763
|
"SILU_BACK",
|
3764
3764
|
"NORM",
|
3765
3765
|
"RMS_NORM",
|
@@ -3798,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
3798
3798
|
"WIN_PART",
|
3799
3799
|
"WIN_UNPART",
|
3800
3800
|
|
3801
|
+
"UNARY",
|
3802
|
+
|
3801
3803
|
"MAP_UNARY",
|
3802
3804
|
"MAP_BINARY",
|
3803
3805
|
|
@@ -3809,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|
3809
3811
|
"CROSS_ENTROPY_LOSS_BACK",
|
3810
3812
|
};
|
3811
3813
|
|
3812
|
-
static_assert(GGML_OP_COUNT ==
|
3814
|
+
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
3813
3815
|
|
3814
3816
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
3815
3817
|
"none",
|
@@ -3830,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
3830
3832
|
"argmax(x)",
|
3831
3833
|
"repeat(x)",
|
3832
3834
|
"repeat_back(x)",
|
3833
|
-
"abs(x)",
|
3834
|
-
"sgn(x)",
|
3835
|
-
"-x",
|
3836
|
-
"step(x)",
|
3837
|
-
"tanh(x)",
|
3838
|
-
"elu(x)",
|
3839
|
-
"relu(x)",
|
3840
|
-
"gelu(x)",
|
3841
|
-
"gelu_quick(x)",
|
3842
|
-
"silu(x)",
|
3843
3835
|
"silu_back(x)",
|
3844
3836
|
"norm(x)",
|
3845
3837
|
"rms_norm(x)",
|
@@ -3878,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
3878
3870
|
"win_part(x)",
|
3879
3871
|
"win_unpart(x)",
|
3880
3872
|
|
3873
|
+
"unary(x)",
|
3874
|
+
|
3881
3875
|
"f(x)",
|
3882
3876
|
"f(x,y)",
|
3883
3877
|
|
@@ -3889,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|
3889
3883
|
"cross_entropy_loss_back(x,y)",
|
3890
3884
|
};
|
3891
3885
|
|
3892
|
-
static_assert(GGML_OP_COUNT ==
|
3886
|
+
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
3893
3887
|
|
3894
3888
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
3895
3889
|
|
@@ -4077,8 +4071,8 @@ bool ggml_is_numa(void) {
|
|
4077
4071
|
////////////////////////////////////////////////////////////////////////////////
|
4078
4072
|
|
4079
4073
|
void ggml_print_object(const struct ggml_object * obj) {
|
4080
|
-
GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
|
4081
|
-
obj->offs, obj->size, (const void *) obj->next);
|
4074
|
+
GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
|
4075
|
+
obj->type, obj->offs, obj->size, (const void *) obj->next);
|
4082
4076
|
}
|
4083
4077
|
|
4084
4078
|
void ggml_print_objects(const struct ggml_context * ctx) {
|
@@ -4145,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) {
|
|
4145
4139
|
return GGML_OP_NAME[op];
|
4146
4140
|
}
|
4147
4141
|
|
4142
|
+
const char * ggml_op_symbol(enum ggml_op op) {
|
4143
|
+
return GGML_OP_SYMBOL[op];
|
4144
|
+
}
|
4145
|
+
|
4148
4146
|
size_t ggml_element_size(const struct ggml_tensor * tensor) {
|
4149
4147
|
return GGML_TYPE_SIZE[tensor->type];
|
4150
4148
|
}
|
@@ -4214,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
|
|
4214
4212
|
}
|
4215
4213
|
|
4216
4214
|
size_t ggml_tensor_overhead(void) {
|
4217
|
-
return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE
|
4215
|
+
return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
|
4218
4216
|
}
|
4219
4217
|
|
4220
4218
|
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
|
@@ -4231,6 +4229,15 @@ bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
|
|
4231
4229
|
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
|
4232
4230
|
}
|
4233
4231
|
|
4232
|
+
static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * tensor) {
|
4233
|
+
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
4234
|
+
|
4235
|
+
return
|
4236
|
+
tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
|
4237
|
+
tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
|
4238
|
+
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
|
4239
|
+
}
|
4240
|
+
|
4234
4241
|
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
|
4235
4242
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
4236
4243
|
|
@@ -4376,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|
4376
4383
|
return NULL;
|
4377
4384
|
}
|
4378
4385
|
|
4379
|
-
const size_t mem_size =
|
4386
|
+
const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
|
4380
4387
|
|
4381
4388
|
*ctx = (struct ggml_context) {
|
4382
4389
|
/*.mem_size =*/ mem_size,
|
@@ -4443,6 +4450,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
|
|
4443
4450
|
return result;
|
4444
4451
|
}
|
4445
4452
|
|
4453
|
+
bool ggml_get_no_alloc(struct ggml_context * ctx) {
|
4454
|
+
return ctx->no_alloc;
|
4455
|
+
}
|
4456
|
+
|
4446
4457
|
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
|
4447
4458
|
ctx->no_alloc = no_alloc;
|
4448
4459
|
}
|
@@ -4461,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
|
|
4461
4472
|
struct ggml_object * obj = ctx->objects_begin;
|
4462
4473
|
|
4463
4474
|
while (obj != NULL) {
|
4464
|
-
|
4475
|
+
if (obj->type == GGML_OBJECT_TENSOR) {
|
4476
|
+
struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
|
4465
4477
|
|
4466
|
-
|
4478
|
+
const size_t size = ggml_nbytes(tensor);
|
4467
4479
|
|
4468
|
-
|
4469
|
-
|
4480
|
+
if (max_size < size) {
|
4481
|
+
max_size = size;
|
4482
|
+
}
|
4470
4483
|
}
|
4471
4484
|
|
4472
4485
|
obj = obj->next;
|
@@ -4480,7 +4493,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
|
|
4480
4493
|
// this is an error prone process, but it is necessary to support inplace
|
4481
4494
|
// operators when using scratch buffers
|
4482
4495
|
// TODO: implement a better way
|
4483
|
-
void ggml_scratch_save(struct ggml_context * ctx) {
|
4496
|
+
static void ggml_scratch_save(struct ggml_context * ctx) {
|
4484
4497
|
// this is needed to allow opt tensors to store their data
|
4485
4498
|
// TODO: again, need to find a better way
|
4486
4499
|
ctx->no_alloc_save = ctx->no_alloc;
|
@@ -4490,7 +4503,7 @@ void ggml_scratch_save(struct ggml_context * ctx) {
|
|
4490
4503
|
ctx->scratch.data = NULL;
|
4491
4504
|
}
|
4492
4505
|
|
4493
|
-
void ggml_scratch_load(struct ggml_context * ctx) {
|
4506
|
+
static void ggml_scratch_load(struct ggml_context * ctx) {
|
4494
4507
|
ctx->no_alloc = ctx->no_alloc_save;
|
4495
4508
|
|
4496
4509
|
ctx->scratch = ctx->scratch_save;
|
@@ -4498,12 +4511,7 @@ void ggml_scratch_load(struct ggml_context * ctx) {
|
|
4498
4511
|
|
4499
4512
|
////////////////////////////////////////////////////////////////////////////////
|
4500
4513
|
|
4501
|
-
struct
|
4502
|
-
struct ggml_context * ctx,
|
4503
|
-
enum ggml_type type,
|
4504
|
-
int n_dims,
|
4505
|
-
const int64_t* ne,
|
4506
|
-
void* data) {
|
4514
|
+
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
|
4507
4515
|
// always insert objects at the end of the context's memory pool
|
4508
4516
|
struct ggml_object * obj_cur = ctx->objects_end;
|
4509
4517
|
|
@@ -4511,77 +4519,81 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4511
4519
|
const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
|
4512
4520
|
const size_t cur_end = cur_offs + cur_size;
|
4513
4521
|
|
4514
|
-
|
4515
|
-
|
4516
|
-
if (data == NULL && !ctx->no_alloc) {
|
4517
|
-
size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
|
4518
|
-
for (int i = 1; i < n_dims; i++) {
|
4519
|
-
size_needed *= ne[i];
|
4520
|
-
}
|
4521
|
-
// align to GGML_MEM_ALIGN
|
4522
|
-
size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
|
4523
|
-
}
|
4522
|
+
// align to GGML_MEM_ALIGN
|
4523
|
+
size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
|
4524
4524
|
|
4525
4525
|
char * const mem_buffer = ctx->mem_buffer;
|
4526
4526
|
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
|
4527
4527
|
|
4528
|
-
if (
|
4529
|
-
|
4528
|
+
if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
|
4529
|
+
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
4530
|
+
__func__, cur_end + size_needed, ctx->mem_size);
|
4531
|
+
assert(false);
|
4532
|
+
return NULL;
|
4533
|
+
}
|
4534
|
+
|
4535
|
+
*obj_new = (struct ggml_object) {
|
4536
|
+
.offs = cur_end + GGML_OBJECT_SIZE,
|
4537
|
+
.size = size_needed,
|
4538
|
+
.next = NULL,
|
4539
|
+
.type = type,
|
4540
|
+
};
|
4530
4541
|
|
4531
|
-
|
4532
|
-
GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
|
4533
|
-
__func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
|
4534
|
-
assert(false);
|
4535
|
-
return NULL;
|
4536
|
-
}
|
4542
|
+
ggml_assert_aligned(mem_buffer + obj_new->offs);
|
4537
4543
|
|
4538
|
-
|
4539
|
-
|
4540
|
-
.size = size_needed,
|
4541
|
-
.next = NULL,
|
4542
|
-
};
|
4544
|
+
if (obj_cur != NULL) {
|
4545
|
+
obj_cur->next = obj_new;
|
4543
4546
|
} else {
|
4544
|
-
|
4545
|
-
|
4546
|
-
|
4547
|
-
|
4548
|
-
|
4547
|
+
// this is the first object in this context
|
4548
|
+
ctx->objects_begin = obj_new;
|
4549
|
+
}
|
4550
|
+
|
4551
|
+
ctx->objects_end = obj_new;
|
4552
|
+
|
4553
|
+
//printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
|
4554
|
+
|
4555
|
+
return obj_new;
|
4556
|
+
}
|
4557
|
+
|
4558
|
+
static struct ggml_tensor * ggml_new_tensor_impl(
|
4559
|
+
struct ggml_context * ctx,
|
4560
|
+
enum ggml_type type,
|
4561
|
+
int n_dims,
|
4562
|
+
const int64_t * ne,
|
4563
|
+
void * data) {
|
4564
|
+
|
4565
|
+
assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
|
4566
|
+
|
4567
|
+
size_t data_size = 0;
|
4568
|
+
|
4569
|
+
if (data == NULL && !ctx->no_alloc) {
|
4570
|
+
data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
|
4571
|
+
for (int i = 1; i < n_dims; i++) {
|
4572
|
+
data_size *= ne[i];
|
4549
4573
|
}
|
4574
|
+
}
|
4550
4575
|
|
4551
|
-
|
4552
|
-
|
4553
|
-
|
4576
|
+
if (ctx->scratch.data != NULL && data == NULL) {
|
4577
|
+
// allocate tensor data in the scratch buffer
|
4578
|
+
if (ctx->scratch.offs + data_size > ctx->scratch.size) {
|
4579
|
+
GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
|
4580
|
+
__func__, ctx->scratch.offs + data_size, ctx->scratch.size);
|
4554
4581
|
assert(false);
|
4555
4582
|
return NULL;
|
4556
4583
|
}
|
4557
4584
|
|
4558
4585
|
data = (char * const) ctx->scratch.data + ctx->scratch.offs;
|
4559
4586
|
|
4560
|
-
|
4561
|
-
.offs = cur_end + GGML_OBJECT_SIZE,
|
4562
|
-
.size = GGML_TENSOR_SIZE,
|
4563
|
-
.next = NULL,
|
4564
|
-
};
|
4565
|
-
|
4566
|
-
//printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
|
4587
|
+
ctx->scratch.offs += data_size;
|
4567
4588
|
|
4568
|
-
|
4589
|
+
data_size = 0;
|
4569
4590
|
}
|
4570
4591
|
|
4571
|
-
|
4572
|
-
obj_cur->next = obj_new;
|
4573
|
-
} else {
|
4574
|
-
// this is the first object in this context
|
4575
|
-
ctx->objects_begin = obj_new;
|
4576
|
-
}
|
4577
|
-
|
4578
|
-
ctx->objects_end = obj_new;
|
4592
|
+
struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
|
4579
4593
|
|
4580
|
-
//
|
4594
|
+
// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
|
4581
4595
|
|
4582
|
-
struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
|
4583
|
-
|
4584
|
-
ggml_assert_aligned(result);
|
4596
|
+
struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
|
4585
4597
|
|
4586
4598
|
*result = (struct ggml_tensor) {
|
4587
4599
|
/*.type =*/ type,
|
@@ -4590,6 +4602,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4590
4602
|
/*.ne =*/ { 1, 1, 1, 1 },
|
4591
4603
|
/*.nb =*/ { 0, 0, 0, 0 },
|
4592
4604
|
/*.op =*/ GGML_OP_NONE,
|
4605
|
+
/*.op_params =*/ {0},
|
4593
4606
|
/*.is_param =*/ false,
|
4594
4607
|
/*.grad =*/ NULL,
|
4595
4608
|
/*.src =*/ { NULL },
|
@@ -4620,24 +4633,39 @@ struct ggml_tensor * ggml_new_tensor_impl(
|
|
4620
4633
|
return result;
|
4621
4634
|
}
|
4622
4635
|
|
4636
|
+
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
|
4637
|
+
assert(params_size <= GGML_MAX_OP_PARAMS);
|
4638
|
+
memcpy(tensor->op_params, params, params_size);
|
4639
|
+
}
|
4640
|
+
|
4641
|
+
static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
|
4642
|
+
assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
|
4643
|
+
return ((const int32_t *)(tensor->op_params))[i];
|
4644
|
+
}
|
4645
|
+
|
4646
|
+
static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
|
4647
|
+
assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
|
4648
|
+
((int32_t *)(tensor->op_params))[i] = value;
|
4649
|
+
}
|
4650
|
+
|
4623
4651
|
struct ggml_tensor * ggml_new_tensor(
|
4624
4652
|
struct ggml_context * ctx,
|
4625
|
-
enum ggml_type
|
4626
|
-
int
|
4627
|
-
const int64_t
|
4653
|
+
enum ggml_type type,
|
4654
|
+
int n_dims,
|
4655
|
+
const int64_t * ne) {
|
4628
4656
|
return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
|
4629
4657
|
}
|
4630
4658
|
|
4631
4659
|
struct ggml_tensor * ggml_new_tensor_1d(
|
4632
4660
|
struct ggml_context * ctx,
|
4633
|
-
enum ggml_type
|
4661
|
+
enum ggml_type type,
|
4634
4662
|
int64_t ne0) {
|
4635
4663
|
return ggml_new_tensor(ctx, type, 1, &ne0);
|
4636
4664
|
}
|
4637
4665
|
|
4638
4666
|
struct ggml_tensor * ggml_new_tensor_2d(
|
4639
4667
|
struct ggml_context * ctx,
|
4640
|
-
enum ggml_type
|
4668
|
+
enum ggml_type type,
|
4641
4669
|
int64_t ne0,
|
4642
4670
|
int64_t ne1) {
|
4643
4671
|
const int64_t ne[2] = { ne0, ne1 };
|
@@ -4646,7 +4674,7 @@ struct ggml_tensor * ggml_new_tensor_2d(
|
|
4646
4674
|
|
4647
4675
|
struct ggml_tensor * ggml_new_tensor_3d(
|
4648
4676
|
struct ggml_context * ctx,
|
4649
|
-
enum ggml_type
|
4677
|
+
enum ggml_type type,
|
4650
4678
|
int64_t ne0,
|
4651
4679
|
int64_t ne1,
|
4652
4680
|
int64_t ne2) {
|
@@ -4951,6 +4979,11 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
|
|
4951
4979
|
return (float *)(tensor->data);
|
4952
4980
|
}
|
4953
4981
|
|
4982
|
+
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
|
4983
|
+
GGML_ASSERT(tensor->op == GGML_OP_UNARY);
|
4984
|
+
return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
|
4985
|
+
}
|
4986
|
+
|
4954
4987
|
const char * ggml_get_name(const struct ggml_tensor * tensor) {
|
4955
4988
|
return tensor->name;
|
4956
4989
|
}
|
@@ -4989,9 +5022,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
|
|
4989
5022
|
char * const mem_buffer = ctx->mem_buffer;
|
4990
5023
|
|
4991
5024
|
while (obj != NULL) {
|
4992
|
-
|
4993
|
-
|
4994
|
-
|
5025
|
+
if (obj->type == GGML_OBJECT_TENSOR) {
|
5026
|
+
struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
|
5027
|
+
if (strcmp(cur->name, name) == 0) {
|
5028
|
+
return cur;
|
5029
|
+
}
|
4995
5030
|
}
|
4996
5031
|
|
4997
5032
|
obj = obj->next;
|
@@ -5004,7 +5039,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
|
|
5004
5039
|
|
5005
5040
|
// ggml_dup
|
5006
5041
|
|
5007
|
-
struct ggml_tensor * ggml_dup_impl(
|
5042
|
+
static struct ggml_tensor * ggml_dup_impl(
|
5008
5043
|
struct ggml_context * ctx,
|
5009
5044
|
struct ggml_tensor * a,
|
5010
5045
|
bool inplace) {
|
@@ -5019,7 +5054,6 @@ struct ggml_tensor * ggml_dup_impl(
|
|
5019
5054
|
result->op = GGML_OP_DUP;
|
5020
5055
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5021
5056
|
result->src[0] = a;
|
5022
|
-
result->src[1] = NULL;
|
5023
5057
|
|
5024
5058
|
return result;
|
5025
5059
|
}
|
@@ -5038,7 +5072,7 @@ struct ggml_tensor * ggml_dup_inplace(
|
|
5038
5072
|
|
5039
5073
|
// ggml_add
|
5040
5074
|
|
5041
|
-
struct ggml_tensor * ggml_add_impl(
|
5075
|
+
static struct ggml_tensor * ggml_add_impl(
|
5042
5076
|
struct ggml_context * ctx,
|
5043
5077
|
struct ggml_tensor * a,
|
5044
5078
|
struct ggml_tensor * b,
|
@@ -5081,7 +5115,7 @@ struct ggml_tensor * ggml_add_inplace(
|
|
5081
5115
|
|
5082
5116
|
// ggml_add1
|
5083
5117
|
|
5084
|
-
struct ggml_tensor * ggml_add1_impl(
|
5118
|
+
static struct ggml_tensor * ggml_add1_impl(
|
5085
5119
|
struct ggml_context * ctx,
|
5086
5120
|
struct ggml_tensor * a,
|
5087
5121
|
struct ggml_tensor * b,
|
@@ -5121,7 +5155,7 @@ struct ggml_tensor * ggml_add1_inplace(
|
|
5121
5155
|
|
5122
5156
|
// ggml_acc
|
5123
5157
|
|
5124
|
-
struct ggml_tensor * ggml_acc_impl(
|
5158
|
+
static struct ggml_tensor * ggml_acc_impl(
|
5125
5159
|
struct ggml_context * ctx,
|
5126
5160
|
struct ggml_tensor * a,
|
5127
5161
|
struct ggml_tensor * b,
|
@@ -5143,23 +5177,13 @@ struct ggml_tensor * ggml_acc_impl(
|
|
5143
5177
|
|
5144
5178
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5145
5179
|
|
5146
|
-
|
5147
|
-
|
5148
|
-
struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
|
5149
|
-
|
5150
|
-
((int32_t *) c->data)[0] = nb1;
|
5151
|
-
((int32_t *) c->data)[1] = nb2;
|
5152
|
-
((int32_t *) c->data)[2] = nb3;
|
5153
|
-
((int32_t *) c->data)[3] = offset;
|
5154
|
-
((int32_t *) c->data)[4] = inplace ? 1 : 0;
|
5155
|
-
|
5156
|
-
ggml_scratch_load(ctx);
|
5180
|
+
int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
|
5181
|
+
ggml_set_op_params(result, params, sizeof(params));
|
5157
5182
|
|
5158
5183
|
result->op = GGML_OP_ACC;
|
5159
5184
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5160
5185
|
result->src[0] = a;
|
5161
5186
|
result->src[1] = b;
|
5162
|
-
result->src[2] = c;
|
5163
5187
|
|
5164
5188
|
return result;
|
5165
5189
|
}
|
@@ -5188,7 +5212,7 @@ struct ggml_tensor * ggml_acc_inplace(
|
|
5188
5212
|
|
5189
5213
|
// ggml_sub
|
5190
5214
|
|
5191
|
-
struct ggml_tensor * ggml_sub_impl(
|
5215
|
+
static struct ggml_tensor * ggml_sub_impl(
|
5192
5216
|
struct ggml_context * ctx,
|
5193
5217
|
struct ggml_tensor * a,
|
5194
5218
|
struct ggml_tensor * b,
|
@@ -5227,7 +5251,7 @@ struct ggml_tensor * ggml_sub_inplace(
|
|
5227
5251
|
|
5228
5252
|
// ggml_mul
|
5229
5253
|
|
5230
|
-
struct ggml_tensor * ggml_mul_impl(
|
5254
|
+
static struct ggml_tensor * ggml_mul_impl(
|
5231
5255
|
struct ggml_context * ctx,
|
5232
5256
|
struct ggml_tensor * a,
|
5233
5257
|
struct ggml_tensor * b,
|
@@ -5274,7 +5298,7 @@ struct ggml_tensor * ggml_mul_inplace(
|
|
5274
5298
|
|
5275
5299
|
// ggml_div
|
5276
5300
|
|
5277
|
-
struct ggml_tensor * ggml_div_impl(
|
5301
|
+
static struct ggml_tensor * ggml_div_impl(
|
5278
5302
|
struct ggml_context * ctx,
|
5279
5303
|
struct ggml_tensor * a,
|
5280
5304
|
struct ggml_tensor * b,
|
@@ -5317,7 +5341,7 @@ struct ggml_tensor * ggml_div_inplace(
|
|
5317
5341
|
|
5318
5342
|
// ggml_sqr
|
5319
5343
|
|
5320
|
-
struct ggml_tensor * ggml_sqr_impl(
|
5344
|
+
static struct ggml_tensor * ggml_sqr_impl(
|
5321
5345
|
struct ggml_context * ctx,
|
5322
5346
|
struct ggml_tensor * a,
|
5323
5347
|
bool inplace) {
|
@@ -5332,7 +5356,6 @@ struct ggml_tensor * ggml_sqr_impl(
|
|
5332
5356
|
result->op = GGML_OP_SQR;
|
5333
5357
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5334
5358
|
result->src[0] = a;
|
5335
|
-
result->src[1] = NULL;
|
5336
5359
|
|
5337
5360
|
return result;
|
5338
5361
|
}
|
@@ -5351,7 +5374,7 @@ struct ggml_tensor * ggml_sqr_inplace(
|
|
5351
5374
|
|
5352
5375
|
// ggml_sqrt
|
5353
5376
|
|
5354
|
-
struct ggml_tensor * ggml_sqrt_impl(
|
5377
|
+
static struct ggml_tensor * ggml_sqrt_impl(
|
5355
5378
|
struct ggml_context * ctx,
|
5356
5379
|
struct ggml_tensor * a,
|
5357
5380
|
bool inplace) {
|
@@ -5366,7 +5389,6 @@ struct ggml_tensor * ggml_sqrt_impl(
|
|
5366
5389
|
result->op = GGML_OP_SQRT;
|
5367
5390
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5368
5391
|
result->src[0] = a;
|
5369
|
-
result->src[1] = NULL;
|
5370
5392
|
|
5371
5393
|
return result;
|
5372
5394
|
}
|
@@ -5386,7 +5408,7 @@ struct ggml_tensor * ggml_sqrt_inplace(
|
|
5386
5408
|
|
5387
5409
|
// ggml_log
|
5388
5410
|
|
5389
|
-
struct ggml_tensor * ggml_log_impl(
|
5411
|
+
static struct ggml_tensor * ggml_log_impl(
|
5390
5412
|
struct ggml_context * ctx,
|
5391
5413
|
struct ggml_tensor * a,
|
5392
5414
|
bool inplace) {
|
@@ -5401,7 +5423,6 @@ struct ggml_tensor * ggml_log_impl(
|
|
5401
5423
|
result->op = GGML_OP_LOG;
|
5402
5424
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5403
5425
|
result->src[0] = a;
|
5404
|
-
result->src[1] = NULL;
|
5405
5426
|
|
5406
5427
|
return result;
|
5407
5428
|
}
|
@@ -5434,7 +5455,6 @@ struct ggml_tensor * ggml_sum(
|
|
5434
5455
|
result->op = GGML_OP_SUM;
|
5435
5456
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5436
5457
|
result->src[0] = a;
|
5437
|
-
result->src[1] = NULL;
|
5438
5458
|
|
5439
5459
|
return result;
|
5440
5460
|
}
|
@@ -5461,7 +5481,6 @@ struct ggml_tensor * ggml_sum_rows(
|
|
5461
5481
|
result->op = GGML_OP_SUM_ROWS;
|
5462
5482
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5463
5483
|
result->src[0] = a;
|
5464
|
-
result->src[1] = NULL;
|
5465
5484
|
|
5466
5485
|
return result;
|
5467
5486
|
}
|
@@ -5484,7 +5503,6 @@ struct ggml_tensor * ggml_mean(
|
|
5484
5503
|
result->op = GGML_OP_MEAN;
|
5485
5504
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5486
5505
|
result->src[0] = a;
|
5487
|
-
result->src[1] = NULL;
|
5488
5506
|
|
5489
5507
|
return result;
|
5490
5508
|
}
|
@@ -5508,7 +5526,6 @@ struct ggml_tensor * ggml_argmax(
|
|
5508
5526
|
result->op = GGML_OP_ARGMAX;
|
5509
5527
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5510
5528
|
result->src[0] = a;
|
5511
|
-
result->src[1] = NULL;
|
5512
5529
|
|
5513
5530
|
return result;
|
5514
5531
|
}
|
@@ -5571,343 +5588,142 @@ struct ggml_tensor * ggml_repeat_back(
|
|
5571
5588
|
|
5572
5589
|
// ggml_abs
|
5573
5590
|
|
5574
|
-
struct ggml_tensor * ggml_abs_impl(
|
5575
|
-
struct ggml_context * ctx,
|
5576
|
-
struct ggml_tensor * a,
|
5577
|
-
bool inplace) {
|
5578
|
-
bool is_node = false;
|
5579
|
-
|
5580
|
-
if (!inplace && (a->grad)) {
|
5581
|
-
is_node = true;
|
5582
|
-
}
|
5583
|
-
|
5584
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5585
|
-
|
5586
|
-
result->op = GGML_OP_ABS;
|
5587
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5588
|
-
result->src[0] = a;
|
5589
|
-
result->src[1] = NULL;
|
5590
|
-
|
5591
|
-
return result;
|
5592
|
-
}
|
5593
|
-
|
5594
5591
|
struct ggml_tensor * ggml_abs(
|
5595
5592
|
struct ggml_context * ctx,
|
5596
5593
|
struct ggml_tensor * a) {
|
5597
|
-
return
|
5594
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
|
5598
5595
|
}
|
5599
5596
|
|
5600
5597
|
struct ggml_tensor * ggml_abs_inplace(
|
5601
5598
|
struct ggml_context * ctx,
|
5602
5599
|
struct ggml_tensor * a) {
|
5603
|
-
return
|
5600
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
|
5604
5601
|
}
|
5605
5602
|
|
5606
|
-
|
5607
5603
|
// ggml_sgn
|
5608
5604
|
|
5609
|
-
struct ggml_tensor * ggml_sgn_impl(
|
5610
|
-
struct ggml_context * ctx,
|
5611
|
-
struct ggml_tensor * a,
|
5612
|
-
bool inplace) {
|
5613
|
-
bool is_node = false;
|
5614
|
-
|
5615
|
-
if (!inplace && (a->grad)) {
|
5616
|
-
is_node = true;
|
5617
|
-
}
|
5618
|
-
|
5619
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5620
|
-
|
5621
|
-
result->op = GGML_OP_SGN;
|
5622
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5623
|
-
result->src[0] = a;
|
5624
|
-
result->src[1] = NULL;
|
5625
|
-
|
5626
|
-
return result;
|
5627
|
-
}
|
5628
|
-
|
5629
5605
|
struct ggml_tensor * ggml_sgn(
|
5630
5606
|
struct ggml_context * ctx,
|
5631
5607
|
struct ggml_tensor * a) {
|
5632
|
-
return
|
5608
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
|
5633
5609
|
}
|
5634
5610
|
|
5635
5611
|
struct ggml_tensor * ggml_sgn_inplace(
|
5636
5612
|
struct ggml_context * ctx,
|
5637
5613
|
struct ggml_tensor * a) {
|
5638
|
-
return
|
5614
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
|
5639
5615
|
}
|
5640
5616
|
|
5641
5617
|
// ggml_neg
|
5642
5618
|
|
5643
|
-
struct ggml_tensor * ggml_neg_impl(
|
5644
|
-
struct ggml_context * ctx,
|
5645
|
-
struct ggml_tensor * a,
|
5646
|
-
bool inplace) {
|
5647
|
-
bool is_node = false;
|
5648
|
-
|
5649
|
-
if (!inplace && (a->grad)) {
|
5650
|
-
is_node = true;
|
5651
|
-
}
|
5652
|
-
|
5653
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5654
|
-
|
5655
|
-
result->op = GGML_OP_NEG;
|
5656
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5657
|
-
result->src[0] = a;
|
5658
|
-
result->src[1] = NULL;
|
5659
|
-
|
5660
|
-
return result;
|
5661
|
-
}
|
5662
|
-
|
5663
5619
|
struct ggml_tensor * ggml_neg(
|
5664
5620
|
struct ggml_context * ctx,
|
5665
5621
|
struct ggml_tensor * a) {
|
5666
|
-
return
|
5622
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
|
5667
5623
|
}
|
5668
5624
|
|
5669
5625
|
struct ggml_tensor * ggml_neg_inplace(
|
5670
5626
|
struct ggml_context * ctx,
|
5671
5627
|
struct ggml_tensor * a) {
|
5672
|
-
return
|
5628
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
|
5673
5629
|
}
|
5674
5630
|
|
5675
5631
|
// ggml_step
|
5676
5632
|
|
5677
|
-
struct ggml_tensor * ggml_step_impl(
|
5678
|
-
struct ggml_context * ctx,
|
5679
|
-
struct ggml_tensor * a,
|
5680
|
-
bool inplace) {
|
5681
|
-
bool is_node = false;
|
5682
|
-
|
5683
|
-
if (!inplace && (a->grad)) {
|
5684
|
-
is_node = true;
|
5685
|
-
}
|
5686
|
-
|
5687
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5688
|
-
|
5689
|
-
result->op = GGML_OP_STEP;
|
5690
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5691
|
-
result->src[0] = a;
|
5692
|
-
result->src[1] = NULL;
|
5693
|
-
|
5694
|
-
return result;
|
5695
|
-
}
|
5696
|
-
|
5697
5633
|
struct ggml_tensor * ggml_step(
|
5698
5634
|
struct ggml_context * ctx,
|
5699
5635
|
struct ggml_tensor * a) {
|
5700
|
-
return
|
5636
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
|
5701
5637
|
}
|
5702
5638
|
|
5703
5639
|
struct ggml_tensor * ggml_step_inplace(
|
5704
5640
|
struct ggml_context * ctx,
|
5705
5641
|
struct ggml_tensor * a) {
|
5706
|
-
return
|
5642
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
|
5707
5643
|
}
|
5708
5644
|
|
5709
5645
|
// ggml_tanh
|
5710
5646
|
|
5711
|
-
struct ggml_tensor * ggml_tanh_impl(
|
5712
|
-
struct ggml_context * ctx,
|
5713
|
-
struct ggml_tensor * a,
|
5714
|
-
bool inplace) {
|
5715
|
-
bool is_node = false;
|
5716
|
-
|
5717
|
-
if (!inplace && (a->grad)) {
|
5718
|
-
is_node = true;
|
5719
|
-
}
|
5720
|
-
|
5721
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5722
|
-
|
5723
|
-
result->op = GGML_OP_TANH;
|
5724
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5725
|
-
result->src[0] = a;
|
5726
|
-
result->src[1] = NULL;
|
5727
|
-
|
5728
|
-
return result;
|
5729
|
-
}
|
5730
|
-
|
5731
5647
|
struct ggml_tensor * ggml_tanh(
|
5732
5648
|
struct ggml_context * ctx,
|
5733
5649
|
struct ggml_tensor * a) {
|
5734
|
-
return
|
5650
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
|
5735
5651
|
}
|
5736
5652
|
|
5737
5653
|
struct ggml_tensor * ggml_tanh_inplace(
|
5738
5654
|
struct ggml_context * ctx,
|
5739
5655
|
struct ggml_tensor * a) {
|
5740
|
-
return
|
5656
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
|
5741
5657
|
}
|
5742
5658
|
|
5743
5659
|
// ggml_elu
|
5744
5660
|
|
5745
|
-
struct ggml_tensor * ggml_elu_impl(
|
5746
|
-
struct ggml_context * ctx,
|
5747
|
-
struct ggml_tensor * a,
|
5748
|
-
bool inplace) {
|
5749
|
-
bool is_node = false;
|
5750
|
-
|
5751
|
-
if (!inplace && (a->grad)) {
|
5752
|
-
is_node = true;
|
5753
|
-
}
|
5754
|
-
|
5755
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5756
|
-
|
5757
|
-
result->op = GGML_OP_ELU;
|
5758
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5759
|
-
result->src[0] = a;
|
5760
|
-
result->src[1] = NULL;
|
5761
|
-
|
5762
|
-
return result;
|
5763
|
-
}
|
5764
|
-
|
5765
5661
|
struct ggml_tensor * ggml_elu(
|
5766
5662
|
struct ggml_context * ctx,
|
5767
5663
|
struct ggml_tensor * a) {
|
5768
|
-
return
|
5664
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
|
5769
5665
|
}
|
5770
5666
|
|
5771
5667
|
struct ggml_tensor * ggml_elu_inplace(
|
5772
5668
|
struct ggml_context * ctx,
|
5773
5669
|
struct ggml_tensor * a) {
|
5774
|
-
return
|
5670
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
|
5775
5671
|
}
|
5776
5672
|
|
5777
5673
|
// ggml_relu
|
5778
5674
|
|
5779
|
-
struct ggml_tensor * ggml_relu_impl(
|
5780
|
-
struct ggml_context * ctx,
|
5781
|
-
struct ggml_tensor * a,
|
5782
|
-
bool inplace) {
|
5783
|
-
bool is_node = false;
|
5784
|
-
|
5785
|
-
if (!inplace && (a->grad)) {
|
5786
|
-
is_node = true;
|
5787
|
-
}
|
5788
|
-
|
5789
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5790
|
-
|
5791
|
-
result->op = GGML_OP_RELU;
|
5792
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5793
|
-
result->src[0] = a;
|
5794
|
-
result->src[1] = NULL;
|
5795
|
-
|
5796
|
-
return result;
|
5797
|
-
}
|
5798
|
-
|
5799
5675
|
struct ggml_tensor * ggml_relu(
|
5800
5676
|
struct ggml_context * ctx,
|
5801
5677
|
struct ggml_tensor * a) {
|
5802
|
-
return
|
5678
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
|
5803
5679
|
}
|
5804
5680
|
|
5805
5681
|
struct ggml_tensor * ggml_relu_inplace(
|
5806
5682
|
struct ggml_context * ctx,
|
5807
5683
|
struct ggml_tensor * a) {
|
5808
|
-
return
|
5684
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
|
5809
5685
|
}
|
5810
5686
|
|
5811
5687
|
// ggml_gelu
|
5812
5688
|
|
5813
|
-
struct ggml_tensor * ggml_gelu_impl(
|
5814
|
-
struct ggml_context * ctx,
|
5815
|
-
struct ggml_tensor * a,
|
5816
|
-
bool inplace) {
|
5817
|
-
bool is_node = false;
|
5818
|
-
|
5819
|
-
if (!inplace && (a->grad)) {
|
5820
|
-
is_node = true;
|
5821
|
-
}
|
5822
|
-
|
5823
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5824
|
-
|
5825
|
-
result->op = GGML_OP_GELU;
|
5826
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5827
|
-
result->src[0] = a;
|
5828
|
-
result->src[1] = NULL;
|
5829
|
-
|
5830
|
-
return result;
|
5831
|
-
}
|
5832
|
-
|
5833
5689
|
struct ggml_tensor * ggml_gelu(
|
5834
5690
|
struct ggml_context * ctx,
|
5835
5691
|
struct ggml_tensor * a) {
|
5836
|
-
return
|
5692
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
|
5837
5693
|
}
|
5838
5694
|
|
5839
5695
|
struct ggml_tensor * ggml_gelu_inplace(
|
5840
5696
|
struct ggml_context * ctx,
|
5841
5697
|
struct ggml_tensor * a) {
|
5842
|
-
return
|
5698
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
|
5843
5699
|
}
|
5844
5700
|
|
5845
5701
|
// ggml_gelu_quick
|
5846
5702
|
|
5847
|
-
struct ggml_tensor * ggml_gelu_quick_impl(
|
5848
|
-
struct ggml_context * ctx,
|
5849
|
-
struct ggml_tensor * a,
|
5850
|
-
bool inplace) {
|
5851
|
-
bool is_node = false;
|
5852
|
-
|
5853
|
-
if (!inplace && (a->grad)) {
|
5854
|
-
is_node = true;
|
5855
|
-
}
|
5856
|
-
|
5857
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5858
|
-
|
5859
|
-
result->op = GGML_OP_GELU_QUICK;
|
5860
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5861
|
-
result->src[0] = a;
|
5862
|
-
result->src[1] = NULL;
|
5863
|
-
|
5864
|
-
return result;
|
5865
|
-
}
|
5866
|
-
|
5867
5703
|
struct ggml_tensor * ggml_gelu_quick(
|
5868
5704
|
struct ggml_context * ctx,
|
5869
5705
|
struct ggml_tensor * a) {
|
5870
|
-
return
|
5706
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
|
5871
5707
|
}
|
5872
5708
|
|
5873
5709
|
struct ggml_tensor * ggml_gelu_quick_inplace(
|
5874
5710
|
struct ggml_context * ctx,
|
5875
5711
|
struct ggml_tensor * a) {
|
5876
|
-
return
|
5712
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
|
5877
5713
|
}
|
5878
5714
|
|
5879
5715
|
// ggml_silu
|
5880
5716
|
|
5881
|
-
struct ggml_tensor * ggml_silu_impl(
|
5882
|
-
struct ggml_context * ctx,
|
5883
|
-
struct ggml_tensor * a,
|
5884
|
-
bool inplace) {
|
5885
|
-
bool is_node = false;
|
5886
|
-
|
5887
|
-
if (!inplace && (a->grad)) {
|
5888
|
-
is_node = true;
|
5889
|
-
}
|
5890
|
-
|
5891
|
-
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5892
|
-
|
5893
|
-
result->op = GGML_OP_SILU;
|
5894
|
-
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5895
|
-
result->src[0] = a;
|
5896
|
-
result->src[1] = NULL;
|
5897
|
-
|
5898
|
-
return result;
|
5899
|
-
}
|
5900
|
-
|
5901
5717
|
struct ggml_tensor * ggml_silu(
|
5902
5718
|
struct ggml_context * ctx,
|
5903
5719
|
struct ggml_tensor * a) {
|
5904
|
-
return
|
5720
|
+
return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
|
5905
5721
|
}
|
5906
5722
|
|
5907
5723
|
struct ggml_tensor * ggml_silu_inplace(
|
5908
5724
|
struct ggml_context * ctx,
|
5909
5725
|
struct ggml_tensor * a) {
|
5910
|
-
return
|
5726
|
+
return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
|
5911
5727
|
}
|
5912
5728
|
|
5913
5729
|
// ggml_silu_back
|
@@ -5935,7 +5751,7 @@ struct ggml_tensor * ggml_silu_back(
|
|
5935
5751
|
|
5936
5752
|
// ggml_norm
|
5937
5753
|
|
5938
|
-
struct ggml_tensor * ggml_norm_impl(
|
5754
|
+
static struct ggml_tensor * ggml_norm_impl(
|
5939
5755
|
struct ggml_context * ctx,
|
5940
5756
|
struct ggml_tensor * a,
|
5941
5757
|
bool inplace) {
|
@@ -5948,10 +5764,11 @@ struct ggml_tensor * ggml_norm_impl(
|
|
5948
5764
|
|
5949
5765
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5950
5766
|
|
5767
|
+
// TODO: maybe store epsilon here?
|
5768
|
+
|
5951
5769
|
result->op = GGML_OP_NORM;
|
5952
5770
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5953
5771
|
result->src[0] = a;
|
5954
|
-
result->src[1] = NULL; // TODO: maybe store epsilon here?
|
5955
5772
|
|
5956
5773
|
return result;
|
5957
5774
|
}
|
@@ -5968,9 +5785,10 @@ struct ggml_tensor * ggml_norm_inplace(
|
|
5968
5785
|
return ggml_norm_impl(ctx, a, true);
|
5969
5786
|
}
|
5970
5787
|
|
5971
|
-
struct ggml_tensor * ggml_rms_norm_impl(
|
5788
|
+
static struct ggml_tensor * ggml_rms_norm_impl(
|
5972
5789
|
struct ggml_context * ctx,
|
5973
5790
|
struct ggml_tensor * a,
|
5791
|
+
float eps,
|
5974
5792
|
bool inplace) {
|
5975
5793
|
bool is_node = false;
|
5976
5794
|
|
@@ -5980,24 +5798,27 @@ struct ggml_tensor * ggml_rms_norm_impl(
|
|
5980
5798
|
|
5981
5799
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
5982
5800
|
|
5801
|
+
ggml_set_op_params(result, &eps, sizeof(eps));
|
5802
|
+
|
5983
5803
|
result->op = GGML_OP_RMS_NORM;
|
5984
5804
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
5985
5805
|
result->src[0] = a;
|
5986
|
-
result->src[1] = NULL; // TODO: maybe store epsilon here?
|
5987
5806
|
|
5988
5807
|
return result;
|
5989
5808
|
}
|
5990
5809
|
|
5991
5810
|
struct ggml_tensor * ggml_rms_norm(
|
5992
5811
|
struct ggml_context * ctx,
|
5993
|
-
struct ggml_tensor * a
|
5994
|
-
|
5812
|
+
struct ggml_tensor * a,
|
5813
|
+
float eps) {
|
5814
|
+
return ggml_rms_norm_impl(ctx, a, eps, false);
|
5995
5815
|
}
|
5996
5816
|
|
5997
5817
|
struct ggml_tensor * ggml_rms_norm_inplace(
|
5998
5818
|
struct ggml_context * ctx,
|
5999
|
-
struct ggml_tensor * a
|
6000
|
-
|
5819
|
+
struct ggml_tensor * a,
|
5820
|
+
float eps) {
|
5821
|
+
return ggml_rms_norm_impl(ctx, a, eps, true);
|
6001
5822
|
}
|
6002
5823
|
|
6003
5824
|
struct ggml_tensor * ggml_rms_norm_back(
|
@@ -6076,7 +5897,7 @@ struct ggml_tensor * ggml_out_prod(
|
|
6076
5897
|
|
6077
5898
|
// ggml_scale
|
6078
5899
|
|
6079
|
-
struct ggml_tensor * ggml_scale_impl(
|
5900
|
+
static struct ggml_tensor * ggml_scale_impl(
|
6080
5901
|
struct ggml_context * ctx,
|
6081
5902
|
struct ggml_tensor * a,
|
6082
5903
|
struct ggml_tensor * b,
|
@@ -6116,7 +5937,7 @@ struct ggml_tensor * ggml_scale_inplace(
|
|
6116
5937
|
|
6117
5938
|
// ggml_set
|
6118
5939
|
|
6119
|
-
struct ggml_tensor * ggml_set_impl(
|
5940
|
+
static struct ggml_tensor * ggml_set_impl(
|
6120
5941
|
struct ggml_context * ctx,
|
6121
5942
|
struct ggml_tensor * a,
|
6122
5943
|
struct ggml_tensor * b,
|
@@ -6136,23 +5957,13 @@ struct ggml_tensor * ggml_set_impl(
|
|
6136
5957
|
// make a view of the destination
|
6137
5958
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6138
5959
|
|
6139
|
-
|
6140
|
-
|
6141
|
-
struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
|
6142
|
-
|
6143
|
-
(( int32_t * ) c->data)[0] = nb1;
|
6144
|
-
(( int32_t * ) c->data)[1] = nb2;
|
6145
|
-
(( int32_t * ) c->data)[2] = nb3;
|
6146
|
-
(( int32_t * ) c->data)[3] = offset;
|
6147
|
-
(( int32_t * ) c->data)[4] = inplace ? 1 : 0;
|
6148
|
-
|
6149
|
-
ggml_scratch_load(ctx);
|
5960
|
+
int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
|
5961
|
+
ggml_set_op_params(result, params, sizeof(params));
|
6150
5962
|
|
6151
5963
|
result->op = GGML_OP_SET;
|
6152
5964
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6153
5965
|
result->src[0] = a;
|
6154
5966
|
result->src[1] = b;
|
6155
|
-
result->src[2] = c;
|
6156
5967
|
|
6157
5968
|
return result;
|
6158
5969
|
}
|
@@ -6216,7 +6027,7 @@ struct ggml_tensor * ggml_set_2d_inplace(
|
|
6216
6027
|
|
6217
6028
|
// ggml_cpy
|
6218
6029
|
|
6219
|
-
struct ggml_tensor * ggml_cpy_impl(
|
6030
|
+
static struct ggml_tensor * ggml_cpy_impl(
|
6220
6031
|
struct ggml_context * ctx,
|
6221
6032
|
struct ggml_tensor * a,
|
6222
6033
|
struct ggml_tensor * b,
|
@@ -6261,7 +6072,7 @@ struct ggml_tensor * ggml_cpy_inplace(
|
|
6261
6072
|
|
6262
6073
|
// ggml_cont
|
6263
6074
|
|
6264
|
-
struct ggml_tensor * ggml_cont_impl(
|
6075
|
+
static struct ggml_tensor * ggml_cont_impl(
|
6265
6076
|
struct ggml_context * ctx,
|
6266
6077
|
struct ggml_tensor * a,
|
6267
6078
|
bool inplace) {
|
@@ -6277,7 +6088,6 @@ struct ggml_tensor * ggml_cont_impl(
|
|
6277
6088
|
result->op = GGML_OP_CONT;
|
6278
6089
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6279
6090
|
result->src[0] = a;
|
6280
|
-
result->src[1] = NULL;
|
6281
6091
|
|
6282
6092
|
return result;
|
6283
6093
|
}
|
@@ -6321,7 +6131,6 @@ struct ggml_tensor * ggml_reshape(
|
|
6321
6131
|
result->op = GGML_OP_RESHAPE;
|
6322
6132
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6323
6133
|
result->src[0] = a;
|
6324
|
-
result->src[1] = NULL;
|
6325
6134
|
|
6326
6135
|
return result;
|
6327
6136
|
}
|
@@ -6346,7 +6155,6 @@ struct ggml_tensor * ggml_reshape_1d(
|
|
6346
6155
|
result->op = GGML_OP_RESHAPE;
|
6347
6156
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6348
6157
|
result->src[0] = a;
|
6349
|
-
result->src[1] = NULL;
|
6350
6158
|
|
6351
6159
|
return result;
|
6352
6160
|
}
|
@@ -6372,7 +6180,6 @@ struct ggml_tensor * ggml_reshape_2d(
|
|
6372
6180
|
result->op = GGML_OP_RESHAPE;
|
6373
6181
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6374
6182
|
result->src[0] = a;
|
6375
|
-
result->src[1] = NULL;
|
6376
6183
|
|
6377
6184
|
return result;
|
6378
6185
|
}
|
@@ -6399,7 +6206,6 @@ struct ggml_tensor * ggml_reshape_3d(
|
|
6399
6206
|
result->op = GGML_OP_RESHAPE;
|
6400
6207
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6401
6208
|
result->src[0] = a;
|
6402
|
-
result->src[1] = NULL;
|
6403
6209
|
|
6404
6210
|
return result;
|
6405
6211
|
}
|
@@ -6428,13 +6234,33 @@ struct ggml_tensor * ggml_reshape_4d(
|
|
6428
6234
|
result->op = GGML_OP_RESHAPE;
|
6429
6235
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6430
6236
|
result->src[0] = a;
|
6431
|
-
result->src[1] = NULL;
|
6432
6237
|
|
6433
6238
|
return result;
|
6434
6239
|
}
|
6435
6240
|
|
6436
6241
|
// ggml_view_1d
|
6437
6242
|
|
6243
|
+
static struct ggml_tensor * ggml_view_tensor_offset(
|
6244
|
+
struct ggml_context * ctx,
|
6245
|
+
struct ggml_tensor * a,
|
6246
|
+
int n_dims,
|
6247
|
+
const int64_t * ne,
|
6248
|
+
size_t offset) {
|
6249
|
+
// don't calculate an offset from an unallocated tensor
|
6250
|
+
void * data = NULL;
|
6251
|
+
if (a->data != NULL) {
|
6252
|
+
data = (char *) a->data + offset;
|
6253
|
+
}
|
6254
|
+
|
6255
|
+
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
|
6256
|
+
|
6257
|
+
ggml_format_name(result, "%s (view)", a->name);
|
6258
|
+
|
6259
|
+
ggml_set_op_params(result, &offset, sizeof(offset));
|
6260
|
+
|
6261
|
+
return result;
|
6262
|
+
}
|
6263
|
+
|
6438
6264
|
struct ggml_tensor * ggml_view_1d(
|
6439
6265
|
struct ggml_context * ctx,
|
6440
6266
|
struct ggml_tensor * a,
|
@@ -6447,22 +6273,11 @@ struct ggml_tensor * ggml_view_1d(
|
|
6447
6273
|
is_node = true;
|
6448
6274
|
}
|
6449
6275
|
|
6450
|
-
struct ggml_tensor * result =
|
6451
|
-
ggml_format_name(result, "%s (view)", a->name);
|
6452
|
-
|
6453
|
-
ggml_scratch_save(ctx);
|
6454
|
-
|
6455
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6456
|
-
ggml_set_name(offs, "offset");
|
6457
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6458
|
-
|
6459
|
-
ggml_scratch_load(ctx);
|
6276
|
+
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
|
6460
6277
|
|
6461
6278
|
result->op = GGML_OP_VIEW;
|
6462
6279
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6463
6280
|
result->src[0] = a;
|
6464
|
-
result->src[1] = NULL;
|
6465
|
-
result->src[2] = offs;
|
6466
6281
|
|
6467
6282
|
return result;
|
6468
6283
|
}
|
@@ -6485,16 +6300,7 @@ struct ggml_tensor * ggml_view_2d(
|
|
6485
6300
|
|
6486
6301
|
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
|
6487
6302
|
|
6488
|
-
struct ggml_tensor * result =
|
6489
|
-
ggml_format_name(result, "%s (view)", a->name);
|
6490
|
-
|
6491
|
-
ggml_scratch_save(ctx);
|
6492
|
-
|
6493
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6494
|
-
ggml_set_name(offs, "offset");
|
6495
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6496
|
-
|
6497
|
-
ggml_scratch_load(ctx);
|
6303
|
+
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
|
6498
6304
|
|
6499
6305
|
result->nb[1] = nb1;
|
6500
6306
|
result->nb[2] = result->nb[1]*ne1;
|
@@ -6503,8 +6309,6 @@ struct ggml_tensor * ggml_view_2d(
|
|
6503
6309
|
result->op = GGML_OP_VIEW;
|
6504
6310
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6505
6311
|
result->src[0] = a;
|
6506
|
-
result->src[1] = NULL;
|
6507
|
-
result->src[2] = offs;
|
6508
6312
|
|
6509
6313
|
return result;
|
6510
6314
|
}
|
@@ -6529,16 +6333,7 @@ struct ggml_tensor * ggml_view_3d(
|
|
6529
6333
|
|
6530
6334
|
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
|
6531
6335
|
|
6532
|
-
struct ggml_tensor * result =
|
6533
|
-
ggml_format_name(result, "%s (view)", a->name);
|
6534
|
-
|
6535
|
-
ggml_scratch_save(ctx);
|
6536
|
-
|
6537
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6538
|
-
ggml_set_name(offs, "offset");
|
6539
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6540
|
-
|
6541
|
-
ggml_scratch_load(ctx);
|
6336
|
+
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
|
6542
6337
|
|
6543
6338
|
result->nb[1] = nb1;
|
6544
6339
|
result->nb[2] = nb2;
|
@@ -6547,8 +6342,6 @@ struct ggml_tensor * ggml_view_3d(
|
|
6547
6342
|
result->op = GGML_OP_VIEW;
|
6548
6343
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6549
6344
|
result->src[0] = a;
|
6550
|
-
result->src[1] = NULL;
|
6551
|
-
result->src[2] = offs;
|
6552
6345
|
|
6553
6346
|
return result;
|
6554
6347
|
}
|
@@ -6575,16 +6368,7 @@ struct ggml_tensor * ggml_view_4d(
|
|
6575
6368
|
|
6576
6369
|
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
|
6577
6370
|
|
6578
|
-
struct ggml_tensor * result =
|
6579
|
-
ggml_format_name(result, "%s (view)", a->name);
|
6580
|
-
|
6581
|
-
ggml_scratch_save(ctx);
|
6582
|
-
|
6583
|
-
struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6584
|
-
ggml_set_name(offs, "offset");
|
6585
|
-
memcpy(offs->data, &offset, 2*sizeof(int32_t));
|
6586
|
-
|
6587
|
-
ggml_scratch_load(ctx);
|
6371
|
+
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
|
6588
6372
|
|
6589
6373
|
result->nb[1] = nb1;
|
6590
6374
|
result->nb[2] = nb2;
|
@@ -6593,8 +6377,6 @@ struct ggml_tensor * ggml_view_4d(
|
|
6593
6377
|
result->op = GGML_OP_VIEW;
|
6594
6378
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6595
6379
|
result->src[0] = a;
|
6596
|
-
result->src[1] = NULL;
|
6597
|
-
result->src[2] = offs;
|
6598
6380
|
|
6599
6381
|
return result;
|
6600
6382
|
}
|
@@ -6655,22 +6437,9 @@ struct ggml_tensor * ggml_permute(
|
|
6655
6437
|
result->op = GGML_OP_PERMUTE;
|
6656
6438
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6657
6439
|
result->src[0] = a;
|
6658
|
-
result->src[1] = NULL;
|
6659
6440
|
|
6660
|
-
|
6661
|
-
|
6662
|
-
|
6663
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
|
6664
|
-
|
6665
|
-
((int32_t *) b->data)[0] = axis0;
|
6666
|
-
((int32_t *) b->data)[1] = axis1;
|
6667
|
-
((int32_t *) b->data)[2] = axis2;
|
6668
|
-
((int32_t *) b->data)[3] = axis3;
|
6669
|
-
|
6670
|
-
ggml_scratch_load(ctx);
|
6671
|
-
|
6672
|
-
result->src[2] = b;
|
6673
|
-
}
|
6441
|
+
int32_t params[] = { axis0, axis1, axis2, axis3 };
|
6442
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6674
6443
|
|
6675
6444
|
return result;
|
6676
6445
|
}
|
@@ -6698,7 +6467,6 @@ struct ggml_tensor * ggml_transpose(
|
|
6698
6467
|
result->op = GGML_OP_TRANSPOSE;
|
6699
6468
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6700
6469
|
result->src[0] = a;
|
6701
|
-
result->src[1] = NULL;
|
6702
6470
|
|
6703
6471
|
return result;
|
6704
6472
|
}
|
@@ -6776,7 +6544,6 @@ struct ggml_tensor * ggml_diag(
|
|
6776
6544
|
result->op = GGML_OP_DIAG;
|
6777
6545
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6778
6546
|
result->src[0] = a;
|
6779
|
-
result->src[1] = NULL;
|
6780
6547
|
|
6781
6548
|
return result;
|
6782
6549
|
}
|
@@ -6784,7 +6551,7 @@ struct ggml_tensor * ggml_diag(
|
|
6784
6551
|
|
6785
6552
|
// ggml_diag_mask_inf
|
6786
6553
|
|
6787
|
-
struct ggml_tensor * ggml_diag_mask_inf_impl(
|
6554
|
+
static struct ggml_tensor * ggml_diag_mask_inf_impl(
|
6788
6555
|
struct ggml_context * ctx,
|
6789
6556
|
struct ggml_tensor * a,
|
6790
6557
|
int n_past,
|
@@ -6797,19 +6564,12 @@ struct ggml_tensor * ggml_diag_mask_inf_impl(
|
|
6797
6564
|
|
6798
6565
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6799
6566
|
|
6800
|
-
|
6801
|
-
|
6802
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6803
|
-
|
6804
|
-
((int32_t *) b->data)[0] = n_past;
|
6805
|
-
((int32_t *) b->data)[1] = inplace ? 1 : 0;
|
6806
|
-
|
6807
|
-
ggml_scratch_load(ctx);
|
6567
|
+
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
6568
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6808
6569
|
|
6809
6570
|
result->op = GGML_OP_DIAG_MASK_INF;
|
6810
6571
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6811
6572
|
result->src[0] = a;
|
6812
|
-
result->src[1] = b;
|
6813
6573
|
|
6814
6574
|
return result;
|
6815
6575
|
}
|
@@ -6831,7 +6591,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace(
|
|
6831
6591
|
|
6832
6592
|
// ggml_diag_mask_zero
|
6833
6593
|
|
6834
|
-
struct ggml_tensor * ggml_diag_mask_zero_impl(
|
6594
|
+
static struct ggml_tensor * ggml_diag_mask_zero_impl(
|
6835
6595
|
struct ggml_context * ctx,
|
6836
6596
|
struct ggml_tensor * a,
|
6837
6597
|
int n_past,
|
@@ -6844,20 +6604,12 @@ struct ggml_tensor * ggml_diag_mask_zero_impl(
|
|
6844
6604
|
|
6845
6605
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6846
6606
|
|
6847
|
-
|
6848
|
-
|
6849
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
|
6850
|
-
ggml_set_name(b, "n_past, inplace");
|
6851
|
-
|
6852
|
-
((int32_t *) b->data)[0] = n_past;
|
6853
|
-
((int32_t *) b->data)[1] = inplace ? 1 : 0;
|
6854
|
-
|
6855
|
-
ggml_scratch_load(ctx);
|
6607
|
+
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
6608
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6856
6609
|
|
6857
6610
|
result->op = GGML_OP_DIAG_MASK_ZERO;
|
6858
6611
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6859
6612
|
result->src[0] = a;
|
6860
|
-
result->src[1] = b;
|
6861
6613
|
|
6862
6614
|
return result;
|
6863
6615
|
}
|
@@ -6878,7 +6630,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace(
|
|
6878
6630
|
|
6879
6631
|
// ggml_soft_max
|
6880
6632
|
|
6881
|
-
struct ggml_tensor * ggml_soft_max_impl(
|
6633
|
+
static struct ggml_tensor * ggml_soft_max_impl(
|
6882
6634
|
struct ggml_context * ctx,
|
6883
6635
|
struct ggml_tensor * a,
|
6884
6636
|
bool inplace) {
|
@@ -6893,7 +6645,6 @@ struct ggml_tensor * ggml_soft_max_impl(
|
|
6893
6645
|
result->op = GGML_OP_SOFT_MAX;
|
6894
6646
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6895
6647
|
result->src[0] = a;
|
6896
|
-
result->src[1] = NULL;
|
6897
6648
|
|
6898
6649
|
return result;
|
6899
6650
|
}
|
@@ -6913,7 +6664,7 @@ struct ggml_tensor * ggml_soft_max_inplace(
|
|
6913
6664
|
|
6914
6665
|
// ggml_soft_max_back
|
6915
6666
|
|
6916
|
-
struct ggml_tensor * ggml_soft_max_back_impl(
|
6667
|
+
static struct ggml_tensor * ggml_soft_max_back_impl(
|
6917
6668
|
struct ggml_context * ctx,
|
6918
6669
|
struct ggml_tensor * a,
|
6919
6670
|
struct ggml_tensor * b,
|
@@ -6950,7 +6701,7 @@ struct ggml_tensor * ggml_soft_max_back_inplace(
|
|
6950
6701
|
|
6951
6702
|
// ggml_rope
|
6952
6703
|
|
6953
|
-
struct ggml_tensor * ggml_rope_impl(
|
6704
|
+
static struct ggml_tensor * ggml_rope_impl(
|
6954
6705
|
struct ggml_context * ctx,
|
6955
6706
|
struct ggml_tensor * a,
|
6956
6707
|
int n_past,
|
@@ -6969,23 +6720,14 @@ struct ggml_tensor * ggml_rope_impl(
|
|
6969
6720
|
|
6970
6721
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
6971
6722
|
|
6972
|
-
|
6973
|
-
|
6974
|
-
|
6975
|
-
|
6976
|
-
((int32_t *) b->data)[0] = n_past;
|
6977
|
-
((int32_t *) b->data)[1] = n_dims;
|
6978
|
-
((int32_t *) b->data)[2] = mode;
|
6979
|
-
((int32_t *) b->data)[3] = n_ctx;
|
6980
|
-
memcpy((int32_t *) b->data + 4, &freq_base, sizeof(float));
|
6981
|
-
memcpy((int32_t *) b->data + 5, &freq_scale, sizeof(float));
|
6982
|
-
|
6983
|
-
ggml_scratch_load(ctx);
|
6723
|
+
int32_t params[6] = { n_past, n_dims, mode, n_ctx };
|
6724
|
+
memcpy(params + 4, &freq_base, sizeof(float));
|
6725
|
+
memcpy(params + 5, &freq_scale, sizeof(float));
|
6726
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
6984
6727
|
|
6985
6728
|
result->op = GGML_OP_ROPE;
|
6986
6729
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
6987
6730
|
result->src[0] = a;
|
6988
|
-
result->src[1] = b;
|
6989
6731
|
|
6990
6732
|
return result;
|
6991
6733
|
}
|
@@ -7010,6 +6752,18 @@ struct ggml_tensor * ggml_rope_inplace(
|
|
7010
6752
|
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
|
7011
6753
|
}
|
7012
6754
|
|
6755
|
+
struct ggml_tensor * ggml_rope_custom(
|
6756
|
+
struct ggml_context * ctx,
|
6757
|
+
struct ggml_tensor * a,
|
6758
|
+
int n_past,
|
6759
|
+
int n_dims,
|
6760
|
+
int mode,
|
6761
|
+
int n_ctx,
|
6762
|
+
float freq_base,
|
6763
|
+
float freq_scale) {
|
6764
|
+
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, false);
|
6765
|
+
}
|
6766
|
+
|
7013
6767
|
struct ggml_tensor * ggml_rope_custom_inplace(
|
7014
6768
|
struct ggml_context * ctx,
|
7015
6769
|
struct ggml_tensor * a,
|
@@ -7042,22 +6796,12 @@ struct ggml_tensor * ggml_rope_back(
|
|
7042
6796
|
|
7043
6797
|
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7044
6798
|
|
7045
|
-
|
7046
|
-
|
7047
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
|
7048
|
-
ggml_set_name(b, "n_past, n_dims, mode");
|
7049
|
-
|
7050
|
-
((int32_t *) b->data)[0] = n_past;
|
7051
|
-
((int32_t *) b->data)[1] = n_dims;
|
7052
|
-
((int32_t *) b->data)[2] = mode;
|
7053
|
-
((int32_t *) b->data)[3] = n_ctx;
|
7054
|
-
|
7055
|
-
ggml_scratch_load(ctx);
|
6799
|
+
int32_t params[] = { n_past, n_dims, mode, n_ctx };
|
6800
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7056
6801
|
|
7057
6802
|
result->op = GGML_OP_ROPE_BACK;
|
7058
6803
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7059
6804
|
result->src[0] = a;
|
7060
|
-
result->src[1] = b;
|
7061
6805
|
|
7062
6806
|
return result;
|
7063
6807
|
}
|
@@ -7082,21 +6826,13 @@ struct ggml_tensor * ggml_alibi(
|
|
7082
6826
|
//struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7083
6827
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7084
6828
|
|
7085
|
-
|
7086
|
-
|
7087
|
-
|
7088
|
-
|
7089
|
-
((int32_t *) b->data)[0] = n_past;
|
7090
|
-
((int32_t *) b->data)[1] = n_head;
|
7091
|
-
GGML_ASSERT(sizeof(float) == sizeof(int32_t));
|
7092
|
-
(((float *) b->data)[2]) = bias_max;
|
7093
|
-
|
7094
|
-
ggml_scratch_load(ctx);
|
6829
|
+
int32_t op_params[3] = { n_past, n_head };
|
6830
|
+
memcpy(op_params + 2, &bias_max, sizeof(float));
|
6831
|
+
ggml_set_op_params(result, &op_params, sizeof(op_params));
|
7095
6832
|
|
7096
6833
|
result->op = GGML_OP_ALIBI;
|
7097
6834
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7098
6835
|
result->src[0] = a;
|
7099
|
-
result->src[1] = b;
|
7100
6836
|
|
7101
6837
|
return result;
|
7102
6838
|
}
|
@@ -7118,19 +6854,12 @@ struct ggml_tensor * ggml_clamp(
|
|
7118
6854
|
// TODO: when implement backward, fix this:
|
7119
6855
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7120
6856
|
|
7121
|
-
|
7122
|
-
|
7123
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
|
7124
|
-
|
7125
|
-
((float *) b->data)[0] = min;
|
7126
|
-
((float *) b->data)[1] = max;
|
7127
|
-
|
7128
|
-
ggml_scratch_load(ctx);
|
6857
|
+
float params[] = { min, max };
|
6858
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7129
6859
|
|
7130
6860
|
result->op = GGML_OP_CLAMP;
|
7131
6861
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7132
6862
|
result->src[0] = a;
|
7133
|
-
result->src[1] = b;
|
7134
6863
|
|
7135
6864
|
return result;
|
7136
6865
|
}
|
@@ -7163,18 +6892,13 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|
7163
6892
|
};
|
7164
6893
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7165
6894
|
|
7166
|
-
|
7167
|
-
|
7168
|
-
((int32_t*)c->data)[0] = s0;
|
7169
|
-
((int32_t*)c->data)[1] = p0;
|
7170
|
-
((int32_t*)c->data)[2] = d0;
|
7171
|
-
ggml_scratch_load(ctx);
|
6895
|
+
int32_t params[] = { s0, p0, d0 };
|
6896
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7172
6897
|
|
7173
6898
|
result->op = GGML_OP_CONV_1D;
|
7174
6899
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7175
6900
|
result->src[0] = a;
|
7176
6901
|
result->src[1] = b;
|
7177
|
-
result->src[2] = c;
|
7178
6902
|
|
7179
6903
|
return result;
|
7180
6904
|
}
|
@@ -7207,21 +6931,13 @@ struct ggml_tensor* ggml_conv_2d(
|
|
7207
6931
|
};
|
7208
6932
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7209
6933
|
|
7210
|
-
|
7211
|
-
|
7212
|
-
((int32_t*)c->data)[0] = s0;
|
7213
|
-
((int32_t*)c->data)[1] = s1;
|
7214
|
-
((int32_t*)c->data)[2] = p0;
|
7215
|
-
((int32_t*)c->data)[3] = p1;
|
7216
|
-
((int32_t*)c->data)[4] = d0;
|
7217
|
-
((int32_t*)c->data)[5] = d1;
|
7218
|
-
ggml_scratch_load(ctx);
|
6934
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
|
6935
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7219
6936
|
|
7220
6937
|
result->op = GGML_OP_CONV_2D;
|
7221
6938
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7222
6939
|
result->src[0] = a;
|
7223
6940
|
result->src[1] = b;
|
7224
|
-
result->src[2] = c;
|
7225
6941
|
|
7226
6942
|
return result;
|
7227
6943
|
|
@@ -7245,7 +6961,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
|
7245
6961
|
return (ins + 2 * p - ks) / s + 1;
|
7246
6962
|
}
|
7247
6963
|
|
7248
|
-
//
|
6964
|
+
// ggml_pool_1d
|
7249
6965
|
|
7250
6966
|
struct ggml_tensor* ggml_pool_1d(
|
7251
6967
|
struct ggml_context * ctx,
|
@@ -7268,18 +6984,12 @@ struct ggml_tensor* ggml_pool_1d(
|
|
7268
6984
|
};
|
7269
6985
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7270
6986
|
|
7271
|
-
|
7272
|
-
|
7273
|
-
((int32_t*)c->data)[0] = op;
|
7274
|
-
((int32_t*)c->data)[1] = k0;
|
7275
|
-
((int32_t*)c->data)[2] = s0;
|
7276
|
-
((int32_t*)c->data)[3] = p0;
|
7277
|
-
ggml_scratch_load(ctx);
|
6987
|
+
int32_t params[] = { op, k0, s0, p0 };
|
6988
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7278
6989
|
|
7279
6990
|
result->op = GGML_OP_POOL_1D;
|
7280
6991
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7281
6992
|
result->src[0] = a;
|
7282
|
-
result->src[1] = c;
|
7283
6993
|
|
7284
6994
|
return result;
|
7285
6995
|
}
|
@@ -7311,21 +7021,12 @@ struct ggml_tensor* ggml_pool_2d(
|
|
7311
7021
|
};
|
7312
7022
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7313
7023
|
|
7314
|
-
|
7315
|
-
|
7316
|
-
((int32_t*)c->data)[0] = op;
|
7317
|
-
((int32_t*)c->data)[1] = k0;
|
7318
|
-
((int32_t*)c->data)[2] = k1;
|
7319
|
-
((int32_t*)c->data)[3] = s0;
|
7320
|
-
((int32_t*)c->data)[4] = s1;
|
7321
|
-
((int32_t*)c->data)[5] = p0;
|
7322
|
-
((int32_t*)c->data)[6] = p1;
|
7323
|
-
ggml_scratch_load(ctx);
|
7024
|
+
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
7025
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7324
7026
|
|
7325
7027
|
result->op = GGML_OP_POOL_2D;
|
7326
7028
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7327
7029
|
result->src[0] = a;
|
7328
|
-
result->src[1] = c;
|
7329
7030
|
|
7330
7031
|
return result;
|
7331
7032
|
}
|
@@ -7348,14 +7049,16 @@ struct ggml_tensor * ggml_flash_attn(
|
|
7348
7049
|
}
|
7349
7050
|
|
7350
7051
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
7351
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7052
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
|
7053
|
+
|
7054
|
+
int32_t t = masked ? 1 : 0;
|
7055
|
+
ggml_set_op_params(result, &t, sizeof(t));
|
7352
7056
|
|
7353
7057
|
result->op = GGML_OP_FLASH_ATTN;
|
7354
7058
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7355
7059
|
result->src[0] = q;
|
7356
7060
|
result->src[1] = k;
|
7357
7061
|
result->src[2] = v;
|
7358
|
-
result->src[3] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7359
7062
|
|
7360
7063
|
return result;
|
7361
7064
|
}
|
@@ -7379,7 +7082,7 @@ struct ggml_tensor * ggml_flash_ff(
|
|
7379
7082
|
}
|
7380
7083
|
|
7381
7084
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7382
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7085
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
|
7383
7086
|
|
7384
7087
|
result->op = GGML_OP_FLASH_FF;
|
7385
7088
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
@@ -7445,13 +7148,15 @@ struct ggml_tensor * ggml_flash_attn_back(
|
|
7445
7148
|
|
7446
7149
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7447
7150
|
|
7151
|
+
int32_t masked_i = masked ? 1 : 0;
|
7152
|
+
ggml_set_op_params(result, &masked_i, sizeof(masked_i));
|
7153
|
+
|
7448
7154
|
result->op = GGML_OP_FLASH_ATTN_BACK;
|
7449
7155
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7450
7156
|
result->src[0] = q;
|
7451
7157
|
result->src[1] = k;
|
7452
7158
|
result->src[2] = v;
|
7453
7159
|
result->src[3] = d;
|
7454
|
-
result->src[4] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7455
7160
|
|
7456
7161
|
return result;
|
7457
7162
|
}
|
@@ -7484,21 +7189,12 @@ struct ggml_tensor * ggml_win_part(
|
|
7484
7189
|
|
7485
7190
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7486
7191
|
|
7487
|
-
|
7488
|
-
|
7489
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
7490
|
-
|
7491
|
-
((int32_t *) b->data)[0] = npx;
|
7492
|
-
((int32_t *) b->data)[1] = npy;
|
7493
|
-
((int32_t *) b->data)[2] = w;
|
7494
|
-
|
7495
|
-
ggml_scratch_load(ctx);
|
7192
|
+
int32_t params[] = { npx, npy, w };
|
7193
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7496
7194
|
|
7497
7195
|
result->op = GGML_OP_WIN_PART;
|
7498
7196
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7499
7197
|
result->src[0] = a;
|
7500
|
-
result->src[1] = NULL;
|
7501
|
-
result->src[2] = b;
|
7502
7198
|
|
7503
7199
|
return result;
|
7504
7200
|
}
|
@@ -7523,26 +7219,57 @@ struct ggml_tensor * ggml_win_unpart(
|
|
7523
7219
|
const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
|
7524
7220
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7525
7221
|
|
7526
|
-
|
7222
|
+
int32_t params[] = { w };
|
7223
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7224
|
+
|
7225
|
+
result->op = GGML_OP_WIN_UNPART;
|
7226
|
+
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7227
|
+
result->src[0] = a;
|
7527
7228
|
|
7528
|
-
|
7229
|
+
return result;
|
7230
|
+
}
|
7529
7231
|
|
7530
|
-
|
7232
|
+
// ggml_unary
|
7531
7233
|
|
7532
|
-
|
7234
|
+
static struct ggml_tensor * ggml_unary_impl(
|
7235
|
+
struct ggml_context * ctx,
|
7236
|
+
struct ggml_tensor * a,
|
7237
|
+
enum ggml_unary_op op,
|
7238
|
+
bool inplace) {
|
7239
|
+
bool is_node = false;
|
7533
7240
|
|
7534
|
-
|
7241
|
+
if (!inplace && (a->grad)) {
|
7242
|
+
is_node = true;
|
7243
|
+
}
|
7244
|
+
|
7245
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7246
|
+
|
7247
|
+
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
7248
|
+
|
7249
|
+
result->op = GGML_OP_UNARY;
|
7535
7250
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7536
7251
|
result->src[0] = a;
|
7537
|
-
result->src[1] = NULL;
|
7538
|
-
result->src[2] = b;
|
7539
7252
|
|
7540
7253
|
return result;
|
7541
7254
|
}
|
7542
7255
|
|
7256
|
+
struct ggml_tensor * ggml_unary(
|
7257
|
+
struct ggml_context * ctx,
|
7258
|
+
struct ggml_tensor * a,
|
7259
|
+
enum ggml_unary_op op) {
|
7260
|
+
return ggml_unary_impl(ctx, a, op, false);
|
7261
|
+
}
|
7262
|
+
|
7263
|
+
struct ggml_tensor * ggml_unary_inplace(
|
7264
|
+
struct ggml_context * ctx,
|
7265
|
+
struct ggml_tensor * a,
|
7266
|
+
enum ggml_unary_op op) {
|
7267
|
+
return ggml_unary_impl(ctx, a, op, true);
|
7268
|
+
}
|
7269
|
+
|
7543
7270
|
// ggml_map_unary
|
7544
7271
|
|
7545
|
-
struct ggml_tensor * ggml_map_unary_impl_f32(
|
7272
|
+
static struct ggml_tensor * ggml_map_unary_impl_f32(
|
7546
7273
|
struct ggml_context * ctx,
|
7547
7274
|
struct ggml_tensor * a,
|
7548
7275
|
const ggml_unary_op_f32_t fun,
|
@@ -7553,19 +7280,13 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
|
|
7553
7280
|
is_node = true;
|
7554
7281
|
}
|
7555
7282
|
|
7556
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7557
|
-
|
7558
|
-
ggml_scratch_save(ctx);
|
7283
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7559
7284
|
|
7560
|
-
|
7561
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7562
|
-
|
7563
|
-
ggml_scratch_load(ctx);
|
7285
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7564
7286
|
|
7565
7287
|
result->op = GGML_OP_MAP_UNARY;
|
7566
7288
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7567
7289
|
result->src[0] = a;
|
7568
|
-
result->src[2] = addr_tensor;
|
7569
7290
|
|
7570
7291
|
return result;
|
7571
7292
|
}
|
@@ -7586,7 +7307,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
7586
7307
|
|
7587
7308
|
// ggml_map_binary
|
7588
7309
|
|
7589
|
-
struct ggml_tensor * ggml_map_binary_impl_f32(
|
7310
|
+
static struct ggml_tensor * ggml_map_binary_impl_f32(
|
7590
7311
|
struct ggml_context * ctx,
|
7591
7312
|
struct ggml_tensor * a,
|
7592
7313
|
struct ggml_tensor * b,
|
@@ -7600,20 +7321,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
|
|
7600
7321
|
is_node = true;
|
7601
7322
|
}
|
7602
7323
|
|
7603
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7604
|
-
|
7605
|
-
ggml_scratch_save(ctx);
|
7606
|
-
|
7607
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7608
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7324
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7609
7325
|
|
7610
|
-
|
7326
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7611
7327
|
|
7612
7328
|
result->op = GGML_OP_MAP_BINARY;
|
7613
7329
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7614
7330
|
result->src[0] = a;
|
7615
7331
|
result->src[1] = b;
|
7616
|
-
result->src[2] = addr_tensor;
|
7617
7332
|
|
7618
7333
|
return result;
|
7619
7334
|
}
|
@@ -7636,7 +7351,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
7636
7351
|
|
7637
7352
|
// ggml_map_custom1
|
7638
7353
|
|
7639
|
-
struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7354
|
+
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7640
7355
|
struct ggml_context * ctx,
|
7641
7356
|
struct ggml_tensor * a,
|
7642
7357
|
const ggml_custom1_op_f32_t fun,
|
@@ -7647,19 +7362,13 @@ struct ggml_tensor * ggml_map_custom1_impl_f32(
|
|
7647
7362
|
is_node = true;
|
7648
7363
|
}
|
7649
7364
|
|
7650
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7651
|
-
|
7652
|
-
ggml_scratch_save(ctx);
|
7653
|
-
|
7654
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7655
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7365
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7656
7366
|
|
7657
|
-
|
7367
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7658
7368
|
|
7659
7369
|
result->op = GGML_OP_MAP_CUSTOM1;
|
7660
7370
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7661
7371
|
result->src[0] = a;
|
7662
|
-
result->src[2] = addr_tensor;
|
7663
7372
|
|
7664
7373
|
return result;
|
7665
7374
|
}
|
@@ -7680,7 +7389,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
7680
7389
|
|
7681
7390
|
// ggml_map_custom2
|
7682
7391
|
|
7683
|
-
struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7392
|
+
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7684
7393
|
struct ggml_context * ctx,
|
7685
7394
|
struct ggml_tensor * a,
|
7686
7395
|
struct ggml_tensor * b,
|
@@ -7692,20 +7401,14 @@ struct ggml_tensor * ggml_map_custom2_impl_f32(
|
|
7692
7401
|
is_node = true;
|
7693
7402
|
}
|
7694
7403
|
|
7695
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7696
|
-
|
7697
|
-
ggml_scratch_save(ctx);
|
7404
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7698
7405
|
|
7699
|
-
|
7700
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7701
|
-
|
7702
|
-
ggml_scratch_load(ctx);
|
7406
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7703
7407
|
|
7704
7408
|
result->op = GGML_OP_MAP_CUSTOM2;
|
7705
7409
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7706
7410
|
result->src[0] = a;
|
7707
7411
|
result->src[1] = b;
|
7708
|
-
result->src[2] = addr_tensor;
|
7709
7412
|
|
7710
7413
|
return result;
|
7711
7414
|
}
|
@@ -7728,7 +7431,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
7728
7431
|
|
7729
7432
|
// ggml_map_custom3
|
7730
7433
|
|
7731
|
-
struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7434
|
+
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7732
7435
|
struct ggml_context * ctx,
|
7733
7436
|
struct ggml_tensor * a,
|
7734
7437
|
struct ggml_tensor * b,
|
@@ -7741,21 +7444,15 @@ struct ggml_tensor * ggml_map_custom3_impl_f32(
|
|
7741
7444
|
is_node = true;
|
7742
7445
|
}
|
7743
7446
|
|
7744
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7745
|
-
|
7746
|
-
ggml_scratch_save(ctx);
|
7747
|
-
|
7748
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7749
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7447
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7750
7448
|
|
7751
|
-
|
7449
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7752
7450
|
|
7753
7451
|
result->op = GGML_OP_MAP_CUSTOM3;
|
7754
7452
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7755
7453
|
result->src[0] = a;
|
7756
7454
|
result->src[1] = b;
|
7757
|
-
result->src[2] =
|
7758
|
-
result->src[3] = c;
|
7455
|
+
result->src[2] = c;
|
7759
7456
|
|
7760
7457
|
return result;
|
7761
7458
|
}
|
@@ -8983,21 +8680,17 @@ static void ggml_compute_forward_acc_f32(
|
|
8983
8680
|
const struct ggml_compute_params * params,
|
8984
8681
|
const struct ggml_tensor * src0,
|
8985
8682
|
const struct ggml_tensor * src1,
|
8986
|
-
const struct ggml_tensor * opt0,
|
8987
8683
|
struct ggml_tensor * dst) {
|
8988
8684
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8989
8685
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
8990
8686
|
|
8991
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
8992
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
8993
|
-
|
8994
8687
|
// view src0 and dst with these strides and data offset in bytes during acc
|
8995
8688
|
// nb0 is implicitly element_size because src0 and dst are contiguous
|
8996
|
-
size_t nb1 = ((int32_t *)
|
8997
|
-
size_t nb2 = ((int32_t *)
|
8998
|
-
size_t nb3 = ((int32_t *)
|
8999
|
-
size_t offset = ((int32_t *)
|
9000
|
-
bool inplace = (bool) ((int32_t *)
|
8689
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
8690
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
8691
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
8692
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
8693
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
9001
8694
|
|
9002
8695
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
9003
8696
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -9066,13 +8759,12 @@ static void ggml_compute_forward_acc(
|
|
9066
8759
|
const struct ggml_compute_params * params,
|
9067
8760
|
const struct ggml_tensor * src0,
|
9068
8761
|
const struct ggml_tensor * src1,
|
9069
|
-
const struct ggml_tensor * opt0,
|
9070
8762
|
struct ggml_tensor * dst) {
|
9071
8763
|
|
9072
8764
|
switch (src0->type) {
|
9073
8765
|
case GGML_TYPE_F32:
|
9074
8766
|
{
|
9075
|
-
ggml_compute_forward_acc_f32(params, src0, src1,
|
8767
|
+
ggml_compute_forward_acc_f32(params, src0, src1, dst);
|
9076
8768
|
} break;
|
9077
8769
|
case GGML_TYPE_F16:
|
9078
8770
|
case GGML_TYPE_Q4_0:
|
@@ -9504,7 +9196,7 @@ static void ggml_compute_forward_sum_f32(
|
|
9504
9196
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9505
9197
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9506
9198
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9507
|
-
|
9199
|
+
ggml_vec_sum_f32_ggf(ne00,
|
9508
9200
|
&row_sum,
|
9509
9201
|
(float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
|
9510
9202
|
sum += row_sum;
|
@@ -9514,6 +9206,38 @@ static void ggml_compute_forward_sum_f32(
|
|
9514
9206
|
((float *) dst->data)[0] = sum;
|
9515
9207
|
}
|
9516
9208
|
|
9209
|
+
static void ggml_compute_forward_sum_f16(
|
9210
|
+
const struct ggml_compute_params * params,
|
9211
|
+
const struct ggml_tensor * src0,
|
9212
|
+
struct ggml_tensor * dst) {
|
9213
|
+
assert(params->ith == 0);
|
9214
|
+
assert(ggml_is_scalar(dst));
|
9215
|
+
|
9216
|
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
9217
|
+
return;
|
9218
|
+
}
|
9219
|
+
|
9220
|
+
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
9221
|
+
|
9222
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
9223
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
9224
|
+
|
9225
|
+
float sum = 0;
|
9226
|
+
float row_sum = 0;
|
9227
|
+
|
9228
|
+
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9229
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9230
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9231
|
+
ggml_vec_sum_f16_ggf(ne00,
|
9232
|
+
&row_sum,
|
9233
|
+
(ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03));
|
9234
|
+
sum += row_sum;
|
9235
|
+
}
|
9236
|
+
}
|
9237
|
+
}
|
9238
|
+
((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum);
|
9239
|
+
}
|
9240
|
+
|
9517
9241
|
static void ggml_compute_forward_sum(
|
9518
9242
|
const struct ggml_compute_params * params,
|
9519
9243
|
const struct ggml_tensor * src0,
|
@@ -9523,6 +9247,10 @@ static void ggml_compute_forward_sum(
|
|
9523
9247
|
{
|
9524
9248
|
ggml_compute_forward_sum_f32(params, src0, dst);
|
9525
9249
|
} break;
|
9250
|
+
case GGML_TYPE_F16:
|
9251
|
+
{
|
9252
|
+
ggml_compute_forward_sum_f16(params, src0, dst);
|
9253
|
+
} break;
|
9526
9254
|
default:
|
9527
9255
|
{
|
9528
9256
|
GGML_ASSERT(false);
|
@@ -10118,8 +9846,8 @@ static void ggml_compute_forward_gelu_f32(
|
|
10118
9846
|
const struct ggml_compute_params * params,
|
10119
9847
|
const struct ggml_tensor * src0,
|
10120
9848
|
struct ggml_tensor * dst) {
|
10121
|
-
GGML_ASSERT(
|
10122
|
-
GGML_ASSERT(
|
9849
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9850
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10123
9851
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10124
9852
|
|
10125
9853
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10177,8 +9905,8 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
10177
9905
|
const struct ggml_compute_params * params,
|
10178
9906
|
const struct ggml_tensor * src0,
|
10179
9907
|
struct ggml_tensor * dst) {
|
10180
|
-
GGML_ASSERT(
|
10181
|
-
GGML_ASSERT(
|
9908
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9909
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10182
9910
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10183
9911
|
|
10184
9912
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10236,8 +9964,8 @@ static void ggml_compute_forward_silu_f32(
|
|
10236
9964
|
const struct ggml_compute_params * params,
|
10237
9965
|
const struct ggml_tensor * src0,
|
10238
9966
|
struct ggml_tensor * dst) {
|
10239
|
-
GGML_ASSERT(
|
10240
|
-
GGML_ASSERT(
|
9967
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9968
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10241
9969
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10242
9970
|
|
10243
9971
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10289,7 +10017,6 @@ static void ggml_compute_forward_silu(
|
|
10289
10017
|
}
|
10290
10018
|
}
|
10291
10019
|
|
10292
|
-
|
10293
10020
|
// ggml_compute_forward_silu_back
|
10294
10021
|
|
10295
10022
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -10297,9 +10024,9 @@ static void ggml_compute_forward_silu_back_f32(
|
|
10297
10024
|
const struct ggml_tensor * src0,
|
10298
10025
|
const struct ggml_tensor * grad,
|
10299
10026
|
struct ggml_tensor * dst) {
|
10300
|
-
GGML_ASSERT(
|
10301
|
-
GGML_ASSERT(
|
10302
|
-
GGML_ASSERT(
|
10027
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
10028
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
10029
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10303
10030
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10304
10031
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
10305
10032
|
|
@@ -10439,7 +10166,8 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
10439
10166
|
|
10440
10167
|
GGML_TENSOR_UNARY_OP_LOCALS;
|
10441
10168
|
|
10442
|
-
|
10169
|
+
float eps;
|
10170
|
+
memcpy(&eps, dst->op_params, sizeof(float));
|
10443
10171
|
|
10444
10172
|
// TODO: optimize
|
10445
10173
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
@@ -11092,21 +10820,17 @@ static void ggml_compute_forward_set_f32(
|
|
11092
10820
|
const struct ggml_compute_params * params,
|
11093
10821
|
const struct ggml_tensor * src0,
|
11094
10822
|
const struct ggml_tensor * src1,
|
11095
|
-
const struct ggml_tensor * opt0,
|
11096
10823
|
struct ggml_tensor * dst) {
|
11097
10824
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11098
10825
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
11099
10826
|
|
11100
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
11101
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
11102
|
-
|
11103
10827
|
// view src0 and dst with these strides and data offset in bytes during set
|
11104
10828
|
// nb0 is implicitly element_size because src0 and dst are contiguous
|
11105
|
-
size_t nb1 = ((int32_t *)
|
11106
|
-
size_t nb2 = ((int32_t *)
|
11107
|
-
size_t nb3 = ((int32_t *)
|
11108
|
-
size_t offset = ((int32_t *)
|
11109
|
-
bool inplace = (bool) ((int32_t *)
|
10829
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
10830
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
10831
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
10832
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
10833
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
11110
10834
|
|
11111
10835
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
11112
10836
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -11166,13 +10890,12 @@ static void ggml_compute_forward_set(
|
|
11166
10890
|
const struct ggml_compute_params * params,
|
11167
10891
|
const struct ggml_tensor * src0,
|
11168
10892
|
const struct ggml_tensor * src1,
|
11169
|
-
const struct ggml_tensor * opt0,
|
11170
10893
|
struct ggml_tensor * dst) {
|
11171
10894
|
|
11172
10895
|
switch (src0->type) {
|
11173
10896
|
case GGML_TYPE_F32:
|
11174
10897
|
{
|
11175
|
-
ggml_compute_forward_set_f32(params, src0, src1,
|
10898
|
+
ggml_compute_forward_set_f32(params, src0, src1, dst);
|
11176
10899
|
} break;
|
11177
10900
|
case GGML_TYPE_F16:
|
11178
10901
|
case GGML_TYPE_Q4_0:
|
@@ -11568,17 +11291,14 @@ static void ggml_compute_forward_diag(
|
|
11568
11291
|
static void ggml_compute_forward_diag_mask_f32(
|
11569
11292
|
const struct ggml_compute_params * params,
|
11570
11293
|
const struct ggml_tensor * src0,
|
11571
|
-
const struct ggml_tensor * src1,
|
11572
11294
|
struct ggml_tensor * dst,
|
11573
11295
|
const float value) {
|
11574
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11575
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11576
11296
|
|
11577
11297
|
const int ith = params->ith;
|
11578
11298
|
const int nth = params->nth;
|
11579
11299
|
|
11580
|
-
const int n_past = ((int32_t *)
|
11581
|
-
const bool inplace = (bool)((int32_t *)
|
11300
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11301
|
+
const bool inplace = (bool)((int32_t *) dst->op_params)[1];
|
11582
11302
|
|
11583
11303
|
GGML_ASSERT(n_past >= 0);
|
11584
11304
|
|
@@ -11621,12 +11341,11 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11621
11341
|
static void ggml_compute_forward_diag_mask_inf(
|
11622
11342
|
const struct ggml_compute_params * params,
|
11623
11343
|
const struct ggml_tensor * src0,
|
11624
|
-
const struct ggml_tensor * src1,
|
11625
11344
|
struct ggml_tensor * dst) {
|
11626
11345
|
switch (src0->type) {
|
11627
11346
|
case GGML_TYPE_F32:
|
11628
11347
|
{
|
11629
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11348
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
|
11630
11349
|
} break;
|
11631
11350
|
default:
|
11632
11351
|
{
|
@@ -11638,12 +11357,11 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
11638
11357
|
static void ggml_compute_forward_diag_mask_zero(
|
11639
11358
|
const struct ggml_compute_params * params,
|
11640
11359
|
const struct ggml_tensor * src0,
|
11641
|
-
const struct ggml_tensor * src1,
|
11642
11360
|
struct ggml_tensor * dst) {
|
11643
11361
|
switch (src0->type) {
|
11644
11362
|
case GGML_TYPE_F32:
|
11645
11363
|
{
|
11646
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11364
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
|
11647
11365
|
} break;
|
11648
11366
|
default:
|
11649
11367
|
{
|
@@ -11841,20 +11559,17 @@ static void ggml_compute_forward_soft_max_back(
|
|
11841
11559
|
static void ggml_compute_forward_alibi_f32(
|
11842
11560
|
const struct ggml_compute_params * params,
|
11843
11561
|
const struct ggml_tensor * src0,
|
11844
|
-
const struct ggml_tensor * src1,
|
11845
11562
|
struct ggml_tensor * dst) {
|
11846
11563
|
assert(params->ith == 0);
|
11847
11564
|
|
11848
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11849
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11850
|
-
|
11851
11565
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11852
11566
|
return;
|
11853
11567
|
}
|
11854
11568
|
|
11855
|
-
const int
|
11856
|
-
const int
|
11857
|
-
|
11569
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11570
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11571
|
+
float max_bias;
|
11572
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11858
11573
|
|
11859
11574
|
assert(n_past >= 0);
|
11860
11575
|
|
@@ -11907,20 +11622,17 @@ static void ggml_compute_forward_alibi_f32(
|
|
11907
11622
|
static void ggml_compute_forward_alibi_f16(
|
11908
11623
|
const struct ggml_compute_params * params,
|
11909
11624
|
const struct ggml_tensor * src0,
|
11910
|
-
const struct ggml_tensor * src1,
|
11911
11625
|
struct ggml_tensor * dst) {
|
11912
11626
|
assert(params->ith == 0);
|
11913
11627
|
|
11914
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11915
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11916
|
-
|
11917
11628
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11918
11629
|
return;
|
11919
11630
|
}
|
11920
11631
|
|
11921
|
-
const int
|
11922
|
-
const int
|
11923
|
-
|
11632
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11633
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11634
|
+
float max_bias;
|
11635
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11924
11636
|
|
11925
11637
|
assert(n_past >= 0);
|
11926
11638
|
|
@@ -11973,16 +11685,15 @@ static void ggml_compute_forward_alibi_f16(
|
|
11973
11685
|
static void ggml_compute_forward_alibi(
|
11974
11686
|
const struct ggml_compute_params * params,
|
11975
11687
|
const struct ggml_tensor * src0,
|
11976
|
-
const struct ggml_tensor * src1,
|
11977
11688
|
struct ggml_tensor * dst) {
|
11978
11689
|
switch (src0->type) {
|
11979
11690
|
case GGML_TYPE_F16:
|
11980
11691
|
{
|
11981
|
-
ggml_compute_forward_alibi_f16(params, src0,
|
11692
|
+
ggml_compute_forward_alibi_f16(params, src0, dst);
|
11982
11693
|
} break;
|
11983
11694
|
case GGML_TYPE_F32:
|
11984
11695
|
{
|
11985
|
-
ggml_compute_forward_alibi_f32(params, src0,
|
11696
|
+
ggml_compute_forward_alibi_f32(params, src0, dst);
|
11986
11697
|
} break;
|
11987
11698
|
case GGML_TYPE_Q4_0:
|
11988
11699
|
case GGML_TYPE_Q4_1:
|
@@ -12012,19 +11723,17 @@ static void ggml_compute_forward_alibi(
|
|
12012
11723
|
static void ggml_compute_forward_clamp_f32(
|
12013
11724
|
const struct ggml_compute_params * params,
|
12014
11725
|
const struct ggml_tensor * src0,
|
12015
|
-
const struct ggml_tensor * src1,
|
12016
11726
|
struct ggml_tensor * dst) {
|
12017
11727
|
assert(params->ith == 0);
|
12018
11728
|
|
12019
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12020
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
12021
|
-
|
12022
11729
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12023
11730
|
return;
|
12024
11731
|
}
|
12025
11732
|
|
12026
|
-
|
12027
|
-
|
11733
|
+
float min;
|
11734
|
+
float max;
|
11735
|
+
memcpy(&min, (float *) dst->op_params + 0, sizeof(float));
|
11736
|
+
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
12028
11737
|
|
12029
11738
|
const int ith = params->ith;
|
12030
11739
|
const int nth = params->nth;
|
@@ -12054,12 +11763,11 @@ static void ggml_compute_forward_clamp_f32(
|
|
12054
11763
|
static void ggml_compute_forward_clamp(
|
12055
11764
|
const struct ggml_compute_params * params,
|
12056
11765
|
const struct ggml_tensor * src0,
|
12057
|
-
const struct ggml_tensor * src1,
|
12058
11766
|
struct ggml_tensor * dst) {
|
12059
11767
|
switch (src0->type) {
|
12060
11768
|
case GGML_TYPE_F32:
|
12061
11769
|
{
|
12062
|
-
ggml_compute_forward_clamp_f32(params, src0,
|
11770
|
+
ggml_compute_forward_clamp_f32(params, src0, dst);
|
12063
11771
|
} break;
|
12064
11772
|
case GGML_TYPE_F16:
|
12065
11773
|
case GGML_TYPE_Q4_0:
|
@@ -12089,10 +11797,7 @@ static void ggml_compute_forward_clamp(
|
|
12089
11797
|
static void ggml_compute_forward_rope_f32(
|
12090
11798
|
const struct ggml_compute_params * params,
|
12091
11799
|
const struct ggml_tensor * src0,
|
12092
|
-
const struct ggml_tensor * src1,
|
12093
11800
|
struct ggml_tensor * dst) {
|
12094
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12095
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12096
11801
|
|
12097
11802
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12098
11803
|
return;
|
@@ -12101,12 +11806,12 @@ static void ggml_compute_forward_rope_f32(
|
|
12101
11806
|
float freq_base;
|
12102
11807
|
float freq_scale;
|
12103
11808
|
|
12104
|
-
const int n_past = ((int32_t *)
|
12105
|
-
const int n_dims = ((int32_t *)
|
12106
|
-
const int mode = ((int32_t *)
|
12107
|
-
const int n_ctx = ((int32_t *)
|
12108
|
-
memcpy(&freq_base, (int32_t *)
|
12109
|
-
memcpy(&freq_scale, (int32_t *)
|
11809
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11810
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11811
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11812
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11813
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11814
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12110
11815
|
|
12111
11816
|
assert(n_past >= 0);
|
12112
11817
|
|
@@ -12221,10 +11926,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12221
11926
|
static void ggml_compute_forward_rope_f16(
|
12222
11927
|
const struct ggml_compute_params * params,
|
12223
11928
|
const struct ggml_tensor * src0,
|
12224
|
-
const struct ggml_tensor * src1,
|
12225
11929
|
struct ggml_tensor * dst) {
|
12226
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12227
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12228
11930
|
|
12229
11931
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12230
11932
|
return;
|
@@ -12233,12 +11935,12 @@ static void ggml_compute_forward_rope_f16(
|
|
12233
11935
|
float freq_base;
|
12234
11936
|
float freq_scale;
|
12235
11937
|
|
12236
|
-
const int n_past = ((int32_t *)
|
12237
|
-
const int n_dims = ((int32_t *)
|
12238
|
-
const int mode = ((int32_t *)
|
12239
|
-
const int n_ctx = ((int32_t *)
|
12240
|
-
memcpy(&freq_base, (int32_t *)
|
12241
|
-
memcpy(&freq_scale, (int32_t *)
|
11938
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11939
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11940
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11941
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11942
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11943
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12242
11944
|
|
12243
11945
|
assert(n_past >= 0);
|
12244
11946
|
|
@@ -12353,16 +12055,15 @@ static void ggml_compute_forward_rope_f16(
|
|
12353
12055
|
static void ggml_compute_forward_rope(
|
12354
12056
|
const struct ggml_compute_params * params,
|
12355
12057
|
const struct ggml_tensor * src0,
|
12356
|
-
const struct ggml_tensor * src1,
|
12357
12058
|
struct ggml_tensor * dst) {
|
12358
12059
|
switch (src0->type) {
|
12359
12060
|
case GGML_TYPE_F16:
|
12360
12061
|
{
|
12361
|
-
ggml_compute_forward_rope_f16(params, src0,
|
12062
|
+
ggml_compute_forward_rope_f16(params, src0, dst);
|
12362
12063
|
} break;
|
12363
12064
|
case GGML_TYPE_F32:
|
12364
12065
|
{
|
12365
|
-
ggml_compute_forward_rope_f32(params, src0,
|
12066
|
+
ggml_compute_forward_rope_f32(params, src0, dst);
|
12366
12067
|
} break;
|
12367
12068
|
default:
|
12368
12069
|
{
|
@@ -12376,10 +12077,7 @@ static void ggml_compute_forward_rope(
|
|
12376
12077
|
static void ggml_compute_forward_rope_back_f32(
|
12377
12078
|
const struct ggml_compute_params * params,
|
12378
12079
|
const struct ggml_tensor * src0,
|
12379
|
-
const struct ggml_tensor * src1,
|
12380
12080
|
struct ggml_tensor * dst) {
|
12381
|
-
assert(src1->type == GGML_TYPE_I32);
|
12382
|
-
assert(ggml_nelements(src1) == 4);
|
12383
12081
|
|
12384
12082
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12385
12083
|
return;
|
@@ -12389,9 +12087,9 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12389
12087
|
// dx = rope_back(dy, src1)
|
12390
12088
|
// src0 is dy, src1 contains options
|
12391
12089
|
|
12392
|
-
const int n_past = ((int32_t *)
|
12393
|
-
const int n_dims = ((int32_t *)
|
12394
|
-
const int mode = ((int32_t *)
|
12090
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12091
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12092
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12395
12093
|
|
12396
12094
|
assert(n_past >= 0);
|
12397
12095
|
|
@@ -12475,10 +12173,7 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12475
12173
|
static void ggml_compute_forward_rope_back_f16(
|
12476
12174
|
const struct ggml_compute_params * params,
|
12477
12175
|
const struct ggml_tensor * src0,
|
12478
|
-
const struct ggml_tensor * src1,
|
12479
12176
|
struct ggml_tensor * dst) {
|
12480
|
-
assert(src1->type == GGML_TYPE_I32);
|
12481
|
-
assert(ggml_nelements(src1) == 3);
|
12482
12177
|
|
12483
12178
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12484
12179
|
return;
|
@@ -12488,9 +12183,9 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12488
12183
|
// dx = rope_back(dy, src1)
|
12489
12184
|
// src0 is dy, src1 contains options
|
12490
12185
|
|
12491
|
-
const int n_past = ((int32_t *)
|
12492
|
-
const int n_dims = ((int32_t *)
|
12493
|
-
const int mode = ((int32_t *)
|
12186
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12187
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12188
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12494
12189
|
|
12495
12190
|
assert(n_past >= 0);
|
12496
12191
|
|
@@ -12574,16 +12269,15 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12574
12269
|
static void ggml_compute_forward_rope_back(
|
12575
12270
|
const struct ggml_compute_params * params,
|
12576
12271
|
const struct ggml_tensor * src0,
|
12577
|
-
const struct ggml_tensor * src1,
|
12578
12272
|
struct ggml_tensor * dst) {
|
12579
12273
|
switch (src0->type) {
|
12580
12274
|
case GGML_TYPE_F16:
|
12581
12275
|
{
|
12582
|
-
ggml_compute_forward_rope_back_f16(params, src0,
|
12276
|
+
ggml_compute_forward_rope_back_f16(params, src0, dst);
|
12583
12277
|
} break;
|
12584
12278
|
case GGML_TYPE_F32:
|
12585
12279
|
{
|
12586
|
-
ggml_compute_forward_rope_back_f32(params, src0,
|
12280
|
+
ggml_compute_forward_rope_back_f32(params, src0, dst);
|
12587
12281
|
} break;
|
12588
12282
|
default:
|
12589
12283
|
{
|
@@ -12780,7 +12474,7 @@ static void ggml_compute_forward_conv_1d_s1_ph(
|
|
12780
12474
|
const struct ggml_compute_params * params,
|
12781
12475
|
const struct ggml_tensor * src0,
|
12782
12476
|
const struct ggml_tensor * src1,
|
12783
|
-
|
12477
|
+
struct ggml_tensor * dst) {
|
12784
12478
|
switch (src0->type) {
|
12785
12479
|
case GGML_TYPE_F16:
|
12786
12480
|
{
|
@@ -12983,7 +12677,7 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12983
12677
|
const struct ggml_compute_params * params,
|
12984
12678
|
const struct ggml_tensor * src0,
|
12985
12679
|
const struct ggml_tensor * src1,
|
12986
|
-
|
12680
|
+
struct ggml_tensor * dst) {
|
12987
12681
|
switch (src0->type) {
|
12988
12682
|
case GGML_TYPE_F16:
|
12989
12683
|
{
|
@@ -13003,14 +12697,13 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
13003
12697
|
// ggml_compute_forward_conv_1d
|
13004
12698
|
|
13005
12699
|
static void ggml_compute_forward_conv_1d(
|
13006
|
-
|
13007
|
-
|
13008
|
-
|
13009
|
-
|
13010
|
-
|
13011
|
-
const int32_t
|
13012
|
-
const int32_t
|
13013
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[2];
|
12700
|
+
const struct ggml_compute_params * params,
|
12701
|
+
const struct ggml_tensor * src0,
|
12702
|
+
const struct ggml_tensor * src1,
|
12703
|
+
struct ggml_tensor * dst) {
|
12704
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12705
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
12706
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
13014
12707
|
GGML_ASSERT(d0 == 1); // dilation not supported
|
13015
12708
|
GGML_ASSERT(p0 == src0->ne[0]/2); // only half padding supported
|
13016
12709
|
if (s0 == 1) {
|
@@ -13028,7 +12721,6 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13028
12721
|
const struct ggml_compute_params * params,
|
13029
12722
|
const struct ggml_tensor * src0,
|
13030
12723
|
const struct ggml_tensor * src1,
|
13031
|
-
const struct ggml_tensor * opt0,
|
13032
12724
|
struct ggml_tensor * dst) {
|
13033
12725
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
13034
12726
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
@@ -13048,12 +12740,12 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13048
12740
|
// size of the convolution row - the kernel size unrolled across all channels
|
13049
12741
|
const int ew0 = nk0*nk1*ne02;
|
13050
12742
|
|
13051
|
-
const int32_t s0 = ((const int32_t*)(
|
13052
|
-
const int32_t s1 = ((const int32_t*)(
|
13053
|
-
const int32_t p0 = ((const int32_t*)(
|
13054
|
-
const int32_t p1 = ((const int32_t*)(
|
13055
|
-
const int32_t d0 = ((const int32_t*)(
|
13056
|
-
const int32_t d1 = ((const int32_t*)(
|
12743
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12744
|
+
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12745
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12746
|
+
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12747
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12748
|
+
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
13057
12749
|
|
13058
12750
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13059
12751
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -13125,17 +12817,15 @@ static void ggml_compute_forward_conv_2d(
|
|
13125
12817
|
const struct ggml_compute_params * params,
|
13126
12818
|
const struct ggml_tensor * src0,
|
13127
12819
|
const struct ggml_tensor * src1,
|
13128
|
-
|
13129
|
-
struct ggml_tensor * dst
|
13130
|
-
) {
|
12820
|
+
struct ggml_tensor * dst) {
|
13131
12821
|
switch (src0->type) {
|
13132
12822
|
case GGML_TYPE_F16:
|
13133
12823
|
{
|
13134
|
-
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1,
|
12824
|
+
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
|
13135
12825
|
} break;
|
13136
12826
|
case GGML_TYPE_F32:
|
13137
12827
|
{
|
13138
|
-
//ggml_compute_forward_conv_2d_f32(params, src0, src1,
|
12828
|
+
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
13139
12829
|
GGML_ASSERT(false);
|
13140
12830
|
} break;
|
13141
12831
|
default:
|
@@ -13200,12 +12890,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
13200
12890
|
// ggml_compute_forward_pool_1d
|
13201
12891
|
|
13202
12892
|
static void ggml_compute_forward_pool_1d(
|
13203
|
-
|
13204
|
-
|
13205
|
-
|
13206
|
-
|
13207
|
-
|
13208
|
-
const int* opts = (const int*)opt0->data;
|
12893
|
+
const struct ggml_compute_params * params,
|
12894
|
+
const struct ggml_tensor * src0,
|
12895
|
+
struct ggml_tensor * dst) {
|
12896
|
+
|
12897
|
+
const int32_t* opts = (const int32_t*)dst->op_params;
|
13209
12898
|
enum ggml_op_pool op = opts[0];
|
13210
12899
|
const int k0 = opts[1];
|
13211
12900
|
const int s0 = opts[2];
|
@@ -13219,12 +12908,12 @@ static void ggml_compute_forward_pool_1d(
|
|
13219
12908
|
// ggml_compute_forward_pool_2d_sk_p0
|
13220
12909
|
|
13221
12910
|
static void ggml_compute_forward_pool_2d_sk_p0(
|
13222
|
-
|
13223
|
-
|
13224
|
-
|
13225
|
-
|
13226
|
-
|
13227
|
-
|
12911
|
+
const struct ggml_compute_params * params,
|
12912
|
+
const enum ggml_op_pool op,
|
12913
|
+
const struct ggml_tensor * src,
|
12914
|
+
const int k0,
|
12915
|
+
const int k1,
|
12916
|
+
struct ggml_tensor * dst) {
|
13228
12917
|
assert(src->type == GGML_TYPE_F32);
|
13229
12918
|
assert(params->ith == 0);
|
13230
12919
|
|
@@ -13284,12 +12973,11 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
13284
12973
|
// ggml_compute_forward_pool_2d
|
13285
12974
|
|
13286
12975
|
static void ggml_compute_forward_pool_2d(
|
13287
|
-
|
13288
|
-
|
13289
|
-
|
13290
|
-
|
13291
|
-
|
13292
|
-
const int* opts = (const int*)opt0->data;
|
12976
|
+
const struct ggml_compute_params * params,
|
12977
|
+
const struct ggml_tensor * src0,
|
12978
|
+
struct ggml_tensor * dst) {
|
12979
|
+
|
12980
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
13293
12981
|
enum ggml_op_pool op = opts[0];
|
13294
12982
|
const int k0 = opts[1];
|
13295
12983
|
const int k1 = opts[2];
|
@@ -13314,7 +13002,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13314
13002
|
const struct ggml_tensor * k,
|
13315
13003
|
const struct ggml_tensor * v,
|
13316
13004
|
const bool masked,
|
13317
|
-
|
13005
|
+
struct ggml_tensor * dst) {
|
13318
13006
|
int64_t t0 = ggml_perf_time_us();
|
13319
13007
|
UNUSED(t0);
|
13320
13008
|
|
@@ -13492,7 +13180,7 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13492
13180
|
const struct ggml_tensor * k,
|
13493
13181
|
const struct ggml_tensor * v,
|
13494
13182
|
const bool masked,
|
13495
|
-
|
13183
|
+
struct ggml_tensor * dst) {
|
13496
13184
|
int64_t t0 = ggml_perf_time_us();
|
13497
13185
|
UNUSED(t0);
|
13498
13186
|
|
@@ -14257,7 +13945,6 @@ static void ggml_compute_forward_flash_attn_back(
|
|
14257
13945
|
static void ggml_compute_forward_win_part_f32(
|
14258
13946
|
const struct ggml_compute_params * params,
|
14259
13947
|
const struct ggml_tensor * src0,
|
14260
|
-
const struct ggml_tensor * opt0,
|
14261
13948
|
struct ggml_tensor * dst) {
|
14262
13949
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14263
13950
|
return;
|
@@ -14266,9 +13953,9 @@ static void ggml_compute_forward_win_part_f32(
|
|
14266
13953
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14267
13954
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14268
13955
|
|
14269
|
-
const int32_t nep0 = ((const int32_t *)(
|
14270
|
-
const int32_t nep1 = ((const int32_t *)(
|
14271
|
-
const int32_t w = ((const int32_t *)(
|
13956
|
+
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
13957
|
+
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
13958
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[2];
|
14272
13959
|
|
14273
13960
|
assert(ne00 == ne0);
|
14274
13961
|
assert(ne3 == nep0*nep1);
|
@@ -14302,12 +13989,11 @@ static void ggml_compute_forward_win_part_f32(
|
|
14302
13989
|
static void ggml_compute_forward_win_part(
|
14303
13990
|
const struct ggml_compute_params * params,
|
14304
13991
|
const struct ggml_tensor * src0,
|
14305
|
-
const struct ggml_tensor * opt0,
|
14306
13992
|
struct ggml_tensor * dst) {
|
14307
13993
|
switch (src0->type) {
|
14308
13994
|
case GGML_TYPE_F32:
|
14309
13995
|
{
|
14310
|
-
ggml_compute_forward_win_part_f32(params, src0,
|
13996
|
+
ggml_compute_forward_win_part_f32(params, src0, dst);
|
14311
13997
|
} break;
|
14312
13998
|
default:
|
14313
13999
|
{
|
@@ -14321,7 +14007,6 @@ static void ggml_compute_forward_win_part(
|
|
14321
14007
|
static void ggml_compute_forward_win_unpart_f32(
|
14322
14008
|
const struct ggml_compute_params * params,
|
14323
14009
|
const struct ggml_tensor * src0,
|
14324
|
-
const struct ggml_tensor * opt0,
|
14325
14010
|
struct ggml_tensor * dst) {
|
14326
14011
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14327
14012
|
return;
|
@@ -14330,7 +14015,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14330
14015
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14331
14016
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14332
14017
|
|
14333
|
-
const int32_t w = ((const int32_t *)(
|
14018
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
14334
14019
|
|
14335
14020
|
// padding
|
14336
14021
|
const int px = (w - ne1%w)%w;
|
@@ -14364,12 +14049,67 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14364
14049
|
static void ggml_compute_forward_win_unpart(
|
14365
14050
|
const struct ggml_compute_params * params,
|
14366
14051
|
const struct ggml_tensor * src0,
|
14367
|
-
const struct ggml_tensor * opt0,
|
14368
14052
|
struct ggml_tensor * dst) {
|
14369
14053
|
switch (src0->type) {
|
14370
14054
|
case GGML_TYPE_F32:
|
14371
14055
|
{
|
14372
|
-
ggml_compute_forward_win_unpart_f32(params, src0,
|
14056
|
+
ggml_compute_forward_win_unpart_f32(params, src0, dst);
|
14057
|
+
} break;
|
14058
|
+
default:
|
14059
|
+
{
|
14060
|
+
GGML_ASSERT(false);
|
14061
|
+
} break;
|
14062
|
+
}
|
14063
|
+
}
|
14064
|
+
|
14065
|
+
// ggml_compute_forward_unary
|
14066
|
+
|
14067
|
+
static void ggml_compute_forward_unary(
|
14068
|
+
const struct ggml_compute_params * params,
|
14069
|
+
const struct ggml_tensor * src0,
|
14070
|
+
struct ggml_tensor * dst) {
|
14071
|
+
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
14072
|
+
|
14073
|
+
switch (op) {
|
14074
|
+
case GGML_UNARY_OP_ABS:
|
14075
|
+
{
|
14076
|
+
ggml_compute_forward_abs(params, src0, dst);
|
14077
|
+
} break;
|
14078
|
+
case GGML_UNARY_OP_SGN:
|
14079
|
+
{
|
14080
|
+
ggml_compute_forward_sgn(params, src0, dst);
|
14081
|
+
} break;
|
14082
|
+
case GGML_UNARY_OP_NEG:
|
14083
|
+
{
|
14084
|
+
ggml_compute_forward_neg(params, src0, dst);
|
14085
|
+
} break;
|
14086
|
+
case GGML_UNARY_OP_STEP:
|
14087
|
+
{
|
14088
|
+
ggml_compute_forward_step(params, src0, dst);
|
14089
|
+
} break;
|
14090
|
+
case GGML_UNARY_OP_TANH:
|
14091
|
+
{
|
14092
|
+
ggml_compute_forward_tanh(params, src0, dst);
|
14093
|
+
} break;
|
14094
|
+
case GGML_UNARY_OP_ELU:
|
14095
|
+
{
|
14096
|
+
ggml_compute_forward_elu(params, src0, dst);
|
14097
|
+
} break;
|
14098
|
+
case GGML_UNARY_OP_RELU:
|
14099
|
+
{
|
14100
|
+
ggml_compute_forward_relu(params, src0, dst);
|
14101
|
+
} break;
|
14102
|
+
case GGML_UNARY_OP_GELU:
|
14103
|
+
{
|
14104
|
+
ggml_compute_forward_gelu(params, src0, dst);
|
14105
|
+
} break;
|
14106
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
14107
|
+
{
|
14108
|
+
ggml_compute_forward_gelu_quick(params, src0, dst);
|
14109
|
+
} break;
|
14110
|
+
case GGML_UNARY_OP_SILU:
|
14111
|
+
{
|
14112
|
+
ggml_compute_forward_silu(params, src0, dst);
|
14373
14113
|
} break;
|
14374
14114
|
default:
|
14375
14115
|
{
|
@@ -14888,7 +14628,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14888
14628
|
} break;
|
14889
14629
|
case GGML_OP_ACC:
|
14890
14630
|
{
|
14891
|
-
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor
|
14631
|
+
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
|
14892
14632
|
} break;
|
14893
14633
|
case GGML_OP_SUB:
|
14894
14634
|
{
|
@@ -14938,46 +14678,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14938
14678
|
{
|
14939
14679
|
ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
|
14940
14680
|
} break;
|
14941
|
-
case GGML_OP_ABS:
|
14942
|
-
{
|
14943
|
-
ggml_compute_forward_abs(params, tensor->src[0], tensor);
|
14944
|
-
} break;
|
14945
|
-
case GGML_OP_SGN:
|
14946
|
-
{
|
14947
|
-
ggml_compute_forward_sgn(params, tensor->src[0], tensor);
|
14948
|
-
} break;
|
14949
|
-
case GGML_OP_NEG:
|
14950
|
-
{
|
14951
|
-
ggml_compute_forward_neg(params, tensor->src[0], tensor);
|
14952
|
-
} break;
|
14953
|
-
case GGML_OP_STEP:
|
14954
|
-
{
|
14955
|
-
ggml_compute_forward_step(params, tensor->src[0], tensor);
|
14956
|
-
} break;
|
14957
|
-
case GGML_OP_TANH:
|
14958
|
-
{
|
14959
|
-
ggml_compute_forward_tanh(params, tensor->src[0], tensor);
|
14960
|
-
} break;
|
14961
|
-
case GGML_OP_ELU:
|
14962
|
-
{
|
14963
|
-
ggml_compute_forward_elu(params, tensor->src[0], tensor);
|
14964
|
-
} break;
|
14965
|
-
case GGML_OP_RELU:
|
14966
|
-
{
|
14967
|
-
ggml_compute_forward_relu(params, tensor->src[0], tensor);
|
14968
|
-
} break;
|
14969
|
-
case GGML_OP_GELU:
|
14970
|
-
{
|
14971
|
-
ggml_compute_forward_gelu(params, tensor->src[0], tensor);
|
14972
|
-
} break;
|
14973
|
-
case GGML_OP_GELU_QUICK:
|
14974
|
-
{
|
14975
|
-
ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor);
|
14976
|
-
} break;
|
14977
|
-
case GGML_OP_SILU:
|
14978
|
-
{
|
14979
|
-
ggml_compute_forward_silu(params, tensor->src[0], tensor);
|
14980
|
-
} break;
|
14981
14681
|
case GGML_OP_SILU_BACK:
|
14982
14682
|
{
|
14983
14683
|
ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
|
@@ -15008,7 +14708,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15008
14708
|
} break;
|
15009
14709
|
case GGML_OP_SET:
|
15010
14710
|
{
|
15011
|
-
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor
|
14711
|
+
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
|
15012
14712
|
} break;
|
15013
14713
|
case GGML_OP_CPY:
|
15014
14714
|
{
|
@@ -15048,11 +14748,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15048
14748
|
} break;
|
15049
14749
|
case GGML_OP_DIAG_MASK_INF:
|
15050
14750
|
{
|
15051
|
-
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor
|
14751
|
+
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
|
15052
14752
|
} break;
|
15053
14753
|
case GGML_OP_DIAG_MASK_ZERO:
|
15054
14754
|
{
|
15055
|
-
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor
|
14755
|
+
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
|
15056
14756
|
} break;
|
15057
14757
|
case GGML_OP_SOFT_MAX:
|
15058
14758
|
{
|
@@ -15064,39 +14764,39 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15064
14764
|
} break;
|
15065
14765
|
case GGML_OP_ROPE:
|
15066
14766
|
{
|
15067
|
-
ggml_compute_forward_rope(params, tensor->src[0], tensor
|
14767
|
+
ggml_compute_forward_rope(params, tensor->src[0], tensor);
|
15068
14768
|
} break;
|
15069
14769
|
case GGML_OP_ROPE_BACK:
|
15070
14770
|
{
|
15071
|
-
ggml_compute_forward_rope_back(params, tensor->src[0], tensor
|
14771
|
+
ggml_compute_forward_rope_back(params, tensor->src[0], tensor);
|
15072
14772
|
} break;
|
15073
14773
|
case GGML_OP_ALIBI:
|
15074
14774
|
{
|
15075
|
-
ggml_compute_forward_alibi(params, tensor->src[0], tensor
|
14775
|
+
ggml_compute_forward_alibi(params, tensor->src[0], tensor);
|
15076
14776
|
} break;
|
15077
14777
|
case GGML_OP_CLAMP:
|
15078
14778
|
{
|
15079
|
-
ggml_compute_forward_clamp(params, tensor->src[0], tensor
|
14779
|
+
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
15080
14780
|
} break;
|
15081
14781
|
case GGML_OP_CONV_1D:
|
15082
14782
|
{
|
15083
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor
|
14783
|
+
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
15084
14784
|
} break;
|
15085
14785
|
case GGML_OP_CONV_2D:
|
15086
14786
|
{
|
15087
|
-
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor
|
14787
|
+
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
|
15088
14788
|
} break;
|
15089
14789
|
case GGML_OP_POOL_1D:
|
15090
14790
|
{
|
15091
|
-
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor
|
14791
|
+
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
|
15092
14792
|
} break;
|
15093
14793
|
case GGML_OP_POOL_2D:
|
15094
14794
|
{
|
15095
|
-
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor
|
14795
|
+
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
|
15096
14796
|
} break;
|
15097
14797
|
case GGML_OP_FLASH_ATTN:
|
15098
14798
|
{
|
15099
|
-
const int32_t t =
|
14799
|
+
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15100
14800
|
GGML_ASSERT(t == 0 || t == 1);
|
15101
14801
|
const bool masked = t != 0;
|
15102
14802
|
ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
|
@@ -15107,47 +14807,56 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15107
14807
|
} break;
|
15108
14808
|
case GGML_OP_FLASH_ATTN_BACK:
|
15109
14809
|
{
|
15110
|
-
int32_t t =
|
14810
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15111
14811
|
GGML_ASSERT(t == 0 || t == 1);
|
15112
14812
|
bool masked = t != 0;
|
15113
14813
|
ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
|
15114
14814
|
} break;
|
15115
14815
|
case GGML_OP_WIN_PART:
|
15116
14816
|
{
|
15117
|
-
ggml_compute_forward_win_part(params, tensor->src[0], tensor
|
14817
|
+
ggml_compute_forward_win_part(params, tensor->src[0], tensor);
|
15118
14818
|
} break;
|
15119
14819
|
case GGML_OP_WIN_UNPART:
|
15120
14820
|
{
|
15121
|
-
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor
|
14821
|
+
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
|
14822
|
+
} break;
|
14823
|
+
case GGML_OP_UNARY:
|
14824
|
+
{
|
14825
|
+
ggml_compute_forward_unary(params, tensor->src[0], tensor);
|
15122
14826
|
} break;
|
15123
14827
|
case GGML_OP_MAP_UNARY:
|
15124
14828
|
{
|
15125
|
-
|
14829
|
+
ggml_unary_op_f32_t fun;
|
14830
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15126
14831
|
ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
|
15127
14832
|
}
|
15128
14833
|
break;
|
15129
14834
|
case GGML_OP_MAP_BINARY:
|
15130
14835
|
{
|
15131
|
-
|
14836
|
+
ggml_binary_op_f32_t fun;
|
14837
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15132
14838
|
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15133
14839
|
}
|
15134
14840
|
break;
|
15135
14841
|
case GGML_OP_MAP_CUSTOM1:
|
15136
14842
|
{
|
15137
|
-
|
14843
|
+
ggml_custom1_op_f32_t fun;
|
14844
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15138
14845
|
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
15139
14846
|
}
|
15140
14847
|
break;
|
15141
14848
|
case GGML_OP_MAP_CUSTOM2:
|
15142
14849
|
{
|
15143
|
-
|
14850
|
+
ggml_custom2_op_f32_t fun;
|
14851
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15144
14852
|
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15145
14853
|
}
|
15146
14854
|
break;
|
15147
14855
|
case GGML_OP_MAP_CUSTOM3:
|
15148
14856
|
{
|
15149
|
-
|
15150
|
-
|
14857
|
+
ggml_custom3_op_f32_t fun;
|
14858
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
14859
|
+
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
15151
14860
|
}
|
15152
14861
|
break;
|
15153
14862
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
@@ -15211,12 +14920,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15211
14920
|
src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
|
15212
14921
|
}
|
15213
14922
|
if (src1->grad) {
|
15214
|
-
|
15215
|
-
|
15216
|
-
const size_t
|
15217
|
-
const size_t
|
15218
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15219
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
14923
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
14924
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
14925
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
14926
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15220
14927
|
|
15221
14928
|
struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
|
15222
14929
|
tensor->grad,
|
@@ -15365,73 +15072,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15365
15072
|
inplace);
|
15366
15073
|
}
|
15367
15074
|
} break;
|
15368
|
-
case GGML_OP_ABS:
|
15369
|
-
{
|
15370
|
-
if (src0->grad) {
|
15371
|
-
src0->grad =
|
15372
|
-
ggml_add_impl(ctx,
|
15373
|
-
src0->grad,
|
15374
|
-
ggml_mul(ctx,
|
15375
|
-
ggml_sgn(ctx, src0),
|
15376
|
-
tensor->grad),
|
15377
|
-
inplace);
|
15378
|
-
}
|
15379
|
-
} break;
|
15380
|
-
case GGML_OP_SGN:
|
15381
|
-
{
|
15382
|
-
if (src0->grad) {
|
15383
|
-
// noop
|
15384
|
-
}
|
15385
|
-
} break;
|
15386
|
-
case GGML_OP_NEG:
|
15387
|
-
{
|
15388
|
-
if (src0->grad) {
|
15389
|
-
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15390
|
-
}
|
15391
|
-
} break;
|
15392
|
-
case GGML_OP_STEP:
|
15393
|
-
{
|
15394
|
-
if (src0->grad) {
|
15395
|
-
// noop
|
15396
|
-
}
|
15397
|
-
} break;
|
15398
|
-
case GGML_OP_TANH:
|
15399
|
-
{
|
15400
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15401
|
-
} break;
|
15402
|
-
case GGML_OP_ELU:
|
15403
|
-
{
|
15404
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15405
|
-
} break;
|
15406
|
-
case GGML_OP_RELU:
|
15407
|
-
{
|
15408
|
-
if (src0->grad) {
|
15409
|
-
src0->grad = ggml_sub_impl(ctx,
|
15410
|
-
src0->grad,
|
15411
|
-
ggml_mul(ctx,
|
15412
|
-
ggml_step(ctx, src0),
|
15413
|
-
tensor->grad),
|
15414
|
-
inplace);
|
15415
|
-
}
|
15416
|
-
} break;
|
15417
|
-
case GGML_OP_GELU:
|
15418
|
-
{
|
15419
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15420
|
-
} break;
|
15421
|
-
case GGML_OP_GELU_QUICK:
|
15422
|
-
{
|
15423
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15424
|
-
} break;
|
15425
|
-
case GGML_OP_SILU:
|
15426
|
-
{
|
15427
|
-
// necessary for llama
|
15428
|
-
if (src0->grad) {
|
15429
|
-
src0->grad = ggml_add_impl(ctx,
|
15430
|
-
src0->grad,
|
15431
|
-
ggml_silu_back(ctx, src0, tensor->grad),
|
15432
|
-
inplace);
|
15433
|
-
}
|
15434
|
-
} break;
|
15435
15075
|
case GGML_OP_SILU_BACK:
|
15436
15076
|
{
|
15437
15077
|
GGML_ASSERT(false); // TODO: not implemented
|
@@ -15524,12 +15164,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15524
15164
|
} break;
|
15525
15165
|
case GGML_OP_SET:
|
15526
15166
|
{
|
15527
|
-
|
15528
|
-
|
15529
|
-
const size_t
|
15530
|
-
const size_t
|
15531
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15532
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
15167
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
15168
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
15169
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
15170
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15533
15171
|
|
15534
15172
|
struct ggml_tensor * tensor_grad_view = NULL;
|
15535
15173
|
|
@@ -15606,8 +15244,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15606
15244
|
if (src0->grad) {
|
15607
15245
|
size_t offset;
|
15608
15246
|
|
15609
|
-
|
15610
|
-
memcpy(&offset, tensor->src[2]->data, sizeof(offset));
|
15247
|
+
memcpy(&offset, tensor->op_params, sizeof(offset));
|
15611
15248
|
|
15612
15249
|
size_t nb1 = tensor->nb[1];
|
15613
15250
|
size_t nb2 = tensor->nb[2];
|
@@ -15634,7 +15271,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15634
15271
|
{
|
15635
15272
|
// necessary for llama
|
15636
15273
|
if (src0->grad) {
|
15637
|
-
int32_t * axes = (int32_t *) tensor->
|
15274
|
+
int32_t * axes = (int32_t *) tensor->op_params;
|
15638
15275
|
int axis0 = axes[0] & 0x3;
|
15639
15276
|
int axis1 = axes[1] & 0x3;
|
15640
15277
|
int axis2 = axes[2] & 0x3;
|
@@ -15690,33 +15327,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15690
15327
|
{
|
15691
15328
|
// necessary for llama
|
15692
15329
|
if (src0->grad) {
|
15693
|
-
|
15694
|
-
assert(ggml_nelements(src1) == 2);
|
15695
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15330
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15696
15331
|
src0->grad =
|
15697
15332
|
ggml_add_impl(ctx, src0->grad,
|
15698
15333
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15699
15334
|
inplace);
|
15700
15335
|
}
|
15701
|
-
if (src1->grad) {
|
15702
|
-
// noop
|
15703
|
-
}
|
15704
15336
|
} break;
|
15705
15337
|
case GGML_OP_DIAG_MASK_ZERO:
|
15706
15338
|
{
|
15707
15339
|
// necessary for llama
|
15708
15340
|
if (src0->grad) {
|
15709
|
-
|
15710
|
-
assert(ggml_nelements(src1) == 2);
|
15711
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15341
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15712
15342
|
src0->grad =
|
15713
15343
|
ggml_add_impl(ctx, src0->grad,
|
15714
15344
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15715
15345
|
inplace);
|
15716
15346
|
}
|
15717
|
-
if (src1->grad) {
|
15718
|
-
// noop
|
15719
|
-
}
|
15720
15347
|
} break;
|
15721
15348
|
case GGML_OP_SOFT_MAX:
|
15722
15349
|
{
|
@@ -15737,12 +15364,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15737
15364
|
{
|
15738
15365
|
// necessary for llama
|
15739
15366
|
if (src0->grad) {
|
15740
|
-
|
15741
|
-
|
15742
|
-
const int
|
15743
|
-
const int
|
15744
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15745
|
-
const int n_ctx = ((int32_t *) src1->data)[3];
|
15367
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15368
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15369
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15370
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15746
15371
|
src0->grad = ggml_add_impl(ctx,
|
15747
15372
|
src0->grad,
|
15748
15373
|
ggml_rope_back(ctx,
|
@@ -15753,19 +15378,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15753
15378
|
n_ctx),
|
15754
15379
|
inplace);
|
15755
15380
|
}
|
15756
|
-
if (src1->grad) {
|
15757
|
-
// noop
|
15758
|
-
}
|
15759
15381
|
} break;
|
15760
15382
|
case GGML_OP_ROPE_BACK:
|
15761
15383
|
{
|
15762
15384
|
if (src0->grad) {
|
15763
|
-
|
15764
|
-
|
15765
|
-
const int
|
15766
|
-
const int
|
15767
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15768
|
-
const int n_ctx = ((int32_t *) src1->data)[3];
|
15385
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15386
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15387
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15388
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15769
15389
|
src0->grad = ggml_add_impl(ctx,
|
15770
15390
|
src0->grad,
|
15771
15391
|
ggml_rope(ctx,
|
@@ -15776,9 +15396,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15776
15396
|
n_ctx),
|
15777
15397
|
inplace);
|
15778
15398
|
}
|
15779
|
-
if (src1->grad) {
|
15780
|
-
// noop
|
15781
|
-
}
|
15782
15399
|
} break;
|
15783
15400
|
case GGML_OP_ALIBI:
|
15784
15401
|
{
|
@@ -15808,7 +15425,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15808
15425
|
{
|
15809
15426
|
struct ggml_tensor * flash_grad = NULL;
|
15810
15427
|
if (src0->grad || src1->grad || tensor->src[2]->grad) {
|
15811
|
-
int32_t t =
|
15428
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15812
15429
|
GGML_ASSERT(t == 0 || t == 1);
|
15813
15430
|
bool masked = t != 0;
|
15814
15431
|
flash_grad =
|
@@ -15971,6 +15588,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15971
15588
|
} break;
|
15972
15589
|
case GGML_OP_WIN_PART:
|
15973
15590
|
case GGML_OP_WIN_UNPART:
|
15591
|
+
case GGML_OP_UNARY:
|
15592
|
+
{
|
15593
|
+
switch (ggml_get_unary_op(tensor)) {
|
15594
|
+
case GGML_UNARY_OP_ABS:
|
15595
|
+
{
|
15596
|
+
if (src0->grad) {
|
15597
|
+
src0->grad =
|
15598
|
+
ggml_add_impl(ctx,
|
15599
|
+
src0->grad,
|
15600
|
+
ggml_mul(ctx,
|
15601
|
+
ggml_sgn(ctx, src0),
|
15602
|
+
tensor->grad),
|
15603
|
+
inplace);
|
15604
|
+
}
|
15605
|
+
} break;
|
15606
|
+
case GGML_UNARY_OP_SGN:
|
15607
|
+
{
|
15608
|
+
if (src0->grad) {
|
15609
|
+
// noop
|
15610
|
+
}
|
15611
|
+
} break;
|
15612
|
+
case GGML_UNARY_OP_NEG:
|
15613
|
+
{
|
15614
|
+
if (src0->grad) {
|
15615
|
+
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15616
|
+
}
|
15617
|
+
} break;
|
15618
|
+
case GGML_UNARY_OP_STEP:
|
15619
|
+
{
|
15620
|
+
if (src0->grad) {
|
15621
|
+
// noop
|
15622
|
+
}
|
15623
|
+
} break;
|
15624
|
+
case GGML_UNARY_OP_TANH:
|
15625
|
+
{
|
15626
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15627
|
+
} break;
|
15628
|
+
case GGML_UNARY_OP_ELU:
|
15629
|
+
{
|
15630
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15631
|
+
} break;
|
15632
|
+
case GGML_UNARY_OP_RELU:
|
15633
|
+
{
|
15634
|
+
if (src0->grad) {
|
15635
|
+
src0->grad = ggml_add_impl(ctx,
|
15636
|
+
src0->grad,
|
15637
|
+
ggml_mul(ctx,
|
15638
|
+
ggml_step(ctx, src0),
|
15639
|
+
tensor->grad),
|
15640
|
+
inplace);
|
15641
|
+
}
|
15642
|
+
} break;
|
15643
|
+
case GGML_UNARY_OP_GELU:
|
15644
|
+
{
|
15645
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15646
|
+
} break;
|
15647
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
15648
|
+
{
|
15649
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15650
|
+
} break;
|
15651
|
+
case GGML_UNARY_OP_SILU:
|
15652
|
+
{
|
15653
|
+
// necessary for llama
|
15654
|
+
if (src0->grad) {
|
15655
|
+
src0->grad = ggml_add_impl(ctx,
|
15656
|
+
src0->grad,
|
15657
|
+
ggml_silu_back(ctx, src0, tensor->grad),
|
15658
|
+
inplace);
|
15659
|
+
}
|
15660
|
+
} break;
|
15661
|
+
default:
|
15662
|
+
GGML_ASSERT(false);
|
15663
|
+
}
|
15664
|
+
} break;
|
15974
15665
|
case GGML_OP_MAP_UNARY:
|
15975
15666
|
case GGML_OP_MAP_BINARY:
|
15976
15667
|
case GGML_OP_MAP_CUSTOM1:
|
@@ -16006,6 +15697,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
16006
15697
|
}
|
16007
15698
|
}
|
16008
15699
|
|
15700
|
+
static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
|
15701
|
+
|
15702
|
+
static size_t hash(void * p) {
|
15703
|
+
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
15704
|
+
}
|
15705
|
+
|
15706
|
+
static bool hash_insert(void * hash_table[], void * p) {
|
15707
|
+
size_t h = hash(p);
|
15708
|
+
|
15709
|
+
// linear probing
|
15710
|
+
size_t i = h;
|
15711
|
+
while (hash_table[i] != NULL && hash_table[i] != p) {
|
15712
|
+
i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
|
15713
|
+
if (i == h) {
|
15714
|
+
// hash table is full
|
15715
|
+
GGML_ASSERT(false);
|
15716
|
+
}
|
15717
|
+
}
|
15718
|
+
|
15719
|
+
if (hash_table[i] == p) {
|
15720
|
+
return true;
|
15721
|
+
}
|
15722
|
+
|
15723
|
+
// insert
|
15724
|
+
hash_table[i] = p;
|
15725
|
+
return false;
|
15726
|
+
}
|
15727
|
+
|
16009
15728
|
static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
|
16010
15729
|
if (node->grad == NULL) {
|
16011
15730
|
// this usually happens when we generate intermediate nodes from constants in the backward pass
|
@@ -16016,16 +15735,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
16016
15735
|
}
|
16017
15736
|
|
16018
15737
|
// check if already visited
|
16019
|
-
|
16020
|
-
|
16021
|
-
return;
|
16022
|
-
}
|
16023
|
-
}
|
16024
|
-
|
16025
|
-
for (int i = 0; i < cgraph->n_leafs; i++) {
|
16026
|
-
if (cgraph->leafs[i] == node) {
|
16027
|
-
return;
|
16028
|
-
}
|
15738
|
+
if (hash_insert(cgraph->visited_hash_table, node)) {
|
15739
|
+
return;
|
16029
15740
|
}
|
16030
15741
|
|
16031
15742
|
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
@@ -16088,6 +15799,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
|
|
16088
15799
|
/*.nodes =*/ { NULL },
|
16089
15800
|
/*.grads =*/ { NULL },
|
16090
15801
|
/*.leafs =*/ { NULL },
|
15802
|
+
/*.hash_table =*/ { NULL },
|
16091
15803
|
/*.perf_runs =*/ 0,
|
16092
15804
|
/*.perf_cycles =*/ 0,
|
16093
15805
|
/*.perf_time_us =*/ 0,
|
@@ -16129,13 +15841,42 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
|
|
16129
15841
|
|
16130
15842
|
if (node->is_param) {
|
16131
15843
|
GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
|
16132
|
-
|
15844
|
+
ggml_build_forward_expand(&result, node->grad);
|
16133
15845
|
}
|
16134
15846
|
}
|
16135
15847
|
|
16136
15848
|
return result;
|
16137
15849
|
}
|
16138
15850
|
|
15851
|
+
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
15852
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
|
15853
|
+
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15854
|
+
|
15855
|
+
*cgraph = (struct ggml_cgraph) {
|
15856
|
+
/*.n_nodes =*/ 0,
|
15857
|
+
/*.n_leafs =*/ 0,
|
15858
|
+
/*.nodes =*/ { NULL },
|
15859
|
+
/*.grads =*/ { NULL },
|
15860
|
+
/*.leafs =*/ { NULL },
|
15861
|
+
/*.hash_table =*/ { NULL },
|
15862
|
+
/*.perf_runs =*/ 0,
|
15863
|
+
/*.perf_cycles =*/ 0,
|
15864
|
+
/*.perf_time_us =*/ 0,
|
15865
|
+
};
|
15866
|
+
|
15867
|
+
return cgraph;
|
15868
|
+
}
|
15869
|
+
|
15870
|
+
struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
|
15871
|
+
struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
|
15872
|
+
ggml_build_forward_impl(cgraph, tensor, false);
|
15873
|
+
return cgraph;
|
15874
|
+
}
|
15875
|
+
|
15876
|
+
size_t ggml_graph_overhead(void) {
|
15877
|
+
return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
|
15878
|
+
}
|
15879
|
+
|
16139
15880
|
//
|
16140
15881
|
// thread data
|
16141
15882
|
//
|
@@ -16201,7 +15942,7 @@ typedef pthread_t ggml_thread_t;
|
|
16201
15942
|
|
16202
15943
|
// Android's libc implementation "bionic" does not support setting affinity
|
16203
15944
|
#if defined(__linux__) && !defined(__BIONIC__)
|
16204
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) {
|
15945
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) {
|
16205
15946
|
if (!ggml_is_numa()) {
|
16206
15947
|
return;
|
16207
15948
|
}
|
@@ -16226,7 +15967,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) {
|
|
16226
15967
|
CPU_FREE(cpus);
|
16227
15968
|
}
|
16228
15969
|
|
16229
|
-
void clear_numa_thread_affinity(void) {
|
15970
|
+
static void clear_numa_thread_affinity(void) {
|
16230
15971
|
if (!ggml_is_numa()) {
|
16231
15972
|
return;
|
16232
15973
|
}
|
@@ -16250,8 +15991,8 @@ void clear_numa_thread_affinity(void) {
|
|
16250
15991
|
#else
|
16251
15992
|
// TODO: Windows etc.
|
16252
15993
|
// (the linux implementation may also work on BSD, someone should test)
|
16253
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
16254
|
-
void clear_numa_thread_affinity(void) {}
|
15994
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
15995
|
+
static void clear_numa_thread_affinity(void) {}
|
16255
15996
|
#endif
|
16256
15997
|
|
16257
15998
|
struct ggml_compute_state_shared {
|
@@ -16463,21 +16204,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16463
16204
|
case GGML_OP_ARGMAX:
|
16464
16205
|
case GGML_OP_REPEAT:
|
16465
16206
|
case GGML_OP_REPEAT_BACK:
|
16466
|
-
|
16467
|
-
case GGML_OP_SGN:
|
16468
|
-
case GGML_OP_NEG:
|
16469
|
-
case GGML_OP_STEP:
|
16470
|
-
case GGML_OP_TANH:
|
16471
|
-
case GGML_OP_ELU:
|
16472
|
-
case GGML_OP_RELU:
|
16473
|
-
{
|
16207
|
+
{
|
16474
16208
|
n_tasks = 1;
|
16475
16209
|
} break;
|
16476
|
-
|
16477
|
-
case
|
16478
|
-
|
16479
|
-
|
16210
|
+
|
16211
|
+
case GGML_OP_UNARY:
|
16212
|
+
{
|
16213
|
+
switch (ggml_get_unary_op(node)) {
|
16214
|
+
case GGML_UNARY_OP_ABS:
|
16215
|
+
case GGML_UNARY_OP_SGN:
|
16216
|
+
case GGML_UNARY_OP_NEG:
|
16217
|
+
case GGML_UNARY_OP_STEP:
|
16218
|
+
case GGML_UNARY_OP_TANH:
|
16219
|
+
case GGML_UNARY_OP_ELU:
|
16220
|
+
case GGML_UNARY_OP_RELU:
|
16221
|
+
{
|
16222
|
+
n_tasks = 1;
|
16223
|
+
} break;
|
16224
|
+
|
16225
|
+
case GGML_UNARY_OP_GELU:
|
16226
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
16227
|
+
case GGML_UNARY_OP_SILU:
|
16228
|
+
{
|
16229
|
+
n_tasks = n_threads;
|
16230
|
+
} break;
|
16231
|
+
}
|
16232
|
+
} break;
|
16480
16233
|
case GGML_OP_SILU_BACK:
|
16234
|
+
case GGML_OP_MUL:
|
16481
16235
|
case GGML_OP_NORM:
|
16482
16236
|
case GGML_OP_RMS_NORM:
|
16483
16237
|
case GGML_OP_RMS_NORM_BACK:
|
@@ -16542,10 +16296,10 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16542
16296
|
case GGML_OP_GET_ROWS:
|
16543
16297
|
case GGML_OP_GET_ROWS_BACK:
|
16544
16298
|
case GGML_OP_DIAG:
|
16545
|
-
case GGML_OP_DIAG_MASK_ZERO:
|
16546
16299
|
{
|
16547
16300
|
n_tasks = 1;
|
16548
16301
|
} break;
|
16302
|
+
case GGML_OP_DIAG_MASK_ZERO:
|
16549
16303
|
case GGML_OP_DIAG_MASK_INF:
|
16550
16304
|
case GGML_OP_SOFT_MAX:
|
16551
16305
|
case GGML_OP_SOFT_MAX_BACK:
|
@@ -16838,10 +16592,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
16838
16592
|
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
16839
16593
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
16840
16594
|
|
16841
|
-
struct
|
16842
|
-
GGML_ASSERT(buf);
|
16595
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
|
16843
16596
|
|
16844
|
-
cplan.work_data =
|
16597
|
+
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
16845
16598
|
|
16846
16599
|
ggml_graph_compute(cgraph, &cplan);
|
16847
16600
|
}
|
@@ -16992,7 +16745,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16992
16745
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
16993
16746
|
}
|
16994
16747
|
|
16995
|
-
fwrite(tensor->name,
|
16748
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16749
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
16996
16750
|
|
16997
16751
|
// dump the data
|
16998
16752
|
// TODO: pad this to 32 byte boundary
|
@@ -17025,7 +16779,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
17025
16779
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
17026
16780
|
}
|
17027
16781
|
|
17028
|
-
fwrite(tensor->name,
|
16782
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16783
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
17029
16784
|
|
17030
16785
|
// output the op arguments
|
17031
16786
|
{
|
@@ -17206,7 +16961,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17206
16961
|
|
17207
16962
|
tensor->op = (enum ggml_op) op;
|
17208
16963
|
|
17209
|
-
memcpy(tensor->name,
|
16964
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
16965
|
+
memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
|
17210
16966
|
|
17211
16967
|
tensor->data = (void *) ptr;
|
17212
16968
|
|
@@ -17251,7 +17007,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17251
17007
|
nb[j] = nb_cur;
|
17252
17008
|
}
|
17253
17009
|
|
17254
|
-
const char * ptr_name
|
17010
|
+
const char * ptr_name = ptr; ptr += GGML_MAX_NAME;
|
17011
|
+
const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS;
|
17255
17012
|
|
17256
17013
|
const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t);
|
17257
17014
|
|
@@ -17288,8 +17045,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17288
17045
|
{
|
17289
17046
|
tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0);
|
17290
17047
|
|
17291
|
-
|
17292
|
-
memcpy(&offs,
|
17048
|
+
size_t offs;
|
17049
|
+
memcpy(&offs, ptr_op_params, sizeof(offs));
|
17293
17050
|
|
17294
17051
|
tensor->data = ((char *) tensor->data) + offs;
|
17295
17052
|
} break;
|
@@ -17309,7 +17066,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17309
17066
|
} break;
|
17310
17067
|
}
|
17311
17068
|
|
17312
|
-
memcpy(tensor->name,
|
17069
|
+
memcpy(tensor->name, ptr_name, GGML_MAX_NAME);
|
17070
|
+
memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS);
|
17313
17071
|
|
17314
17072
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
17315
17073
|
tensor->nb[j] = nb[j];
|
@@ -17343,7 +17101,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17343
17101
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
17344
17102
|
i,
|
17345
17103
|
node->ne[0], node->ne[1], node->ne[2],
|
17346
|
-
|
17104
|
+
ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
17347
17105
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
17348
17106
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
17349
17107
|
(double) node->perf_time_us / 1000.0,
|
@@ -17357,7 +17115,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17357
17115
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
17358
17116
|
i,
|
17359
17117
|
node->ne[0], node->ne[1],
|
17360
|
-
|
17118
|
+
ggml_op_name(node->op));
|
17361
17119
|
}
|
17362
17120
|
|
17363
17121
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
@@ -17365,7 +17123,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17365
17123
|
continue;
|
17366
17124
|
}
|
17367
17125
|
|
17368
|
-
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",
|
17126
|
+
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0);
|
17369
17127
|
}
|
17370
17128
|
|
17371
17129
|
GGML_PRINT("========================================\n");
|
@@ -17459,13 +17217,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|
17459
17217
|
}
|
17460
17218
|
|
17461
17219
|
if (node->n_dims == 2) {
|
17462
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1],
|
17220
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
|
17463
17221
|
} else {
|
17464
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2],
|
17222
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
|
17465
17223
|
}
|
17466
17224
|
|
17467
17225
|
if (node->grad) {
|
17468
|
-
fprintf(fp, " | <g>%s\"; ]\n",
|
17226
|
+
fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(node->grad->op));
|
17469
17227
|
} else {
|
17470
17228
|
fprintf(fp, "\"; ]\n");
|
17471
17229
|
}
|